feat: support for chars
viddrobnic committed Jun 16, 2024
1 parent 97da460 commit 948e4aa
Showing 14 changed files with 250 additions and 21 deletions.
45 changes: 24 additions & 21 deletions examples/aoc_day_01.aoc
Original file line number Diff line number Diff line change
@@ -13,21 +13,23 @@ for (line = input(); line; line = input()) {
// Part one
res = 0
for (i = 0; i < len(data); i = i + 1) {
-    chars = split(data[i], "")
+    line = data[i]
    n = 0

    // First number
-    for (j = 0; j < len(chars); j = j + 1) {
-        if (int(chars[j])) {
-            n = int(chars[j])
+    for (j = 0; j < len(line); j = j + 1) {
+        c = int(line[j]) - int('0')
+        if (c >= 0 & c < 10) {
+            n = c
            break
        }
    }

    // Last number
-    for (j = len(chars) - 1; j >= 0; j = j - 1) {
-        if (int(chars[j])) {
-            n = n * 10 + int(chars[j])
+    for (j = len(line) - 1; j >= 0; j = j - 1) {
+        c = int(line[j]) - int('0')
+        if (c >= 0 & c < 10) {
+            n = n * 10 + c
            break
        }
    }
@@ -38,13 +40,13 @@ for (i = 0; i < len(data); i = i + 1) {
print("Part one: " + str(res))

// Part two
-substr_is = fn(target_ch, position, lookup_ch) {
-    if (position + len(lookup_ch) > len(target_ch)) {
+substr_is = fn(target, position, lookup) {
+    if (position + len(lookup) > len(target)) {
        return false
    }

-    for (i = 0; i < len(lookup_ch); i = i + 1) {
-        if (target_ch[position + i] != lookup_ch[i]) {
+    for (i = 0; i < len(lookup); i = i + 1) {
+        if (target[position + i] != lookup[i]) {
            return false
        }
    }
@@ -53,13 +55,14 @@ substr_is = fn(target_ch, position, lookup_ch) {
}

digits = ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"]
-digit = fn(chars, position) {
-    if (int(chars[position])) {
-        return int(chars[position])
+digit = fn(string, position) {
+    d = int(string[position]) - int('0')
+    if (d >= 0 & d < 10) {
+        return d
    }

    for (d = 0; d < len(digits); d = d + 1) {
-        if (substr_is(chars, position, split(digits[d], ""))) {
+        if (substr_is(string, position, digits[d])) {
            return d + 1
        }
    }
@@ -68,12 +71,12 @@ digit = fn(chars, position) {

res = 0
for (i = 0; i < len(data); i = i + 1) {
-    chars = split(data[i], "")
+    line = data[i]
    n = 0

    // First number
-    for (j = 0; j < len(chars); j = j + 1) {
-        d = digit(chars, j)
+    for (j = 0; j < len(line); j = j + 1) {
+        d = digit(line, j)
        if (d) {
            n = d
            break
@@ -82,8 +85,8 @@ for (i = 0; i < len(data); i = i + 1) {
    }

    // Last number
-    for (j = len(chars) - 1; j >= 0; j = j - 1) {
-        d = digit(chars, j)
+    for (j = len(line) - 1; j >= 0; j = j - 1) {
+        d = digit(line, j)
        if (d) {
            n = n * 10 + d
            break
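The change above drops split(data[i], "") in favor of indexing the line directly and doing char arithmetic with the new char literals. As a minimal sketch of that pattern (not part of the commit; is_digit is a hypothetical helper name), a digit check in AoC lang could look like this:

is_digit = fn(string, position) {
    d = int(string[position]) - int('0')
    if (d >= 0 & d < 10) {
        return true
    }
    return false
}

if (is_digit("a1b", 1)) {
    print("found a digit") // prints: found a digit
}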
45 changes: 45 additions & 0 deletions examples/strings.aoc
@@ -0,0 +1,45 @@
// AoC lang supports UTF-8, so we can do cool stuff like this:
string = "🚗"
print(string) // 🚗

// But it makes some things counterintuitive:
print(len(string)) // 4

// The behavior is similar to that of Rust strings, so you can read
// all about it here: https://doc.rust-lang.org/std/string/struct.String.html#utf-8

// TL;DR of the above article: strings are always UTF-8, which means that one
// grapheme can span multiple bytes. In AoC lang, a char is a single byte.
// This makes everything work nicely if you are using just ASCII strings,
// but you have to be careful when using other characters.

// Let's take a look at some examples. You can construct a single char as:
ch = 'A'
print(ch) // A

// And convert it to string with builtin str
string = str(ch)
print(string) // A

// Length returns the number of bytes, which, as we saw above, is not the same
// as the number of graphemes
print(len("AB")) // 2
print(len("🚗")) // 4

// We can use index notation to get a specific char from the string:
print("AB"[0]) // A

// If index is out of bounds, null is returned
print("AB"[5]) // null

// Since a char is a single byte, not a grapheme, weird stuff can happen:
print("🚗"[0]) // ð

// We can also convert chars to ints
print(int('A')) // 65

// And we can also go in the other direction
print(char(65)) // A

// If the int is 256 or larger, it just wraps around (mod 256)
print(char(1090)) // B // explanation: 1090 % 256 = 66, and 66 is 'B'
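As a small follow-up sketch (not part of the commit), the byte-oriented chars above can be used to scan a string byte by byte. This assumes int() maps any char to its byte value (0-255), as it does for 'A':

count_ascii = fn(string) {
    count = 0
    for (i = 0; i < len(string); i = i + 1) {
        // assumption: int() of a char returns its byte value
        if (int(string[i]) < 128) {
            count = count + 1
        }
    }
    return count
}

print(count_ascii("AB🚗")) // 2: the two ascii bytes; the four 🚗 bytes are all >= 128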
2 changes: 2 additions & 0 deletions parser/src/ast.rs
@@ -19,6 +19,7 @@ pub enum NodeValue {
Identifier(String),
IntegerLiteral(i64),
FloatLiteral(f64),
CharLiteral(u8),
BoolLiteral(bool),
StringLiteral(String),
ArrayLiteral(Vec<Node>),
@@ -227,6 +228,7 @@ impl Display for NodeValue {
NodeValue::Identifier(ident) => write!(f, "{ident}"),
NodeValue::IntegerLiteral(int) => write!(f, "{int}"),
NodeValue::FloatLiteral(float) => write!(f, "{float}"),
NodeValue::CharLiteral(ch) => write!(f, "{}", *ch as char),
NodeValue::BoolLiteral(boolean) => write!(f, "{boolean}"),
NodeValue::StringLiteral(string) => write!(f, "\"{string}\""),
NodeValue::ArrayLiteral(arr) => {
5 changes: 5 additions & 0 deletions parser/src/error.rs
@@ -10,6 +10,7 @@ pub enum ErrorKind {
UnexpectedEof,
InvalidEscapeChar(char),
InvalidChar(char),
NonAsciiChar(char),
InvalidExpression(TokenKind),
ExpectedEol,
InvalidNodeKind { expected: NodeKind, got: NodeKind },
@@ -40,6 +41,10 @@ impl Display for ErrorKind {
ErrorKind::UnexpectedEof => write!(f, "Unexpected end of file"),
ErrorKind::InvalidEscapeChar(ch) => write!(f, "Invalid escape character '{ch}'"),
ErrorKind::InvalidChar(ch) => write!(f, "Invalid character '{ch}'"),
ErrorKind::NonAsciiChar(ch) => write!(
f,
"Inalid character literal '{ch}'. Char literals only support ascii."
),
ErrorKind::InvalidExpression(token) => write!(f, "Not a valid expression: {token}"),
ErrorKind::ExpectedEol => write!(f, "Expression must end with new line"),
ErrorKind::InvalidNodeKind { expected, got } => {
72 changes: 72 additions & 0 deletions parser/src/lexer.rs
@@ -93,6 +93,48 @@ impl<'a> Lexer<'a> {
}
}

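// Read a char literal, where the opening `'` has already been consumed.
// Reads exactly one character (no escape sequences), expects a closing `'`,
// and rejects non-ascii characters with `ErrorKind::NonAsciiChar`.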
fn read_char(&mut self, start_position: Position) -> Result<TokenKind> {
let (_, ch) = self.chars.next().ok_or(Error {
kind: ErrorKind::UnexpectedEof,
range: Range {
start: start_position,
end: self.position,
},
})?;
self.position.character += ch.len_utf8();

let (_, end) = self.chars.next().ok_or(Error {
kind: ErrorKind::UnexpectedEof,
range: Range {
start: start_position,
end: self.position,
},
})?;
self.position.character += end.len_utf8();

if end != '\'' {
return Err(Error {
kind: ErrorKind::InvalidChar(end),
range: Range {
start: start_position,
end: self.position,
},
});
}

if !ch.is_ascii() {
return Err(Error {
kind: ErrorKind::NonAsciiChar(ch),
range: Range {
start: start_position,
end: self.position,
},
});
}

Ok(TokenKind::Char(ch as u8))
}

// Read ident or keyword, where the first char is at `self.input[start]`
// and `end` is start + utf8 len of first char
fn read_ident(&mut self, start: usize, mut end: usize) -> TokenKind {
@@ -249,6 +291,10 @@ impl Iterator for Lexer<'_> {
'>' => self.peek_parse('=', TokenKind::Geq, TokenKind::Ge),
'=' => self.peek_parse('=', TokenKind::Eq, TokenKind::Assign),
'!' => self.peek_parse('=', TokenKind::Neq, TokenKind::Bang),
'\'' => match self.read_char(start_position) {
Ok(token) => token,
Err(err) => return Some(Err(err)),
},
'"' => match self.read_string(start_position) {
Ok(token) => token,
Err(err) => return Some(Err(err)),
@@ -425,6 +471,7 @@ mod test {
"normal string" "\n\t\\\""
// line comment
false //inline comment
'A'
"#;

let lexer = Lexer::new(input);
@@ -666,6 +713,14 @@ mod test {
start: Position::new(9, 34),
end: Position::new(10, 0),
},
Range {
start: Position::new(10, 12),
end: Position::new(10, 15),
},
Range {
start: Position::new(10, 15),
end: Position::new(11, 0),
},
]
);

@@ -729,7 +784,24 @@ mod test {
TokenKind::False,
TokenKind::Comment("inline comment".to_string()),
TokenKind::Eol,
TokenKind::Char(b'A'),
TokenKind::Eol,
]
);
}

#[test]
fn non_ascii_char() {
let mut lexer = Lexer::new("'🚗'");
assert_eq!(
lexer.next(),
Some(Err(Error {
kind: ErrorKind::NonAsciiChar('🚗'),
range: Range {
start: Position::new(0, 0),
end: Position::new(0, 6),
}
}))
);
}
}
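The non_ascii_char test above pins down the user-facing behavior: a multi-byte character cannot appear in a char literal. A short illustrative AoC lang snippet (not part of the commit):

// A char literal must be a single ascii byte, so this line would fail to lex
// with ErrorKind::NonAsciiChar:
// ch = '🚗'
// Multi-byte symbols still work fine inside strings:
print("🚗") // 🚗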
1 change: 1 addition & 0 deletions parser/src/parser/mod.rs
@@ -182,6 +182,7 @@ impl Parser<'_> {
TokenKind::Ident(ident) => (ast::NodeValue::Identifier(ident), range.end),
TokenKind::Integer(int) => (ast::NodeValue::IntegerLiteral(int), range.end),
TokenKind::Float(flt) => (ast::NodeValue::FloatLiteral(flt), range.end),
TokenKind::Char(ch) => (ast::NodeValue::CharLiteral(ch), range.end),
TokenKind::True => (ast::NodeValue::BoolLiteral(true), range.end),
TokenKind::False => (ast::NodeValue::BoolLiteral(false), range.end),
TokenKind::String(string) => (ast::NodeValue::StringLiteral(string), range.end),
8 changes: 8 additions & 0 deletions parser/src/parser/test.rs
@@ -25,6 +25,7 @@ fn simple_prefix_expression() -> Result<()> {
"bar"
break
continue
'~'
"#;

let program = parse(input)?;
Expand Down Expand Up @@ -87,6 +88,13 @@ fn simple_prefix_expression() -> Result<()> {
end: Position::new(8, 16)
},
},
ast::Node {
value: ast::NodeValue::CharLiteral(b'~'),
range: Range {
start: Position::new(9, 8),
end: Position::new(9, 11),
}
},
]
);

2 changes: 2 additions & 0 deletions parser/src/token.rs
@@ -7,6 +7,7 @@ pub enum TokenKind {
Ident(String),
Integer(i64),
Float(f64),
Char(u8),
True,
False,
String(String),
@@ -104,6 +105,7 @@ impl Display for TokenKind {
TokenKind::Ident(_) => write!(f, "IDENT"),
TokenKind::Integer(_) => write!(f, "INTEGER"),
TokenKind::Float(_) => write!(f, "FLOAT"),
TokenKind::Char(_) => write!(f, "CHAR"),
TokenKind::True => write!(f, "TRUE"),
TokenKind::False => write!(f, "FALSE"),
TokenKind::String(_) => write!(f, "STRING"),