Skip to content

Commit ac725fc

Browse files
committed
Bug fixes, panic!-less Lexer and ParseError implements Error
Fixed bugs: (1) string literals ignored their first character and stored `"` instead; (2) identifiers were assumed to be at least 2 characters long; single-character identifiers are now lexed, with a warning. Removed every instance of `panic!` from `lexer.rs`; the lexer now passes a `LexError` back everywhere instead. Made `ParseError` implement the `Display` and `Error` traits, similar to `LexError`.
1 parent 2275b64 commit ac725fc

4 files changed

Lines changed: 96 additions & 19 deletions

File tree

src/lexer.rs

Lines changed: 65 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use std::fmt::Display;
22
use std::error::Error;
3+
use std::num::{IntErrorKind, ParseFloatError, ParseIntError};
34

45
#[derive(Debug)]
56
pub struct LexError {
@@ -14,6 +15,29 @@ impl LexError {
1415
reason,
1516
})
1617
}
18+
19+
fn from_parse<T>(pos: (usize, usize), value: Result<T, ParseIntError>) -> Result<T, LexError> {
20+
if let Err(e) = value {
21+
match e.kind() {
22+
IntErrorKind::PosOverflow | IntErrorKind::NegOverflow => return LexError::new(pos, LexErrorReason::LiteralOutOfRange),
23+
_ => return LexError::new(pos, LexErrorReason::UnexpectedCharacter)
24+
}
25+
} else if let Ok(v) = value {
26+
return Ok(v)
27+
} else {
28+
return Err(LexError { pos, reason: LexErrorReason::UnexpectedCharacter })
29+
}
30+
}
31+
32+
fn from_parsef<T>(pos: (usize, usize), value: Result<T, ParseFloatError>) -> Result<T, LexError> {
33+
if let Err(_) = value {
34+
return LexError::new(pos, LexErrorReason::LiteralOutOfRange)
35+
} else if let Ok(v) = value {
36+
return Ok(v)
37+
} else {
38+
return LexError::new(pos, LexErrorReason::UnexpectedCharacter)
39+
}
40+
}
1741
}
1842

1943
impl Display for LexError {
@@ -32,7 +56,9 @@ enum LexErrorReason {
3256
InvalidIntegerType,
3357
InvalidFloatingPointType,
3458
MisspelledType,
59+
LiteralOutOfRange,
3560
UnexpectedCharacter,
61+
InvalidEscapeSequence,
3662
}
3763

3864
#[allow(dead_code)]
@@ -238,10 +264,12 @@ pub fn lex_string(inp_str: String) -> Result<Vec<DataToken>, LexError> {
238264
char_idx += 1;
239265
if chars.peek() == Some(&'1') { chars.next(); if chars.next() == Some('6') {
240266
char_idx += 2;
241-
tokens.push(DataToken::new(Token::LitI16(i16::from_str_radix(&buf, 16).expect(&format!("i16 literal likely out of range at {:?}", pos))), pos));
267+
let v = LexError::from_parse(pos, i16::from_str_radix(&buf, 16))?;
268+
tokens.push(DataToken::new(Token::LitI16(v), pos));
242269
}} else if chars.peek() == Some(&'3') { chars.next(); if chars.next() == Some('2') {
243270
char_idx += 2;
244-
tokens.push(DataToken::new(Token::LitI32(i32::from_str_radix(&buf, 16).expect(&format!("i32 literal likely out of range at {:?}", pos))), pos));
271+
let v = LexError::from_parse(pos, i32::from_str_radix(&buf, 16))?;
272+
tokens.push(DataToken::new(Token::LitI32(v), pos));
245273
}} else {
246274
return LexError::new(pos, LexErrorReason::InvalidIntegerType)
247275
}
@@ -251,7 +279,8 @@ pub fn lex_string(inp_str: String) -> Result<Vec<DataToken>, LexError> {
251279
char_idx += 1;
252280
if !(chars.next() == Some('y') && chars.next() == Some('t') && chars.next() == Some('e')) {return LexError::new((line_idx, char_idx), LexErrorReason::MisspelledType)}
253281
char_idx += 3;
254-
tokens.push(DataToken::new(Token::LitByte(u8::from_str_radix(&buf, 16).expect(&format!("Byte literal likely out of range at {:?}", pos))), pos));
282+
let v = LexError::from_parse(pos, u8::from_str_radix(&buf, 16))?;
283+
tokens.push(DataToken::new(Token::LitByte(v), pos));
255284
break
256285
}
257286
}
@@ -265,10 +294,12 @@ pub fn lex_string(inp_str: String) -> Result<Vec<DataToken>, LexError> {
265294
char_idx += 1;
266295
if chars.peek() == Some(&'1') { chars.next(); if chars.next() == Some('6') {
267296
char_idx += 2;
268-
tokens.push(DataToken::new(Token::LitI16(i16::from_str_radix(&buf, 10).expect(&format!("i16 literal likely out of range at {:?}", pos))), pos));
297+
let v = LexError::from_parse(pos, i16::from_str_radix(&buf, 10))?;
298+
tokens.push(DataToken::new(Token::LitI16(v), pos));
269299
}} else if chars.peek() == Some(&'3') { chars.next(); if chars.next() == Some('2') {
270300
char_idx += 2;
271-
tokens.push(DataToken::new(Token::LitI32(i32::from_str_radix(&buf, 10).expect(&format!("i32 literal likely out of range at {:?}", pos))), pos));
301+
let v = LexError::from_parse(pos, i32::from_str_radix(&buf, 10))?;
302+
tokens.push(DataToken::new(Token::LitI32(v), pos));
272303
}} else {
273304
return LexError::new(pos, LexErrorReason::InvalidIntegerType)
274305
}
@@ -278,10 +309,12 @@ pub fn lex_string(inp_str: String) -> Result<Vec<DataToken>, LexError> {
278309
char_idx += 1;
279310
if chars.peek() == Some(&'3') { chars.next(); if chars.next() == Some('3') {
280311
char_idx += 2;
281-
tokens.push(DataToken::new(Token::LitF32(buf.parse::<f32>().expect(&format!("f32 literal likely out of range at {:?}", pos))), pos));
312+
let v = LexError::from_parsef(pos, buf.parse::<f32>())?;
313+
tokens.push(DataToken::new(Token::LitF32(v), pos));
282314
}} else if chars.peek() == Some(&'6') { chars.next(); if chars.next() == Some('4') {
283315
char_idx += 2;
284-
tokens.push(DataToken::new(Token::LitF64(buf.parse::<f64>().expect(&format!("f64 literal likely out of range at {:?}", pos))), pos));
316+
let v = LexError::from_parsef(pos, buf.parse::<f64>())?;
317+
tokens.push(DataToken::new(Token::LitF64(v), pos));
285318
}} else {
286319
return LexError::new(pos, LexErrorReason::InvalidFloatingPointType)
287320
}
@@ -291,7 +324,8 @@ pub fn lex_string(inp_str: String) -> Result<Vec<DataToken>, LexError> {
291324
char_idx += 1;
292325
if !(chars.next() == Some('y') && chars.next() == Some('t') && chars.next() == Some('e')) {return LexError::new((line_idx, char_idx), LexErrorReason::MisspelledType)}
293326
char_idx += 3;
294-
tokens.push(DataToken::new(Token::LitByte(u8::from_str_radix(&buf, 10).expect(&format!("Byte literal likely out of range at {:?}", pos))), pos));
327+
let v = LexError::from_parse(pos, u8::from_str_radix(&buf, 10))?;
328+
tokens.push(DataToken::new(Token::LitByte(v), pos));
295329
continue
296330
}
297331
}
@@ -315,10 +349,12 @@ pub fn lex_string(inp_str: String) -> Result<Vec<DataToken>, LexError> {
315349
char_idx += 1;
316350
if chars.peek() == Some(&'1') { chars.next(); if chars.next() == Some('6') {
317351
char_idx += 2;
318-
tokens.push(DataToken::new(Token::LitI16(i16::from_str_radix(&buf, 10).expect(&format!("i16 literal likely out of range at {:?}", pos))), pos));
352+
let v = LexError::from_parse(pos, i16::from_str_radix(&buf, 10))?;
353+
tokens.push(DataToken::new(Token::LitI16(v), pos));
319354
}} else if chars.peek() == Some(&'3') { chars.next(); if chars.next() == Some('2') {
320355
char_idx += 2;
321-
tokens.push(DataToken::new(Token::LitI32(i32::from_str_radix(&buf, 10).expect(&format!("i32 literal likely out of range at {:?}", pos))), pos));
356+
let v = LexError::from_parse(pos, i32::from_str_radix(&buf, 10))?;
357+
tokens.push(DataToken::new(Token::LitI32(v), pos));
322358
}} else {
323359
return LexError::new(pos, LexErrorReason::InvalidIntegerType)
324360
}
@@ -328,10 +364,12 @@ pub fn lex_string(inp_str: String) -> Result<Vec<DataToken>, LexError> {
328364
char_idx += 1;
329365
if chars.peek() == Some(&'3') { chars.next(); if chars.next() == Some('3') {
330366
char_idx += 2;
331-
tokens.push(DataToken::new(Token::LitF32(buf.parse::<f32>().expect(&format!("f32 literal likely out of range at {:?}", pos))), pos));
367+
let v = LexError::from_parsef(pos, buf.parse::<f32>())?;
368+
tokens.push(DataToken::new(Token::LitF32(v), pos));
332369
}} else if chars.peek() == Some(&'6') { chars.next(); if chars.next() == Some('4') {
333370
char_idx += 2;
334-
tokens.push(DataToken::new(Token::LitF64(buf.parse::<f64>().expect(&format!("f64 literal likely out of range at {:?}", pos))), pos));
371+
let v = LexError::from_parsef(pos, buf.parse::<f64>())?;
372+
tokens.push(DataToken::new(Token::LitF64(v), pos));
335373
}} else {
336374
return LexError::new(pos, LexErrorReason::InvalidFloatingPointType)
337375
}
@@ -341,16 +379,16 @@ pub fn lex_string(inp_str: String) -> Result<Vec<DataToken>, LexError> {
341379
char_idx += 1;
342380
if !(chars.next() == Some('y') && chars.next() == Some('t') && chars.next() == Some('e')) {return LexError::new((line_idx, char_idx), LexErrorReason::MisspelledType)}
343381
char_idx += 3;
344-
tokens.push(DataToken::new(Token::LitByte(u8::from_str_radix(&buf, 10).expect(&format!("Byte literal likely out of range at {:?}", pos))), pos));
382+
let v = LexError::from_parse(pos, u8::from_str_radix(&buf, 10))?;
383+
tokens.push(DataToken::new(Token::LitByte(v), pos));
345384
break
346385
}
347386
}
348387
}
349388
},
350-
'"' => {
351-
chars.next();
389+
'"' => if let Some(cr) = chars.next() {
352390
char_idx += 1;
353-
let mut buf = String::from(c);
391+
let mut buf = String::from(cr);
354392
while let Some(cs) = chars.next() {
355393
char_idx += 1;
356394
match cs {
@@ -369,7 +407,7 @@ pub fn lex_string(inp_str: String) -> Result<Vec<DataToken>, LexError> {
369407
Some('"') => buf.push('"'),
370408
Some('\\') => buf.push('\\'),
371409
Some('3') => buf.push('\x03'),
372-
_ => panic!("Invalid escape sequence at {:?}", (line_idx, char_idx))
410+
_ => return LexError::new((line_idx, char_idx), LexErrorReason::InvalidEscapeSequence),
373411
}
374412
}
375413
_ => buf.push(cs)
@@ -379,6 +417,16 @@ pub fn lex_string(inp_str: String) -> Result<Vec<DataToken>, LexError> {
379417
},
380418
'a'..='z' | 'A'..='Z' | '_' => {
381419
let mut buf = String::from(c);
420+
if let Some(cs) = chars.peek() {
421+
match cs {
422+
&('a'..='z') | &('A'..='Z') | &('0'..='9') | &'_' => {},
423+
_ => {
424+
super::LOG.explicit(&format!("Detected single character literal (consider using a more descriptive name) at: {:?}", pos));
425+
tokens.push(DataToken::new(Token::Identifier(c.to_string()), pos));
426+
continue
427+
}
428+
}
429+
}
382430
while let Some(cs) = chars.next() {
383431
char_idx += 1;
384432
buf.push(cs);

src/main.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,19 @@ fn main() {
8787
}
8888
};
8989

90-
LOG.debug(&format!("Lexed:\n\n{:?}", &tokenstream));
90+
LOG.debug(&format!("Lexed:\n\n{:?}\n", &tokenstream));
91+
92+
std::process::exit(0);
93+
94+
let ast = match parser::parse_tokens(tokenstream) {
95+
Ok(ast) => ast,
96+
Err(e) => {
97+
LOG.surface(&format!("{e}"));
98+
std::process::exit(1);
99+
}
100+
};
101+
102+
// LOG.debug(&format!("Parsed:\n\n{:?}\n", &ast));
91103

92104
todo!("AST parsing!");
93105
}

src/parser/mod.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,33 @@
1+
use std::fmt::{write, Display};
2+
use std::error::Error;
3+
14
mod parsertypes;
25
use parsertypes::*;
36
use super::lexer;
47

8+
#[derive(Debug)]
59
pub struct ParseError {
610
token: lexer::DataToken,
711
reason: ParseErrorReason,
812
}
913

14+
impl Display for ParseError {
15+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
16+
write!(f, "Parsing error at token {:?}: {:?}", self.token, self.reason)
17+
}
18+
}
19+
20+
impl Error for ParseError {}
21+
22+
#[derive(Debug)]
1023
enum ParseErrorReason {
1124

1225
}
1326

27+
pub fn parse_tokens(tokens: Vec<lexer::DataToken>) -> Result<Vec<Statement>, ParseError> {
28+
todo!()
29+
}
30+
1431
impl Expr {
1532
fn parse(tokens: Vec<lexer::DataToken>, ptr: &mut usize) -> Result<Self, ParseError> {
1633
todo!()

src/parser/parsertypes.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ pub struct MemberExpr {
103103
member: Identifier,
104104
}
105105

106-
enum FlowExpr {
106+
pub enum FlowExpr {
107107
Return {
108108
expression: Expr,
109109
},

0 commit comments

Comments
 (0)