Skip to content

Commit 2275b64

Browse files
committed
Parsing boilerplate
Beginning parsing, boilerplate is now done (I hope). Some bug fixes, and proper error handling (mostly) so `-v 0` actually outputs nothing (except an error code).
1 parent d2ea888 commit 2275b64

4 files changed

Lines changed: 303 additions & 17 deletions

File tree

src/lexer.rs

Lines changed: 53 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,40 @@
1+
use std::fmt::Display;
2+
use std::error::Error;
3+
4+
/// Error returned by the lexer when the input cannot be tokenised.
///
/// Holds the position the lexer reported together with the reason for the
/// failure. The `Display` output is meant for end users; use `Debug` for
/// diagnostics.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexError {
    /// Position passed in by the lexer (call sites pass e.g. `(line_idx, char_idx)`).
    pos: (usize, usize),
    /// What went wrong at `pos`.
    reason: LexErrorReason,
}

impl LexError {
    /// Builds a `LexError` that is already wrapped in `Err`.
    ///
    /// Returning `Result<T, LexError>` (for any `T`) lets call sites write
    /// `return LexError::new(pos, reason)` directly from a function that
    /// returns `Result<_, LexError>`.
    fn new<T>(pos: (usize, usize), reason: LexErrorReason) -> Result<T, LexError> {
        Err(LexError { pos, reason })
    }
}

impl Display for LexError {
    /// Writes `Lexing error at <pos>: <human readable reason>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The reason is rendered through its own `Display` impl so the user
        // sees an explanation rather than a bare enum variant name.
        write!(f, "Lexing error at {:?}: {}", self.pos, self.reason)
    }
}

impl Error for LexError {}

/// The specific reason a [`LexError`] was raised.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LexErrorReason {
    /// A `.` was found inside a `0x`-prefixed literal.
    HexadecimalFloat,
    /// A numeric literal ended (`;`) without a type suffix.
    LiteralTypeSpecifierMissing,
    /// A numeric literal was followed by a character that is not valid there.
    IdentifierStartsWithNumber,
    /// An `i…` suffix that is neither `i16` nor `i32`.
    InvalidIntegerType,
    /// An `f…` suffix that is neither `f32` nor `f64`.
    InvalidFloatingPointType,
    /// A `b…` suffix that is not `byte`.
    MisspelledType,
    /// A character the lexer has no rule for.
    UnexpectedCharacter,
}

impl Display for LexErrorReason {
    /// Writes a short, user-facing explanation of the reason.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive on purpose: adding a variant must force a message to be written.
        let msg = match self {
            LexErrorReason::HexadecimalFloat => {
                "float literals may not be prefixed with `0x`"
            }
            LexErrorReason::LiteralTypeSpecifierMissing => {
                "numeric literal is missing a type specifier, try appending `byte`, `i16` or `i32` (or `f32`/`f64` for decimal literals)"
            }
            LexErrorReason::IdentifierStartsWithNumber => {
                "identifiers cannot start with a number (`0x` is reserved for hexadecimal literals)"
            }
            LexErrorReason::InvalidIntegerType => {
                "integer literal types are `i16` and `i32`"
            }
            LexErrorReason::InvalidFloatingPointType => {
                "floating point literal types are `f32` and `f64`"
            }
            LexErrorReason::MisspelledType => {
                "unrecognised literal type, you might have meant `byte`"
            }
            LexErrorReason::UnexpectedCharacter => "unexpected character",
        };
        f.write_str(msg)
    }
}
37+
138
#[allow(dead_code)]
239
#[derive(Debug)]
340
pub enum Token {
@@ -70,7 +107,7 @@ impl DataToken {
70107
}
71108
}
72109

73-
pub fn lex_string(inp_str: String) -> Vec<DataToken> {
110+
pub fn lex_string(inp_str: String) -> Result<Vec<DataToken>, LexError> {
74111
let mut tokens = Vec::new();
75112

76113
let mut chars = inp_str.chars().peekable();
@@ -188,11 +225,11 @@ pub fn lex_string(inp_str: String) -> Vec<DataToken> {
188225
buf.push(cs)
189226
} else if cs == '_' {
190227
} else if cs == '.' {
191-
panic!("No float literals may be prefixed with `0x`, error at {:?}", (line_idx, char_idx))
228+
return LexError::new((line_idx, char_idx), LexErrorReason::HexadecimalFloat)
192229
} else if cs == ';' {
193-
panic!("You need to specify a hexadecimal compatible literal type, try appending `byte`, `i16`, `i32`, error at {:?}", pos)
230+
return LexError::new(pos, LexErrorReason::LiteralTypeSpecifierMissing)
194231
} else {
195-
panic!("`0x` is reserved for hexadecimal numbers, error at {:?}", pos)
232+
return LexError::new(pos, LexErrorReason::IdentifierStartsWithNumber)
196233
}
197234

198235
if let Some(ct) = chars.peek() {
@@ -206,13 +243,13 @@ pub fn lex_string(inp_str: String) -> Vec<DataToken> {
206243
char_idx += 2;
207244
tokens.push(DataToken::new(Token::LitI32(i32::from_str_radix(&buf, 16).expect(&format!("i32 literal likely out of range at {:?}", pos))), pos));
208245
}} else {
209-
panic!("integer literal types are `i16` and `i32`, error at {:?}", (line_idx, char_idx-1))
246+
return LexError::new(pos, LexErrorReason::InvalidIntegerType)
210247
}
211248
break
212249
} else if ct == &'b' {
213250
chars.next();
214251
char_idx += 1;
215-
if !(chars.next() == Some('y') && chars.next() == Some('t') && chars.next() == Some('e')) {panic!("You might have meant `byte` at {:?}", (line_idx, char_idx))}
252+
if !(chars.next() == Some('y') && chars.next() == Some('t') && chars.next() == Some('e')) {return LexError::new((line_idx, char_idx), LexErrorReason::MisspelledType)}
216253
char_idx += 3;
217254
tokens.push(DataToken::new(Token::LitByte(u8::from_str_radix(&buf, 16).expect(&format!("Byte literal likely out of range at {:?}", pos))), pos));
218255
break
@@ -233,7 +270,7 @@ pub fn lex_string(inp_str: String) -> Vec<DataToken> {
233270
char_idx += 2;
234271
tokens.push(DataToken::new(Token::LitI32(i32::from_str_radix(&buf, 10).expect(&format!("i32 literal likely out of range at {:?}", pos))), pos));
235272
}} else {
236-
panic!("integer literal types are `i16` and `i32`, error at {:?}", (line_idx, char_idx-1))
273+
return LexError::new(pos, LexErrorReason::InvalidIntegerType)
237274
}
238275
continue
239276
} else if ct == &'f' {
@@ -246,13 +283,13 @@ pub fn lex_string(inp_str: String) -> Vec<DataToken> {
246283
char_idx += 2;
247284
tokens.push(DataToken::new(Token::LitF64(buf.parse::<f64>().expect(&format!("f64 literal likely out of range at {:?}", pos))), pos));
248285
}} else {
249-
panic!("floating point literal types are `f32` and `i64`, error at {:?}", (line_idx, char_idx-1))
286+
return LexError::new(pos, LexErrorReason::InvalidFloatingPointType)
250287
}
251288
continue
252289
} else if ct == &'b' {
253290
chars.next();
254291
char_idx += 1;
255-
if !(chars.next() == Some('y') && chars.next() == Some('t') && chars.next() == Some('e')) {panic!("You might have meant `byte` at {:?}", (line_idx, char_idx))}
292+
if !(chars.next() == Some('y') && chars.next() == Some('t') && chars.next() == Some('e')) {return LexError::new((line_idx, char_idx), LexErrorReason::MisspelledType)}
256293
char_idx += 3;
257294
tokens.push(DataToken::new(Token::LitByte(u8::from_str_radix(&buf, 10).expect(&format!("Byte literal likely out of range at {:?}", pos))), pos));
258295
continue
@@ -267,9 +304,9 @@ pub fn lex_string(inp_str: String) -> Vec<DataToken> {
267304
} else if cs == '.' {
268305
buf.push('.')
269306
} else if cs == ';' {
270-
panic!("You need to specify literal types, try appending `byte`, `i16`, `i32`, `f32` or `f64`, error at {:?}", pos)
307+
return LexError::new(pos, LexErrorReason::LiteralTypeSpecifierMissing)
271308
} else {
272-
panic!("You cannot prefix an identifier with a number, error at: {:?}", pos)
309+
return LexError::new(pos, LexErrorReason::IdentifierStartsWithNumber)
273310
}
274311

275312
if let Some(ct) = chars.peek() {
@@ -283,7 +320,7 @@ pub fn lex_string(inp_str: String) -> Vec<DataToken> {
283320
char_idx += 2;
284321
tokens.push(DataToken::new(Token::LitI32(i32::from_str_radix(&buf, 10).expect(&format!("i32 literal likely out of range at {:?}", pos))), pos));
285322
}} else {
286-
panic!("integer literal types are `i16` and `i32`, error at {:?}", (line_idx, char_idx-1))
323+
return LexError::new(pos, LexErrorReason::InvalidIntegerType)
287324
}
288325
break
289326
} else if ct == &'f' {
@@ -296,13 +333,13 @@ pub fn lex_string(inp_str: String) -> Vec<DataToken> {
296333
char_idx += 2;
297334
tokens.push(DataToken::new(Token::LitF64(buf.parse::<f64>().expect(&format!("f64 literal likely out of range at {:?}", pos))), pos));
298335
}} else {
299-
panic!("floating point literal types are `f32` and `i64`, error at {:?}", (line_idx, char_idx-1))
336+
return LexError::new(pos, LexErrorReason::InvalidFloatingPointType)
300337
}
301338
break
302339
} else if ct == &'b' {
303340
chars.next();
304341
char_idx += 1;
305-
if !(chars.next() == Some('y') && chars.next() == Some('t') && chars.next() == Some('e')) {panic!("You might have meant `byte` at {:?}", (line_idx, char_idx))}
342+
if !(chars.next() == Some('y') && chars.next() == Some('t') && chars.next() == Some('e')) {return LexError::new((line_idx, char_idx), LexErrorReason::MisspelledType)}
306343
char_idx += 3;
307344
tokens.push(DataToken::new(Token::LitByte(u8::from_str_radix(&buf, 10).expect(&format!("Byte literal likely out of range at {:?}", pos))), pos));
308345
break
@@ -377,9 +414,9 @@ pub fn lex_string(inp_str: String) -> Vec<DataToken> {
377414
_ => tokens.push(DataToken::new(Token::Identifier(buf), pos))
378415
}
379416
},
380-
_ => panic!("Error in input:\nUnexpected character (`{c}`) at {:?}", pos)
417+
_ => return LexError::new(pos, LexErrorReason::UnexpectedCharacter)
381418
}
382419
}
383420

384-
tokens
421+
Ok(tokens)
385422
}

src/main.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use std::fs;
66
use clap::Parser;
77

88
mod lexer;
9+
mod parser;
910

1011
/// A compiler for kLang
1112
#[derive(Debug, Parser)]
@@ -78,7 +79,13 @@ fn main() {
7879
LOG.debug("Got input:\n\n```kLang");
7980
LOG.debug(&format!("{inp_string}\n```\n"));
8081

81-
let tokenstream = lexer::lex_string(inp_string);
82+
let tokenstream = match lexer::lex_string(inp_string) {
83+
Ok(tks) => tks,
84+
Err(e) => {
85+
LOG.surface(&format!("{e}"));
86+
std::process::exit(1);
87+
}
88+
};
8289

8390
LOG.debug(&format!("Lexed:\n\n{:?}", &tokenstream));
8491

src/parser/mod.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
mod parsertypes;
2+
use parsertypes::*;
3+
use super::lexer;
4+
5+
pub struct ParseError {
6+
token: lexer::DataToken,
7+
reason: ParseErrorReason,
8+
}
9+
10+
enum ParseErrorReason {
11+
12+
}
13+
14+
impl Expr {
15+
/// Parses an `Expr` out of `tokens`.
///
/// `ptr` is a mutable index supplied by the caller; presumably it is the
/// cursor into `tokens` that parsing advances (TODO confirm once implemented).
///
/// Returns `Result<Self, ParseError>`.
///
/// # Panics
///
/// Not implemented yet: the body is `todo!()`, so every call panics.
fn parse(tokens: Vec<lexer::DataToken>, ptr: &mut usize) -> Result<Self, ParseError> {
16+
todo!()
17+
}
18+
}

0 commit comments

Comments
 (0)