Skip to content

Commit 347a972

Browse files
committed
Refactor MathExpressionTokenizer for improved token parsing and add tests for operator and brace tokens
1 parent 7a73772 commit 347a972

1 file changed

Lines changed: 87 additions & 36 deletions

File tree

  • calculator/src/math_expression_tokenizer

calculator/src/math_expression_tokenizer/mod.rs

Lines changed: 87 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
use thiserror::Error;
22

3-
#[derive(Debug)]
3+
#[derive(Debug, PartialEq)]
44
pub enum Token {
55
Digit(f64),
66
Operator(char),
@@ -32,7 +32,7 @@ pub trait TokenizerTraits {
3232
impl TokenizerTraits for MathExpressionTokenizer {
3333
fn has_token(&self) -> bool {
3434
let idx = self.skip_spaces();
35-
return idx < self.expr.as_bytes().len();
35+
return idx < self.expr.len();
3636
}
3737

3838
fn curr_index(&self) -> usize {
@@ -46,21 +46,22 @@ impl TokenizerTraits for MathExpressionTokenizer {
4646

4747
self.curr_idx = self.skip_spaces();
4848

49-
match self.expr.as_bytes()[self.curr_idx] {
50-
b'(' => {
49+
match self.expr.chars().nth(self.curr_idx).unwrap() {
50+
'(' => {
5151
self.curr_idx += 1;
5252
Ok((Token::OpenBrace, self.curr_idx - 1))
5353
}
54-
b')' => {
54+
')' => {
5555
self.curr_idx += 1;
5656
Ok((Token::CloseBrace, self.curr_idx - 1))
5757
}
58-
op @ (b'+' | b'-' | b'*' | b'/') => {
58+
op @ ('+' | '-' | '*' | '/') => {
5959
self.curr_idx += 1;
60-
Ok((Token::Operator(op as char), self.curr_idx - 1))
60+
Ok((Token::Operator(op), self.curr_idx - 1))
6161
}
6262
_ => {
63-
let (digit, idx) = self.parse_digits()?;
63+
let (digit, mut idx) = self.parse_digits()?;
64+
std::mem::swap(&mut self.curr_idx, &mut idx);
6465
Ok((Token::Digit(digit), idx))
6566
}
6667
}
@@ -79,42 +80,30 @@ impl MathExpressionTokenizer {
7980
})
8081
}
8182

82-
fn parse_digits(&mut self) -> Result<(f64, usize), MathExpressionTokenizerError> {
83-
let mut tmp = String::new();
84-
let bytes = self.expr.as_bytes();
83+
fn parse_digits(&self) -> Result<(f64, usize), MathExpressionTokenizerError> {
84+
let s = &self.expr[self.curr_idx..];
8585

86-
let begin = self.curr_idx;
86+
let offset = s
87+
.char_indices()
88+
.find(|&(_, ch)| !ch.is_digit(10) && ch != '.')
89+
.map(|(i, _)| i)
90+
.unwrap_or(s.len());
8791

88-
while self.curr_idx < bytes.len()
89-
&& (bytes[self.curr_idx].is_ascii_digit() || bytes[self.curr_idx] == b'.')
90-
{
91-
tmp.push(bytes[self.curr_idx] as char);
92-
93-
self.curr_idx += 1;
94-
}
95-
96-
match tmp.parse::<f64>() {
97-
Ok(number) => Ok((number, begin)),
92+
match s[..offset].parse::<f64>() {
93+
Ok(number) => Ok((number, self.curr_idx + offset)),
9894
Err(_) => Err(MathExpressionTokenizerError::InvalidToken {
99-
idx: begin,
100-
ch: bytes[begin] as char,
95+
idx: self.curr_idx,
96+
ch: s.chars().nth(0).unwrap(),
10197
}),
10298
}
10399
}
104100

105101
fn skip_spaces(&self) -> usize {
106-
if let Some(idx) = self.expr.as_bytes()[self.curr_idx..]
107-
.iter()
108-
.position(|x| !x.is_ascii_whitespace())
109-
{
110-
return if self.curr_idx + idx < self.expr.bytes().len() {
111-
self.curr_idx + idx
112-
} else {
113-
self.expr.as_bytes().len()
114-
};
115-
};
116-
117-
self.expr.as_bytes().len()
102+
self.expr[self.curr_idx..]
103+
.char_indices()
104+
.position(|(_, char)| !char.is_whitespace())
105+
.map(|idx| self.curr_idx + idx)
106+
.unwrap_or(self.expr.len())
118107
}
119108
}
120109

@@ -195,5 +184,67 @@ mod tests {
195184
panic!("Expected Token::Digit, got {:?}", token);
196185
}
197186
}
187+
188+
#[test]
189+
fn test_valid_operator_tokens(s in r"[+\-*/ ]{1,50}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) {
190+
let mut tokenizer = MathExpressionTokenizer::new(s.clone()).unwrap();
191+
assert!(tokenizer.has_token());
192+
193+
while let Ok((token, idx)) = tokenizer.next_token() {
194+
let op = s.chars().nth(idx).unwrap();
195+
assert_eq!(token, Token::Operator(op));
196+
}
197+
198+
assert!(!tokenizer.has_token());
199+
}
200+
201+
#[test]
202+
fn test_braces_tokens(s in r"[() ]{1,50}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) {
203+
let mut tokenizer = MathExpressionTokenizer::new(s.clone()).unwrap();
204+
assert!(tokenizer.has_token());
205+
206+
while let Ok((token, idx)) = tokenizer.next_token() {
207+
let op = s.chars().nth(idx).unwrap();
208+
if op == '('
209+
{
210+
assert_eq!(token, Token::OpenBrace);
211+
}
212+
else if op == ')'
213+
{
214+
assert_eq!(token, Token::CloseBrace);
215+
}
216+
}
217+
218+
assert!(!tokenizer.has_token());
219+
}
220+
221+
#[test]
222+
fn test_valid_sequence_tokens(s in r"[0-9+\-*/() ]{1,10}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) {
223+
let mut tokenizer = MathExpressionTokenizer::new(s.clone()).unwrap();
224+
assert!(tokenizer.has_token());
225+
226+
println!("Ch:{}",s);
227+
228+
while let Ok((token, idx)) = tokenizer.next_token() {
229+
let ch = s.chars().nth(idx).unwrap();
230+
println!("Ch: {} - {}", ch, s);
231+
match token {
232+
Token::OpenBrace => {
233+
assert_eq!(ch, '(');
234+
},
235+
Token::CloseBrace => {
236+
assert_eq!(ch, ')');
237+
},
238+
Token::Operator(op) => {
239+
assert_eq!(ch, op);
240+
},
241+
Token::Digit(_) => {
242+
assert!(ch.is_digit(10));
243+
},
244+
}
245+
}
246+
247+
assert!(!tokenizer.has_token());
248+
}
198249
}
199250
}

0 commit comments

Comments
 (0)