11use thiserror:: Error ;
22
3- #[ derive( Debug ) ]
3+ #[ derive( Debug , PartialEq ) ]
44pub enum Token {
55 Digit ( f64 ) ,
66 Operator ( char ) ,
@@ -32,7 +32,7 @@ pub trait TokenizerTraits {
3232impl TokenizerTraits for MathExpressionTokenizer {
3333 fn has_token ( & self ) -> bool {
3434 let idx = self . skip_spaces ( ) ;
35- return idx < self . expr . as_bytes ( ) . len ( ) ;
35+ return idx < self . expr . len ( ) ;
3636 }
3737
3838 fn curr_index ( & self ) -> usize {
@@ -46,21 +46,22 @@ impl TokenizerTraits for MathExpressionTokenizer {
4646
4747 self . curr_idx = self . skip_spaces ( ) ;
4848
49- match self . expr . as_bytes ( ) [ self . curr_idx ] {
50- b '(' => {
49+ match self . expr . chars ( ) . nth ( self . curr_idx ) . unwrap ( ) {
50+ '(' => {
5151 self . curr_idx += 1 ;
5252 Ok ( ( Token :: OpenBrace , self . curr_idx - 1 ) )
5353 }
54- b ')' => {
54+ ')' => {
5555 self . curr_idx += 1 ;
5656 Ok ( ( Token :: CloseBrace , self . curr_idx - 1 ) )
5757 }
58- op @ ( b '+' | b '-' | b '*' | b '/') => {
58+ op @ ( '+' | '-' | '*' | '/' ) => {
5959 self . curr_idx += 1 ;
60- Ok ( ( Token :: Operator ( op as char ) , self . curr_idx - 1 ) )
60+ Ok ( ( Token :: Operator ( op) , self . curr_idx - 1 ) )
6161 }
6262 _ => {
63- let ( digit, idx) = self . parse_digits ( ) ?;
63+ let ( digit, mut idx) = self . parse_digits ( ) ?;
64+ std:: mem:: swap ( & mut self . curr_idx , & mut idx) ;
6465 Ok ( ( Token :: Digit ( digit) , idx) )
6566 }
6667 }
@@ -79,42 +80,30 @@ impl MathExpressionTokenizer {
7980 } )
8081 }
8182
82- fn parse_digits ( & mut self ) -> Result < ( f64 , usize ) , MathExpressionTokenizerError > {
83- let mut tmp = String :: new ( ) ;
84- let bytes = self . expr . as_bytes ( ) ;
83+ fn parse_digits ( & self ) -> Result < ( f64 , usize ) , MathExpressionTokenizerError > {
84+ let s = & self . expr [ self . curr_idx ..] ;
8585
86- let begin = self . curr_idx ;
86+ let offset = s
87+ . char_indices ( )
88+ . find ( |& ( _, ch) | !ch. is_digit ( 10 ) && ch != '.' )
89+ . map ( |( i, _) | i)
90+ . unwrap_or ( s. len ( ) ) ;
8791
88- while self . curr_idx < bytes. len ( )
89- && ( bytes[ self . curr_idx ] . is_ascii_digit ( ) || bytes[ self . curr_idx ] == b'.' )
90- {
91- tmp. push ( bytes[ self . curr_idx ] as char ) ;
92-
93- self . curr_idx += 1 ;
94- }
95-
96- match tmp. parse :: < f64 > ( ) {
97- Ok ( number) => Ok ( ( number, begin) ) ,
92+ match s[ ..offset] . parse :: < f64 > ( ) {
93+ Ok ( number) => Ok ( ( number, self . curr_idx + offset) ) ,
9894 Err ( _) => Err ( MathExpressionTokenizerError :: InvalidToken {
99- idx : begin ,
100- ch : bytes [ begin ] as char ,
95+ idx : self . curr_idx ,
96+ ch : s . chars ( ) . nth ( 0 ) . unwrap ( ) ,
10197 } ) ,
10298 }
10399 }
104100
105101 fn skip_spaces ( & self ) -> usize {
106- if let Some ( idx) = self . expr . as_bytes ( ) [ self . curr_idx ..]
107- . iter ( )
108- . position ( |x| !x. is_ascii_whitespace ( ) )
109- {
110- return if self . curr_idx + idx < self . expr . bytes ( ) . len ( ) {
111- self . curr_idx + idx
112- } else {
113- self . expr . as_bytes ( ) . len ( )
114- } ;
115- } ;
116-
117- self . expr . as_bytes ( ) . len ( )
102+ self . expr [ self . curr_idx ..]
103+ . char_indices ( )
104+ . position ( |( _, char) | !char. is_whitespace ( ) )
105+ . map ( |idx| self . curr_idx + idx)
106+ . unwrap_or ( self . expr . len ( ) )
118107 }
119108}
120109
@@ -195,5 +184,67 @@ mod tests {
195184 panic!( "Expected Token::Digit, got {:?}" , token) ;
196185 }
197186 }
187+
188+ #[ test]
189+ fn test_valid_operator_tokens( s in r"[+\-*/ ]{1,50}" . prop_filter( "no leading space" , |s| !s. starts_with( char :: is_whitespace) ) ) {
190+ let mut tokenizer = MathExpressionTokenizer :: new( s. clone( ) ) . unwrap( ) ;
191+ assert!( tokenizer. has_token( ) ) ;
192+
193+ while let Ok ( ( token, idx) ) = tokenizer. next_token( ) {
194+ let op = s. chars( ) . nth( idx) . unwrap( ) ;
195+ assert_eq!( token, Token :: Operator ( op) ) ;
196+ }
197+
198+ assert!( !tokenizer. has_token( ) ) ;
199+ }
200+
201+ #[ test]
202+ fn test_braces_tokens( s in r"[() ]{1,50}" . prop_filter( "no leading space" , |s| !s. starts_with( char :: is_whitespace) ) ) {
203+ let mut tokenizer = MathExpressionTokenizer :: new( s. clone( ) ) . unwrap( ) ;
204+ assert!( tokenizer. has_token( ) ) ;
205+
206+ while let Ok ( ( token, idx) ) = tokenizer. next_token( ) {
207+ let op = s. chars( ) . nth( idx) . unwrap( ) ;
208+ if op == '('
209+ {
210+ assert_eq!( token, Token :: OpenBrace ) ;
211+ }
212+ else if op == ')'
213+ {
214+ assert_eq!( token, Token :: CloseBrace ) ;
215+ }
216+ }
217+
218+ assert!( !tokenizer. has_token( ) ) ;
219+ }
220+
221+ #[ test]
222+ fn test_valid_sequence_tokens( s in r"[0-9+\-*/() ]{1,10}" . prop_filter( "no leading space" , |s| !s. starts_with( char :: is_whitespace) ) ) {
223+ let mut tokenizer = MathExpressionTokenizer :: new( s. clone( ) ) . unwrap( ) ;
224+ assert!( tokenizer. has_token( ) ) ;
225+
226+ println!( "Ch:{}" , s) ;
227+
228+ while let Ok ( ( token, idx) ) = tokenizer. next_token( ) {
229+ let ch = s. chars( ) . nth( idx) . unwrap( ) ;
230+ println!( "Ch: {} - {}" , ch, s) ;
231+ match token {
232+ Token :: OpenBrace => {
233+ assert_eq!( ch, '(' ) ;
234+ } ,
235+ Token :: CloseBrace => {
236+ assert_eq!( ch, ')' ) ;
237+ } ,
238+ Token :: Operator ( op) => {
239+ assert_eq!( ch, op) ;
240+ } ,
241+ Token :: Digit ( _) => {
242+ assert!( ch. is_digit( 10 ) ) ;
243+ } ,
244+ }
245+ }
246+
247+ assert!( !tokenizer. has_token( ) ) ;
248+ }
198249 }
199250}
0 commit comments