@@ -305,18 +305,32 @@ public PathSegmentIterator(int length) =>
305305 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
306306 public ( int start , int final ) GetNext ( ref char source , MatchFlags flags )
307307 {
308+ //
309+ // Number of bits per char (ushort) in the MoveMask output
310+ //
311+ const uint BitsPerChar = 0b11 ;
312+
308313 var start = _last + 1 ;
309314
310315 while ( _position < _length )
311316 {
312- if ( Avx2 . IsSupported && _mask != 0 )
317+ if ( ( Avx2 . IsSupported || Sse2 . IsSupported ) && _mask != 0 )
313318 {
314319 var offset = BitOperations . TrailingZeroCount ( _mask ) ;
315320 _last = _position + ( nint ) ( ( uint ) offset >> 1 ) ;
316- _mask &= ~ ( 3u << offset ) ;
317321
322+ //
323+ // Clear the bits for the current separator to process the next position in the mask
324+ //
325+ _mask &= ~ ( BitsPerChar << offset ) ;
326+
327+ //
328+ // Advance position to the next chunk when no separators remain in the mask
329+ //
318330 if ( _mask == 0 )
319- _position += Vector256 < ushort > . Count ;
331+ _position += Avx2 . IsSupported
332+ ? Vector256 < ushort > . Count
333+ : Vector128 < ushort > . Count ;
320334
321335 return ( ( int ) start , ( int ) _last ) ;
322336 }
@@ -334,10 +348,47 @@ public PathSegmentIterator(int length) =>
334348 allowEscapingMask ,
335349 Avx2 . CompareEqual ( chunk , backslash ) ) ) ;
336350
351+ //
352+ // Store the comparison bitmask and reuse it across iterations
353+ // as long as it contains non-zero bits.
354+ // This avoids reloading SIMD registers and repeating comparisons
355+ // on the same chunk of data.
356+ //
337357 _mask = ( uint ) Avx2 . MoveMask ( comparison . AsByte ( ) ) ;
358+
359+ //
360+ // Advance position to the next chunk when no separators found
361+ //
338362 if ( _mask == 0 )
339363 _position += Vector256 < ushort > . Count ;
340364 }
365+ else if ( Sse2 . IsSupported && ! Avx2 . IsSupported && _position + Vector128 < ushort > . Count <= _length )
366+ {
367+ var chunk = LoadVector128 ( ref source , _position ) ;
368+ var allowEscapingMask = CreateAllowEscaping128Bitmask ( flags ) ;
369+ var slash = Vector128 . Create ( ( ushort ) '/' ) ;
370+ var backslash = Vector128 . Create ( ( ushort ) '\\ ' ) ;
371+
372+ var comparison = Sse2 . Or (
373+ Sse2 . CompareEqual ( chunk , slash ) ,
374+ Sse2 . AndNot (
375+ allowEscapingMask ,
376+ Sse2 . CompareEqual ( chunk , backslash ) ) ) ;
377+
378+ //
379+ // Store the comparison bitmask and reuse it across iterations
380+ // as long as it contains non-zero bits.
381+ // This avoids reloading SIMD registers and repeating comparisons
382+ // on the same chunk of data.
383+ //
384+ _mask = ( uint ) Sse2 . MoveMask ( comparison . AsByte ( ) ) ;
385+
386+ //
387+ // Advance position to the next chunk when no separators found
388+ //
389+ if ( _mask == 0 )
390+ _position += Vector128 < ushort > . Count ;
391+ }
341392 else
342393 {
343394 for ( ; _position < _length ; _position ++ )
0 commit comments