From 1f1dc073db02dcc39418423a98b4627a85b9100f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Tvrd=C3=ADk?=
Date: Tue, 24 Feb 2026 08:21:45 +0100
Subject: [PATCH] Change empty-match exit from break 2 to break and add regression test

The break 2 was harder to reason about and could incorrectly abort
parsing when an empty match occurred mid-buffer with more stream data
available. With break (single), the iterator loads the next chunk, which
can resolve the empty match into a positive-length one (e.g. when a
fixed-width alternative needs more characters than remain in the current
buffer).
---
 src/PatternIterator.php              |  2 +-
 tests/cases/PatternIteratorTest.phpt | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/PatternIterator.php b/src/PatternIterator.php
index a48ba86..788239c 100644
--- a/src/PatternIterator.php
+++ b/src/PatternIterator.php
@@ -82,7 +82,7 @@ public function getIterator(): Iterator
 				}
 
 				if (strlen($matches[0]) === 0) {
-					break 2;
+					break;
 				}
 
 				yield $matches;
diff --git a/tests/cases/PatternIteratorTest.phpt b/tests/cases/PatternIteratorTest.phpt
index e7b34e9..7694ac3 100644
--- a/tests/cases/PatternIteratorTest.phpt
+++ b/tests/cases/PatternIteratorTest.phpt
@@ -524,6 +524,20 @@ class PatternIteratorTest extends TestCase
 	}
 
 
+	public function testZeroLengthMatchMidBufferLoadsMoreData(): void
+	{
+		// Pattern matches exactly 3 chars, or empty. When only 1 char remains in the
+		// buffer but the stream has more data, the empty match mid-buffer must not abort;
+		// loading the next chunk makes the 3-char alternative succeed.
+		$iter = new PatternIterator($this->stream('abXY', 'Zmore'), '~.{3}|~A');
+		$results = $this->collect($iter);
+		Assert::count(3, $results);
+		Assert::same('abX', $results[0][0]);
+		Assert::same('YZm', $results[1][0]);
+		Assert::same('ore', $results[2][0]);
+	}
+
+
 	public function testZeroLengthMatchAcrossChunks(): void
 	{
 		$pattern = '~\s*(?:(?[^;]+);|\z)~As';