diff --git a/.github/workflows/formats.yml b/.github/workflows/formats.yml new file mode 100644 index 0000000..20eeb2d --- /dev/null +++ b/.github/workflows/formats.yml @@ -0,0 +1,49 @@ +name: Formats + +on: ['push', 'pull_request'] + +jobs: + ci: + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: true + matrix: + os: [ubuntu-latest] + php: [8.4] + dependency-version: [prefer-lowest, prefer-stable] + + name: Formats P${{ matrix.php }} - ${{ matrix.os }} - ${{ matrix.dependency-version }} + + steps: + + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup PHP + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php }} + extensions: dom, mbstring, zip + coverage: pcov + + - name: Get Composer cache directory + id: composer-cache + shell: bash + run: echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT + + - name: Cache dependencies + uses: actions/cache@v3 + with: + path: ${{ steps.composer-cache.outputs.dir }} + key: dependencies-php-${{ matrix.php }}-os-${{ matrix.os }}-version-${{ matrix.dependency-version }}-composer-${{ hashFiles('composer.json') }} + restore-keys: dependencies-php-${{ matrix.php }}-os-${{ matrix.os }}-version-${{ matrix.dependency-version }}-composer- + + - name: Install Composer dependencies + run: composer update --${{ matrix.dependency-version }} --no-interaction --prefer-dist + + - name: Coding Style Checks + run: composer test:lint + + - name: Type Checks + run: composer test:types diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..7fb6b72 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,45 @@ +name: Tests + +on: ['push', 'pull_request'] + +jobs: + ci: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: true + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + php: ['8.3', '8.4'] + dependency-version: [prefer-lowest, prefer-stable] + + name: Tests P${{ matrix.php }} - ${{ matrix.os }} - ${{ matrix.dependency-version }} + + steps: + + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup PHP + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php }} + extensions: dom, mbstring, zip + coverage: xdebug + + - name: Get Composer cache directory + id: composer-cache + shell: bash + run: echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT + + - name: Cache dependencies + uses: actions/cache@v3 + with: + path: ${{ steps.composer-cache.outputs.dir }} + key: dependencies-php-${{ matrix.php }}-os-${{ matrix.os }}-version-${{ matrix.dependency-version }}-composer-${{ hashFiles('composer.json') }} + restore-keys: dependencies-php-${{ matrix.php }}-os-${{ matrix.os }}-version-${{ matrix.dependency-version }}-composer- + + - name: Install Composer dependencies + run: composer update --${{ matrix.dependency-version }} --no-interaction --prefer-dist + + - name: Integration Tests + run: php ./vendor/phpunit/phpunit/phpunit --testsuite default diff --git a/composer.json b/composer.json index 1a386b3..7209a3b 100755 --- a/composer.json +++ b/composer.json @@ -1,25 +1,61 @@ { - "name": "edgaras/strsim", - "description": "Collection of string similarity and distance algorithms in PHP including Levenshtein, Damerau-Levenshtein, Jaro-Winkler, and more", - "type": "library", - "homepage": "https://github.com/Edgaras0x4E/StrSim", - "license": "MIT", - "keywords": ["string", "similarity", "distance", "levenshtein", "damerau-levenshtein", "jaro", "jaro-winkler", "lcs", "smith-waterman", "needleman-wunsch", "cosine", "jaccard", "monge-elkan", "text", "fuzzy", "comparison", "algorithm", "multibyte"], - "autoload": { - "psr-4": { - "Edgaras\\StrSim\\": "src/" - } - }, - "autoload-dev": { - "psr-4": { - "Edgaras\\StrSim\\Tests\\": "tests/" - } - }, - "minimum-stability": "stable", - "require": { - "php": ">=8.3.0" - }, - "require-dev": { - "phpunit/phpunit": "^11.5" + "name": "edgaras/strsim", + "description": "Collection of string similarity and distance algorithms in PHP including Levenshtein, Damerau-Levenshtein, Jaro-Winkler, and more", + "type": "library", + "homepage": "https://github.com/Edgaras0x4E/StrSim", + "license": "MIT", + "keywords": [ + "string", + "similarity", + "distance", + "levenshtein", + "damerau-levenshtein", + "jaro", + "jaro-winkler", + "lcs", + "smith-waterman", + "needleman-wunsch", + "cosine", + "jaccard", + "monge-elkan", + "text", + "fuzzy", + "comparison", + "algorithm", + "multibyte" + ], + "autoload": { + "psr-4": { + "Edgaras\\StrSim\\": "src/" } + }, + "autoload-dev": { + "psr-4": { + "Edgaras\\StrSim\\Tests\\": "tests/" + } + }, + "minimum-stability": "stable", + "require": { + "php": ">=8.3.0" + }, + "require-dev": { + "phpunit/phpunit": "^11.5", + "phpstan/phpstan": "^2.1", + "rector/rector": "^2.0", + "laravel/pint": "^1.19" + }, + "scripts": { + "refacto": "rector", + "lint": "pint", + "test:refacto": "rector --dry-run", + "test:lint": "pint --test", + "test:types": "phpstan analyse --ansi", + "test:unit": "phpunit", + "test": [ + "@test:refacto", + "@test:lint", + "@test:types", + "@test:unit" + ] + } } diff --git a/phpstan.neon b/phpstan.neon new file mode 100644 index 0000000..5fd25fc --- /dev/null +++ b/phpstan.neon @@ -0,0 +1,6 @@ +parameters: + level: max + paths: + - src + + reportUnmatchedIgnoredErrors: true diff --git a/phpunit.xml b/phpunit.xml new file mode 100644 index 0000000..3a09eef --- /dev/null +++ b/phpunit.xml @@ -0,0 +1,31 @@ + + + + + tests + + + + + src + + + + + + + + diff --git a/pint.json b/pint.json new file mode 100644 index 0000000..4235698 --- /dev/null +++ b/pint.json @@ -0,0 +1,6 @@ +{ + "preset": "psr12", + "rules": { + "declare_strict_types": true + } +} \ No newline at end of file diff --git a/rector.php b/rector.php new file mode 100644 index 0000000..e75954e --- /dev/null +++ b/rector.php @@ -0,0 +1,18 @@ +withPaths([ + __DIR__.'/src', + __DIR__.'/tests', + ]) + ->withPreparedSets( + deadCode: true, + codeQuality: true, + typeDeclarations: true, + privatization: true, + earlyReturn: true, + ); diff --git a/src/Cosine.php b/src/Cosine.php index edf1ab2..eeff1a4 100644 --- a/src/Cosine.php +++ b/src/Cosine.php @@ -1,43 +1,57 @@ $v) { - $dot += $v * ($tokensB[$k] ?? 0); - $normA += $v * $v; + $dot += (float)$v * (float)($tokensB[$k] ?? 0); + $normA += (float)$v * (float)$v; } foreach ($tokensB as $v) { - $normB += $v * $v; + $normB += (float)$v * (float)$v; } return ($normA && $normB) ? $dot / (sqrt($normA) * sqrt($normB)) : 0; } - private static function countMbChars(string $str): array { + /** + * @return array + */ + private static function countMbChars(string $str): array + { $chars = []; $length = mb_strlen($str, 'UTF-8'); - + for ($i = 0; $i < $length; $i++) { $char = mb_substr($str, $i, 1, 'UTF-8'); $chars[$char] = ($chars[$char] ?? 0) + 1; } - + return $chars; } - public static function similarityFromVectors(array $vecA, array $vecB): float { + /** + * @param array $vecA + * @param array $vecB + */ + public static function similarityFromVectors(array $vecA, array $vecB): float + { if (count($vecA) !== count($vecB)) { throw new \InvalidArgumentException("Vectors must be the same length."); } @@ -48,9 +62,9 @@ public static function similarityFromVectors(array $vecA, array $vecB): float { foreach ($vecA as $i => $valA) { $valB = $vecB[$i]; - $dot += $valA * $valB; - $normA += $valA * $valA; - $normB += $valB * $valB; + $dot += (float)$valA * (float)$valB; + $normA += (float)$valA * (float)$valA; + $normB += (float)$valB * (float)$valB; } return ($normA && $normB) ? $dot / (sqrt($normA) * sqrt($normB)) : 0.0; diff --git a/src/DamerauLevenshtein.php b/src/DamerauLevenshtein.php index 18accfe..c6e4b5c 100644 --- a/src/DamerauLevenshtein.php +++ b/src/DamerauLevenshtein.php @@ -1,31 +1,40 @@ 1 && $j > 1) { $charA2 = mb_substr($a, $i - 2, 1, 'UTF-8'); $charB2 = mb_substr($b, $j - 2, 1, 'UTF-8'); diff --git a/src/Hamming.php b/src/Hamming.php index 5282090..86a03e1 100644 --- a/src/Hamming.php +++ b/src/Hamming.php @@ -1,11 +1,16 @@ + */ + private static function mbStrSplit(string $str): array + { $chars = []; $length = mb_strlen($str, 'UTF-8'); - + for ($i = 0; $i < $length; $i++) { $chars[] = mb_substr($str, $i, 1, 'UTF-8'); } - + return $chars; } } diff --git a/src/Jaro.php b/src/Jaro.php index edcf1d3..ce7669d 100644 --- a/src/Jaro.php +++ b/src/Jaro.php @@ -1,16 +1,22 @@ > */ $dp = []; for ($i = 0; $i <= $m; $i++) { diff --git a/src/Levenshtein.php b/src/Levenshtein.php index c5e86d9..340361f 100644 --- a/src/Levenshtein.php +++ b/src/Levenshtein.php @@ -1,29 +1,42 @@ assertEqualsWithDelta(1.0, Cosine::similarity("abc", "abc"), 1e-10); } - public function testCompletelyDifferentStrings() + public function testCompletelyDifferentStrings(): void { $this->assertEquals(0.0, Cosine::similarity("abc", "xyz")); } - public function testPartiallySimilarStrings() + public function testPartiallySimilarStrings(): void { $a = "night"; $b = "nacht"; @@ -26,34 +30,34 @@ public function testPartiallySimilarStrings() $this->assertLessThan(1, $result); } - public function testEmptyStrings() + public function testEmptyStrings(): void { $this->assertEquals(0.0, Cosine::similarity("", "")); } - public function testOneEmptyString() + public function testOneEmptyString(): void { $this->assertEquals(0.0, Cosine::similarity("abc", "")); $this->assertEquals(0.0, Cosine::similarity("", "abc")); } - - public function testIdenticalVectors() + + public function testIdenticalVectors(): void { $this->assertEqualsWithDelta(1.0, Cosine::similarityFromVectors([1, 2, 3], [1, 2, 3]), 1e-10); } - public function testCompletelyOppositeVectors() + public function testCompletelyOppositeVectors(): void { $this->assertEqualsWithDelta(-1.0, Cosine::similarityFromVectors([1, 0], [-1, 0]), 1e-10); } - public function testOrthogonalVectors() + public function testOrthogonalVectors(): void { $this->assertEqualsWithDelta(0.0, Cosine::similarityFromVectors([1, 0], [0, 1]), 1e-10); } - public function testPartiallyAlignedVectors() + public function testPartiallyAlignedVectors(): void { $a = [1, 1]; $b = [1, 0]; @@ -62,101 +66,101 @@ public function testPartiallyAlignedVectors() $this->assertLessThan(1, $similarity); } - public function testMismatchedVectorLengths() + public function testMismatchedVectorLengths(): void { $this->expectException(\InvalidArgumentException::class); $this->expectExceptionMessage("Vectors must be the same length."); Cosine::similarityFromVectors([1, 2, 3], [1, 2]); } - public function testZeroVectors() + public function testZeroVectors(): void { $this->assertEquals(0.0, Cosine::similarityFromVectors([0, 0], [0, 0])); } - public function testMultibyteIdenticalStrings() + public function testMultibyteIdenticalStrings(): void { $this->assertEqualsWithDelta(1.0, Cosine::similarity("café", "café"), 1e-10); $this->assertEqualsWithDelta(1.0, Cosine::similarity("🚀🌟", "🚀🌟"), 1e-10); } - public function testMultibyteDifferentStrings() + public function testMultibyteDifferentStrings(): void { $this->assertEquals(0.0, Cosine::similarity("café", "🚀🌟")); } - public function testMultibytePartialSimilarity() + public function testMultibytePartialSimilarity(): void { $result = Cosine::similarity("café", "caffé"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testJapaneseCharacters() + public function testJapaneseCharacters(): void { $result = Cosine::similarity("こんにちは", "こんにちわ"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testEmojiSimilarity() + public function testEmojiSimilarity(): void { $result = Cosine::similarity("🚀🌟", "🚀⭐"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testMixedAsciiMultibyte() + public function testMixedAsciiMultibyte(): void { $result = Cosine::similarity("hello café", "hello cafe"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testCyrillicCharacters() + public function testCyrillicCharacters(): void { $result = Cosine::similarity("собака", "собаки"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testMultibyteWithEmptyString() + public function testMultibyteWithEmptyString(): void { $this->assertEquals(0.0, Cosine::similarity("café", "")); $this->assertEquals(0.0, Cosine::similarity("", "🚀🌟")); } - public function testSingleMultibyteCharacter() + public function testSingleMultibyteCharacter(): void { $this->assertEqualsWithDelta(1.0, Cosine::similarity("é", "é"), 1e-10); $this->assertEquals(0.0, Cosine::similarity("é", "ö")); $this->assertEqualsWithDelta(1.0, Cosine::similarity("🚀", "🚀"), 1e-10); } - public function testNormalizationCombiningMarks() + public function testNormalizationCombiningMarks(): void { $this->assertEqualsWithDelta(0.0, Cosine::similarity("é", "\u{0065}\u{0301}"), 1e-10); } - public function testZWJFamilyEmojiCosine() + public function testZWJFamilyEmojiCosine(): void { $this->assertEqualsWithDelta(2 / sqrt(13), Cosine::similarity("👨‍👩‍👧‍👦", "👨👩👧👦"), 1e-9); } - public function testNFKCCompatibilityNormalization() + public function testNFKCCompatibilityNormalization(): void { $this->assertEquals(0.0, Cosine::similarity("①", "1")); $this->assertEquals(0.0, Cosine::similarity("ff", "ff")); $this->assertEquals(0.0, Cosine::similarity("Å", "Å")); } - public function testNFKDDecomposedNormalization() + public function testNFKDDecomposedNormalization(): void { $this->assertEquals(0.0, Cosine::similarity("Å", "A\u{030A}")); $this->assertEquals(0.0, Cosine::similarity("ñ", "n\u{0303}")); } - public function testInvalidUtf8Input() + public function testInvalidUtf8Input(): void { $this->expectException(\InvalidArgumentException::class); $this->expectExceptionMessage("Input strings must be valid UTF-8."); diff --git a/tests/DamerauLevenshteinTest.php b/tests/DamerauLevenshteinTest.php index 5fa15c1..863b4a6 100644 --- a/tests/DamerauLevenshteinTest.php +++ b/tests/DamerauLevenshteinTest.php @@ -1,134 +1,138 @@ assertSame(0, DamerauLevenshtein::distance("test", "test")); } - public function testInsertion() + public function testInsertion(): void { $this->assertSame(1, DamerauLevenshtein::distance("test", "tests")); } - public function testDeletion() + public function testDeletion(): void { $this->assertSame(1, DamerauLevenshtein::distance("tests", "test")); } - public function testSubstitution() + public function testSubstitution(): void { $this->assertSame(1, DamerauLevenshtein::distance("test", "tent")); } - public function testTransposition() + public function testTransposition(): void { $this->assertSame(1, DamerauLevenshtein::distance("ab", "ba")); } - public function testComplexCase() + public function testComplexCase(): void { $this->assertSame(3, DamerauLevenshtein::distance("ca", "abc")); } - public function testEmptyToNonEmpty() + public function testEmptyToNonEmpty(): void { $this->assertSame(4, DamerauLevenshtein::distance("", "test")); } - public function testNonEmptyToEmpty() + public function testNonEmptyToEmpty(): void { $this->assertSame(4, DamerauLevenshtein::distance("test", "")); } - public function testBothEmpty() + public function testBothEmpty(): void { $this->assertSame(0, DamerauLevenshtein::distance("", "")); } - public function testMultibyteIdenticalStrings() + public function testMultibyteIdenticalStrings(): void { $this->assertSame(0, DamerauLevenshtein::distance("café", "café")); $this->assertSame(0, DamerauLevenshtein::distance("🚀🌟", "🚀🌟")); } - public function testMultibyteInsertion() + public function testMultibyteInsertion(): void { $this->assertSame(1, DamerauLevenshtein::distance("café", "caffé")); $this->assertSame(1, DamerauLevenshtein::distance("🚀", "🚀🌟")); } - public function testMultibyteDeletion() + public function testMultibyteDeletion(): void { $this->assertSame(1, DamerauLevenshtein::distance("caffé", "café")); $this->assertSame(1, DamerauLevenshtein::distance("🚀🌟", "🚀")); } - public function testMultibyteSubstitution() + public function testMultibyteSubstitution(): void { $this->assertSame(1, DamerauLevenshtein::distance("café", "cafe")); $this->assertSame(1, DamerauLevenshtein::distance("🚀🌟", "🚀⭐")); } - public function testMultibyteTransposition() + public function testMultibyteTransposition(): void { $this->assertSame(1, DamerauLevenshtein::distance("éö", "öé")); $this->assertSame(1, DamerauLevenshtein::distance("🚀🌟", "🌟🚀")); } - public function testJapaneseCharacters() + public function testJapaneseCharacters(): void { $this->assertSame(1, DamerauLevenshtein::distance("こんにちは", "こんにちわ")); $this->assertSame(1, DamerauLevenshtein::distance("あい", "いあ")); } - public function testCyrillicCharacters() + public function testCyrillicCharacters(): void { $this->assertSame(0, DamerauLevenshtein::distance("собака", "собака")); } - public function testHebrewCharacters() + public function testHebrewCharacters(): void { $this->assertSame(1, DamerauLevenshtein::distance("עברית", "עבדית")); } - public function testMixedAsciiMultibyte() + public function testMixedAsciiMultibyte(): void { $this->assertSame(1, DamerauLevenshtein::distance("hello café", "hello cafe")); $this->assertSame(1, DamerauLevenshtein::distance("test 🚀", "test 🌟")); } - public function testMultibyteCompletelyDifferent() + public function testMultibyteCompletelyDifferent(): void { $this->assertSame(4, DamerauLevenshtein::distance("café", "🚀🌟⭐")); } - public function testMultibyteEmptyToNonEmpty() + public function testMultibyteEmptyToNonEmpty(): void { $this->assertSame(4, DamerauLevenshtein::distance("", "café")); $this->assertSame(2, DamerauLevenshtein::distance("", "🚀🌟")); } - public function testMultibyteNonEmptyToEmpty() + public function testMultibyteNonEmptyToEmpty(): void { $this->assertSame(4, DamerauLevenshtein::distance("café", "")); $this->assertSame(2, DamerauLevenshtein::distance("🚀🌟", "")); } - public function testSingleMultibyteCharacter() + public function testSingleMultibyteCharacter(): void { $this->assertSame(0, DamerauLevenshtein::distance("é", "é")); $this->assertSame(1, DamerauLevenshtein::distance("é", "ö")); $this->assertSame(0, DamerauLevenshtein::distance("🚀", "🚀")); } - public function testComplexMultibyteTranspositions() + public function testComplexMultibyteTranspositions(): void { $this->assertSame(3, DamerauLevenshtein::distance("éöü", "üé")); } diff --git a/tests/HammingTest.php b/tests/HammingTest.php index a481390..91fa5ae 100644 --- a/tests/HammingTest.php +++ b/tests/HammingTest.php @@ -1,109 +1,113 @@ assertSame(0, Hamming::distance("karolin", "karolin")); } - public function testDifferentStrings() + public function testDifferentStrings(): void { $this->assertSame(3, Hamming::distance("karolin", "kathrin")); $this->assertSame(1, Hamming::distance("1011101", "1001101")); $this->assertSame(2, Hamming::distance("2173896", "2174890")); } - public function testEmptyStrings() + public function testEmptyStrings(): void { $this->assertSame(0, Hamming::distance("", "")); } - public function testThrowsOnUnequalLength() + public function testThrowsOnUnequalLength(): void { $this->expectException(\InvalidArgumentException::class); $this->expectExceptionMessage("Strings must be of equal length."); Hamming::distance("abc", "ab"); } - public function testThrowsOnOneEmptyOneNot() + public function testThrowsOnOneEmptyOneNot(): void { $this->expectException(\InvalidArgumentException::class); Hamming::distance("", "a"); } - public function testMultibyteIdenticalStrings() + public function testMultibyteIdenticalStrings(): void { $this->assertSame(0, Hamming::distance("café", "café")); $this->assertSame(0, Hamming::distance("🚀🌟", "🚀🌟")); } - public function testMultibyteDifferentStrings() + public function testMultibyteDifferentStrings(): void { $this->assertSame(2, Hamming::distance("café", "case")); $this->assertSame(2, Hamming::distance("café", "casa")); } - public function testEmojiDistance() + public function testEmojiDistance(): void { $this->assertSame(1, Hamming::distance("🚀🌟", "🚀⭐")); $this->assertSame(2, Hamming::distance("🚀🌟", "⭐🌙")); } - public function testJapaneseCharacters() + public function testJapaneseCharacters(): void { $this->assertSame(1, Hamming::distance("こんにちは", "こんにちわ")); $this->assertSame(0, Hamming::distance("こんにちは", "こんにちは")); } - public function testCyrillicCharacters() + public function testCyrillicCharacters(): void { $this->assertSame(4, Hamming::distance("собака", "корова")); } - public function testHebrewCharacters() + public function testHebrewCharacters(): void { $this->assertSame(1, Hamming::distance("עברית", "עבדית")); } - public function testMixedAsciiMultibyte() + public function testMixedAsciiMultibyte(): void { $this->assertSame(1, Hamming::distance("test é", "test e")); } - public function testSingleMultibyteCharacter() + public function testSingleMultibyteCharacter(): void { $this->assertSame(0, Hamming::distance("é", "é")); $this->assertSame(1, Hamming::distance("é", "ö")); $this->assertSame(0, Hamming::distance("🚀", "🚀")); } - public function testMultibyteUnequalLength() + public function testMultibyteUnequalLength(): void { $this->expectException(\InvalidArgumentException::class); $this->expectExceptionMessage("Strings must be of equal length."); Hamming::distance("café", "ca"); } - public function testMultibyteUnequalLengthEmoji() + public function testMultibyteUnequalLengthEmoji(): void { $this->expectException(\InvalidArgumentException::class); Hamming::distance("🚀🌟", "🚀"); } - public function testGraphemeSkinToneUnequalLength() + public function testGraphemeSkinToneUnequalLength(): void { $this->expectException(\InvalidArgumentException::class); $this->expectExceptionMessage("Strings must be of equal length."); Hamming::distance("👍", "👍🏽"); } - public function testFlagRegionalIndicators() + public function testFlagRegionalIndicators(): void { $this->assertSame(1, Hamming::distance("🇺🇸", "🇺🇳")); } diff --git a/tests/JaccardTest.php b/tests/JaccardTest.php index f4fa32c..91571bb 100644 --- a/tests/JaccardTest.php +++ b/tests/JaccardTest.php @@ -1,135 +1,139 @@ assertEqualsWithDelta(1.0, Jaccard::index("abc", "abc"), 1e-10); } - public function testCompletelyDifferentStrings() + public function testCompletelyDifferentStrings(): void { $this->assertEqualsWithDelta(0.0, Jaccard::index("abc", "xyz"), 1e-10); } - public function testPartiallyOverlappingStrings() + public function testPartiallyOverlappingStrings(): void { $this->assertEqualsWithDelta(0.5, Jaccard::index("abc", "bcd"), 1e-10); } - public function testEmptyStrings() + public function testEmptyStrings(): void { $this->assertEqualsWithDelta(0.0, Jaccard::index("", ""), 1e-10); } - public function testOneEmptyString() + public function testOneEmptyString(): void { $this->assertEqualsWithDelta(0.0, Jaccard::index("abc", ""), 1e-10); $this->assertEqualsWithDelta(0.0, Jaccard::index("", "xyz"), 1e-10); } - public function testRepeatedCharacters() + public function testRepeatedCharacters(): void { $this->assertEqualsWithDelta(1.0, Jaccard::index("aaaa", "a"), 1e-10); } - public function testMultibyteIdenticalStrings() + public function testMultibyteIdenticalStrings(): void { $this->assertEqualsWithDelta(1.0, Jaccard::index("café", "café"), 1e-10); $this->assertEqualsWithDelta(1.0, Jaccard::index("🚀🌟", "🚀🌟"), 1e-10); } - public function testMultibyteDifferentStrings() + public function testMultibyteDifferentStrings(): void { $this->assertEqualsWithDelta(0.0, Jaccard::index("café", "🚀🌟"), 1e-10); } - public function testMultibytePartialOverlap() + public function testMultibytePartialOverlap(): void { $result = Jaccard::index("café", "caffé"); $this->assertEqualsWithDelta(1.0, $result, 1e-10); } - public function testJapaneseCharacters() + public function testJapaneseCharacters(): void { $result = Jaccard::index("こんにちは", "こんにちわ"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testEmojiSupport() + public function testEmojiSupport(): void { $this->assertEqualsWithDelta(0.3333333333333333, Jaccard::index("🚀🌟", "🚀⭐"), 1e-10); } - public function testCyrillicCharacters() + public function testCyrillicCharacters(): void { $result = Jaccard::index("собака", "собаки"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testHebrewCharacters() + public function testHebrewCharacters(): void { $result = Jaccard::index("עברית", "עבדית"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testMixedAsciiMultibyte() + public function testMixedAsciiMultibyte(): void { $result = Jaccard::index("hello café", "hello cafe"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testMultibyteWithEmptyString() + public function testMultibyteWithEmptyString(): void { $this->assertEqualsWithDelta(0.0, Jaccard::index("café", ""), 1e-10); $this->assertEqualsWithDelta(0.0, Jaccard::index("", "🚀🌟"), 1e-10); } - public function testSingleMultibyteCharacter() + public function testSingleMultibyteCharacter(): void { $this->assertEqualsWithDelta(1.0, Jaccard::index("é", "é"), 1e-10); $this->assertEqualsWithDelta(0.0, Jaccard::index("é", "ö"), 1e-10); $this->assertEqualsWithDelta(1.0, Jaccard::index("🚀", "🚀"), 1e-10); } - public function testMultibyteRepeatedCharacters() + public function testMultibyteRepeatedCharacters(): void { $this->assertEqualsWithDelta(1.0, Jaccard::index("éééé", "é"), 1e-10); } - public function testNormalizationCombiningMarks() + public function testNormalizationCombiningMarks(): void { $this->assertEqualsWithDelta(0.0, Jaccard::index("é", "\u{0065}\u{0301}"), 1e-10); } - public function testZWJFamilyEmojiSetOverlap() + public function testZWJFamilyEmojiSetOverlap(): void { $this->assertEqualsWithDelta(0.8, Jaccard::index("👨‍👩‍👧‍👦", "👨👩👧👦"), 1e-10); } - public function testNFKCCompatibilityNormalization() + public function testNFKCCompatibilityNormalization(): void { $this->assertEqualsWithDelta(0.0, Jaccard::index("①", "1"), 1e-10); $this->assertEqualsWithDelta(0.0, Jaccard::index("ff", "ff"), 1e-10); $this->assertEqualsWithDelta(0.0, Jaccard::index("Å", "Å"), 1e-10); } - public function testNFKDDecomposedNormalization() + public function testNFKDDecomposedNormalization(): void { $this->assertEqualsWithDelta(0.0, Jaccard::index("Å", "A\u{030A}"), 1e-10); $this->assertEqualsWithDelta(0.0, Jaccard::index("ñ", "n\u{0303}"), 1e-10); } - public function testInvalidUtf8Input() + public function testInvalidUtf8Input(): void { $this->expectException(\InvalidArgumentException::class); $this->expectExceptionMessage("Input strings must be valid UTF-8."); diff --git a/tests/JaroTest.php b/tests/JaroTest.php index fc63685..ca7fa8c 100644 --- a/tests/JaroTest.php +++ b/tests/JaroTest.php @@ -1,124 +1,127 @@ assertEqualsWithDelta(1.0, Jaro::similarity("martha", "martha"), 1e-10); } - public function testSimilarityCompletelyDifferentStrings() + public function testSimilarityCompletelyDifferentStrings(): void { $this->assertEqualsWithDelta(0.0, Jaro::similarity("abc", "xyz"), 1e-10); } - public function testSimilarityPartialMatch() + public function testSimilarityPartialMatch(): void { $expected = 0.9444444444; $actual = Jaro::similarity("martha", "marhta"); $this->assertEqualsWithDelta($expected, $actual, 1e-6); } - public function testSimilarityCaseWithSomeOverlap() + public function testSimilarityCaseWithSomeOverlap(): void { $expected = 0.8222222222; $actual = Jaro::similarity("dwayne", "duane"); $this->assertEqualsWithDelta($expected, $actual, 1e-6); } - public function testSimilarityEmptyStrings() + public function testSimilarityEmptyStrings(): void { $this->assertEqualsWithDelta(1.0, Jaro::similarity("", ""), 1e-10); } - public function testSimilarityOneEmptyString() + public function testSimilarityOneEmptyString(): void { $this->assertEqualsWithDelta(0.0, Jaro::similarity("abc", ""), 1e-10); $this->assertEqualsWithDelta(0.0, Jaro::similarity("", "abc"), 1e-10); } - public function testSimilaritySingleCharacterMismatch() + public function testSimilaritySingleCharacterMismatch(): void { $this->assertEqualsWithDelta(0.0, Jaro::similarity("a", "b"), 1e-10); } - public function testSimilaritySingleCharacterMatch() + public function testSimilaritySingleCharacterMatch(): void { $this->assertEqualsWithDelta(1.0, Jaro::similarity("a", "a"), 1e-10); } - - public function testDistanceIdenticalStrings() + + public function testDistanceIdenticalStrings(): void { $this->assertEqualsWithDelta(0.0, Jaro::distance("martha", "martha"), 1e-10); } - public function testDistanceCompletelyDifferentStrings() + public function testDistanceCompletelyDifferentStrings(): void { $this->assertEqualsWithDelta(1.0, Jaro::distance("abc", "xyz"), 1e-10); } - public function testDistancePartialMatch() + public function testDistancePartialMatch(): void { $expected = 1.0 - 0.9444444444; $actual = Jaro::distance("martha", "marhta"); $this->assertEqualsWithDelta($expected, $actual, 1e-6); } - public function testDistanceCaseWithSomeOverlap() + public function testDistanceCaseWithSomeOverlap(): void { $expected = 1.0 - 0.8222222222; $actual = Jaro::distance("dwayne", "duane"); $this->assertEqualsWithDelta($expected, $actual, 1e-6); } - public function testDistanceEmptyStrings() + public function testDistanceEmptyStrings(): void { $this->assertEqualsWithDelta(0.0, Jaro::distance("", ""), 1e-10); } - public function testDistanceOneEmptyString() + public function testDistanceOneEmptyString(): void { $this->assertEqualsWithDelta(1.0, Jaro::distance("abc", ""), 1e-10); $this->assertEqualsWithDelta(1.0, Jaro::distance("", "abc"), 1e-10); } - public function testDistanceSingleCharacterMismatch() + public function testDistanceSingleCharacterMismatch(): void { $this->assertEqualsWithDelta(1.0, Jaro::distance("a", "b"), 1e-10); } - public function testDistanceSingleCharacterMatch() + public function testDistanceSingleCharacterMatch(): void { $this->assertEqualsWithDelta(0.0, Jaro::distance("a", "a"), 1e-10); } - - public function testSimilarityMultibyteIdenticalStrings() + + public function testSimilarityMultibyteIdenticalStrings(): void { $this->assertEqualsWithDelta(1.0, Jaro::similarity("café", "café"), 1e-10); $this->assertEqualsWithDelta(1.0, Jaro::similarity("🚀🌟", "🚀🌟"), 1e-10); } - public function testSimilarityMultibytePartialMatch() + public function testSimilarityMultibytePartialMatch(): void { $result = Jaro::similarity("café", "caffé"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testSimilarityJapaneseCharacters() + public function testSimilarityJapaneseCharacters(): void { $result = Jaro::similarity("こんにちは", "こんにちわ"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testSimilarityEmojiSupport() + public function testSimilarityEmojiSupport(): void { $this->assertEqualsWithDelta(1.0, Jaro::similarity("🚀", "🚀"), 1e-10); $result = Jaro::similarity("🚀🌟", "🚀⭐"); @@ -126,66 +129,66 @@ public function testSimilarityEmojiSupport() $this->assertLessThan(1, $result); } - public function testSimilarityCyrillicCharacters() + public function testSimilarityCyrillicCharacters(): void { $result = Jaro::similarity("собака", "собаки"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testSimilarityHebrewCharacters() + public function testSimilarityHebrewCharacters(): void { $result = Jaro::similarity("עברית", "עבדית"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testSimilarityMixedAsciiMultibyte() + public function testSimilarityMixedAsciiMultibyte(): void { $result = Jaro::similarity("hello café", "hello cafe"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testSimilarityMultibyteCompletelyDifferent() + public function testSimilarityMultibyteCompletelyDifferent(): void { $this->assertEqualsWithDelta(0.0, Jaro::similarity("café", "🚀🌟"), 1e-10); } - public function testSimilarityMultibyteWithEmptyString() + public function testSimilarityMultibyteWithEmptyString(): void { $this->assertEqualsWithDelta(0.0, Jaro::similarity("café", ""), 1e-10); $this->assertEqualsWithDelta(0.0, Jaro::similarity("🚀🌟", ""), 1e-10); } - public function testSimilaritySingleMultibyteCharacter() + public function testSimilaritySingleMultibyteCharacter(): void { $this->assertEqualsWithDelta(1.0, Jaro::similarity("é", "é"), 1e-10); $this->assertEqualsWithDelta(0.0, Jaro::similarity("é", "ö"), 1e-10); $this->assertEqualsWithDelta(1.0, Jaro::similarity("🚀", "🚀"), 1e-10); } - - public function testDistanceMultibyteIdenticalStrings() + + public function testDistanceMultibyteIdenticalStrings(): void { $this->assertEqualsWithDelta(0.0, Jaro::distance("café", "café"), 1e-10); $this->assertEqualsWithDelta(0.0, Jaro::distance("🚀🌟", "🚀🌟"), 1e-10); } - public function testDistanceMultibytePartialMatch() + public function testDistanceMultibytePartialMatch(): void { $result = Jaro::distance("café", "caffé"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testDistanceJapaneseCharacters() + public function testDistanceJapaneseCharacters(): void { $result = Jaro::distance("こんにちは", "こんにちわ"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testDistanceEmojiSupport() + public function testDistanceEmojiSupport(): void { $this->assertEqualsWithDelta(0.0, Jaro::distance("🚀", "🚀"), 1e-10); $result = Jaro::distance("🚀🌟", "🚀⭐"); @@ -193,46 +196,46 @@ public function testDistanceEmojiSupport() $this->assertLessThan(1, $result); } - public function testDistanceCyrillicCharacters() + public function testDistanceCyrillicCharacters(): void { $result = Jaro::distance("собака", "собаки"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testDistanceHebrewCharacters() + public function testDistanceHebrewCharacters(): void { $result = Jaro::distance("עברית", "עבדית"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testDistanceMixedAsciiMultibyte() + public function testDistanceMixedAsciiMultibyte(): void { $result = Jaro::distance("hello café", "hello cafe"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testDistanceMultibyteCompletelyDifferent() + public function testDistanceMultibyteCompletelyDifferent(): void { $this->assertEqualsWithDelta(1.0, Jaro::distance("café", "🚀🌟"), 1e-10); } - public function testDistanceMultibyteWithEmptyString() + public function testDistanceMultibyteWithEmptyString(): void { $this->assertEqualsWithDelta(1.0, Jaro::distance("café", ""), 1e-10); $this->assertEqualsWithDelta(1.0, Jaro::distance("🚀🌟", ""), 1e-10); } - public function testDistanceSingleMultibyteCharacter() + public function testDistanceSingleMultibyteCharacter(): void { $this->assertEqualsWithDelta(0.0, Jaro::distance("é", "é"), 1e-10); $this->assertEqualsWithDelta(1.0, Jaro::distance("é", "ö"), 1e-10); $this->assertEqualsWithDelta(0.0, Jaro::distance("🚀", "🚀"), 1e-10); } - public function testDistanceWindowBoundaryCases() + public function testDistanceWindowBoundaryCases(): void { $this->assertEqualsWithDelta(1.0, Jaro::distance("a", "b"), 1e-10); $this->assertEqualsWithDelta(0.0, Jaro::distance("a", "a"), 1e-10); @@ -241,7 +244,7 @@ public function testDistanceWindowBoundaryCases() $this->assertEqualsWithDelta(1.0, Jaro::distance("abcd", "efgh"), 1e-10); } - public function testDistanceExactJaroValues() + public function testDistanceExactJaroValues(): void { $this->assertEqualsWithDelta(1.0, Jaro::distance("abc", "xyz"), 1e-10); $this->assertEqualsWithDelta(1.0, Jaro::distance("a", "b"), 1e-10); @@ -251,7 +254,7 @@ public function testDistanceExactJaroValues() $this->assertLessThan(1.0, $result); } - public function testInvalidUtf8InputDistance() + public function testInvalidUtf8InputDistance(): void { $this->expectException(\InvalidArgumentException::class); $this->expectExceptionMessage("Input strings must be valid UTF-8."); @@ -259,11 +262,11 @@ public function testInvalidUtf8InputDistance() Jaro::distance($invalid, "test"); } - public function testInvalidUtf8InputSimilarity() + public function testInvalidUtf8InputSimilarity(): void { $this->expectException(\InvalidArgumentException::class); $this->expectExceptionMessage("Input strings must be valid UTF-8."); $invalid = "\xFF\xFF"; Jaro::similarity($invalid, "test"); } -} \ No newline at end of file +} diff --git a/tests/JaroWinklerTest.php b/tests/JaroWinklerTest.php index bdb40fa..97cdd30 100644 --- a/tests/JaroWinklerTest.php +++ b/tests/JaroWinklerTest.php @@ -1,44 +1,50 @@ assertEqualsWithDelta(1.0, JaroWinkler::similarity("martha", "martha"), 1e-10); } - public function testSimilarityCompletelyDifferentStrings() + public function testSimilarityCompletelyDifferentStrings(): void { $this->assertEqualsWithDelta(0.0, JaroWinkler::similarity("abc", "xyz"), 1e-10); } - public function testSimilarityKnownPairMARTHAvsMARHTA() + public function testSimilarityKnownPairMARTHAvsMARHTA(): void { - $jaro = Jaro::similarity("martha", "marhta"); + $jaro = Jaro::similarity("martha", "marhta"); $prefix = 3; $expected = $jaro + $prefix * 0.1 * (1 - $jaro); $actual = JaroWinkler::similarity("martha", "marhta"); $this->assertEqualsWithDelta($expected, $actual, 1e-6); } - public function testSimilarityPrefixLimit() - { + public function testSimilarityPrefixLimit(): void + { $a = "prefix_match_1"; $b = "prefix_match_2"; $jaro = Jaro::similarity($a, $b); - $expected = $jaro + 4 * 0.1 * (1 - $jaro); + $expected = $jaro + 4 * 0.1 * (1 - $jaro); $actual = JaroWinkler::similarity($a, $b); $this->assertEqualsWithDelta($expected, $actual, 1e-6); } - public function testSimilarityNoCommonPrefix() + public function testSimilarityNoCommonPrefix(): void { $a = "xxxxx"; $b = "yyyyy"; @@ -47,58 +53,58 @@ public function testSimilarityNoCommonPrefix() $this->assertEqualsWithDelta($expected, JaroWinkler::similarity($a, $b), 1e-10); } - public function testSimilarityEmptyStrings() + public function testSimilarityEmptyStrings(): void { $this->assertEqualsWithDelta(1.0, JaroWinkler::similarity("", ""), 1e-10); } - public function testSimilarityOneEmptyString() + public function testSimilarityOneEmptyString(): void { $this->assertEqualsWithDelta(0.0, JaroWinkler::similarity("abc", ""), 1e-10); $this->assertEqualsWithDelta(0.0, JaroWinkler::similarity("", "xyz"), 1e-10); } - public function testSimilaritySingleCharMatch() + public function testSimilaritySingleCharMatch(): void { $this->assertEqualsWithDelta(1.0, JaroWinkler::similarity("a", "a"), 1e-10); } - public function testSimilaritySingleCharMismatch() + public function testSimilaritySingleCharMismatch(): void { $this->assertEqualsWithDelta(0.0, JaroWinkler::similarity("a", "b"), 1e-10); } - - public function testDistanceIdenticalStrings() + + public function testDistanceIdenticalStrings(): void { $this->assertEqualsWithDelta(0.0, JaroWinkler::distance("martha", "martha"), 1e-10); } - public function testDistanceCompletelyDifferentStrings() + public function testDistanceCompletelyDifferentStrings(): void { $this->assertEqualsWithDelta(1.0, JaroWinkler::distance("abc", "xyz"), 1e-10); } - public function testDistanceKnownPairMARTHAvsMARHTA() + public function testDistanceKnownPairMARTHAvsMARHTA(): void { - $jaro = Jaro::similarity("martha", "marhta"); + $jaro = Jaro::similarity("martha", "marhta"); $prefix = 3; $expected = 1.0 - ($jaro + $prefix * 0.1 * (1 - $jaro)); $actual = JaroWinkler::distance("martha", "marhta"); $this->assertEqualsWithDelta($expected, $actual, 1e-6); } - public function testDistancePrefixLimit() - { + public function testDistancePrefixLimit(): void + { $a = "prefix_match_1"; $b = "prefix_match_2"; $jaro = Jaro::similarity($a, $b); - $expected = 1.0 - ($jaro + 4 * 0.1 * (1 - $jaro)); + $expected = 1.0 - ($jaro + 4 * 0.1 * (1 - $jaro)); $actual = JaroWinkler::distance($a, $b); $this->assertEqualsWithDelta($expected, $actual, 1e-6); } - public function testDistanceNoCommonPrefix() + public function testDistanceNoCommonPrefix(): void { $a = "xxxxx"; $b = "yyyyy"; @@ -107,113 +113,113 @@ public function testDistanceNoCommonPrefix() $this->assertEqualsWithDelta($expected, JaroWinkler::distance($a, $b), 1e-10); } - public function testDistanceEmptyStrings() + public function testDistanceEmptyStrings(): void { $this->assertEqualsWithDelta(0.0, JaroWinkler::distance("", ""), 1e-10); } - public function testDistanceOneEmptyString() + public function testDistanceOneEmptyString(): void { $this->assertEqualsWithDelta(1.0, JaroWinkler::distance("abc", ""), 1e-10); $this->assertEqualsWithDelta(1.0, JaroWinkler::distance("", "xyz"), 1e-10); } - public function testDistanceSingleCharMatch() + public function testDistanceSingleCharMatch(): void { $this->assertEqualsWithDelta(0.0, JaroWinkler::distance("a", "a"), 1e-10); } - public function testDistanceSingleCharMismatch() + public function testDistanceSingleCharMismatch(): void { $this->assertEqualsWithDelta(1.0, JaroWinkler::distance("a", "b"), 1e-10); } - - public function testSimilarityMultibyteIdenticalStrings() + + public function testSimilarityMultibyteIdenticalStrings(): void { $this->assertEqualsWithDelta(1.0, JaroWinkler::similarity("café", "café"), 1e-10); $this->assertEqualsWithDelta(1.0, JaroWinkler::similarity("🚀🌟", "🚀🌟"), 1e-10); } - public function testSimilarityMultibytePartialMatch() + public function testSimilarityMultibytePartialMatch(): void { $result = JaroWinkler::similarity("café", "caffé"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testSimilarityJapaneseCharacters() + public function testSimilarityJapaneseCharacters(): void { $result = JaroWinkler::similarity("こんにちは", "こんにちわ"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testSimilarityEmojiSupport() + public function testSimilarityEmojiSupport(): void { $this->assertEqualsWithDelta(1.0, JaroWinkler::similarity("🚀", "🚀"), 1e-10); $this->assertEqualsWithDelta(Jaro::similarity("🚀🌟", "🚀⭐") + 0.1 * (1 - Jaro::similarity("🚀🌟", "🚀⭐")), JaroWinkler::similarity("🚀🌟", "🚀⭐"), 1e-6); } - public function testSimilarityCyrillicCharacters() + public function testSimilarityCyrillicCharacters(): void { $result = JaroWinkler::similarity("собака", "собаки"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testSimilarityHebrewCharacters() + public function testSimilarityHebrewCharacters(): void { $result = JaroWinkler::similarity("עברית", "עבדית"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testSimilarityMixedAsciiMultibyte() + public function testSimilarityMixedAsciiMultibyte(): void { $result = JaroWinkler::similarity("hello café", "hello cafe"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testSimilarityMultibyteCompletelyDifferent() + public function testSimilarityMultibyteCompletelyDifferent(): void { $this->assertEqualsWithDelta(0.0, JaroWinkler::similarity("café", "🚀🌟"), 1e-10); } - public function testSimilarityMultibyteWithEmptyString() + public function testSimilarityMultibyteWithEmptyString(): void { $this->assertEqualsWithDelta(0.0, JaroWinkler::similarity("café", ""), 1e-10); $this->assertEqualsWithDelta(0.0, JaroWinkler::similarity("🚀🌟", ""), 1e-10); } - public function testSimilaritySingleMultibyteCharacter() + public function testSimilaritySingleMultibyteCharacter(): void { $this->assertEqualsWithDelta(1.0, JaroWinkler::similarity("é", "é"), 1e-10); $this->assertEqualsWithDelta(0.0, JaroWinkler::similarity("é", "ö"), 1e-10); $this->assertEqualsWithDelta(1.0, JaroWinkler::similarity("🚀", "🚀"), 1e-10); } - - public function testDistanceMultibyteIdenticalStrings() + + public function testDistanceMultibyteIdenticalStrings(): void { $this->assertEqualsWithDelta(0.0, JaroWinkler::distance("café", "café"), 1e-10); $this->assertEqualsWithDelta(0.0, JaroWinkler::distance("🚀🌟", "🚀🌟"), 1e-10); } - public function testDistanceMultibytePartialMatch() + public function testDistanceMultibytePartialMatch(): void { $result = JaroWinkler::distance("café", "caffé"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testDistanceJapaneseCharacters() + public function testDistanceJapaneseCharacters(): void { $result = JaroWinkler::distance("こんにちは", "こんにちわ"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testDistanceEmojiSupport() + public function testDistanceEmojiSupport(): void { $this->assertEqualsWithDelta(0.0, JaroWinkler::distance("🚀", "🚀"), 1e-10); $jaroSim = Jaro::similarity("🚀🌟", "🚀⭐"); @@ -221,60 +227,60 @@ public function testDistanceEmojiSupport() $this->assertEqualsWithDelta(1.0 - $expectedSim, JaroWinkler::distance("🚀🌟", "🚀⭐"), 1e-6); } - public function testDistanceCyrillicCharacters() + public function testDistanceCyrillicCharacters(): void { $result = JaroWinkler::distance("собака", "собаки"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testDistanceHebrewCharacters() + public function testDistanceHebrewCharacters(): void { $result = JaroWinkler::distance("עברית", "עבדית"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testDistanceMixedAsciiMultibyte() + public function testDistanceMixedAsciiMultibyte(): void { $result = JaroWinkler::distance("hello café", "hello cafe"); $this->assertGreaterThan(0, $result); $this->assertLessThan(1, $result); } - public function testDistanceMultibyteCompletelyDifferent() + public function testDistanceMultibyteCompletelyDifferent(): void { $this->assertEqualsWithDelta(1.0, JaroWinkler::distance("café", "🚀🌟"), 1e-10); } - public function testDistanceMultibyteWithEmptyString() + public function testDistanceMultibyteWithEmptyString(): void { $this->assertEqualsWithDelta(1.0, JaroWinkler::distance("café", ""), 1e-10); $this->assertEqualsWithDelta(1.0, JaroWinkler::distance("🚀🌟", ""), 1e-10); } - public function testDistanceSingleMultibyteCharacter() + public function testDistanceSingleMultibyteCharacter(): void { $this->assertEqualsWithDelta(0.0, JaroWinkler::distance("é", "é"), 1e-10); $this->assertEqualsWithDelta(1.0, JaroWinkler::distance("é", "ö"), 1e-10); $this->assertEqualsWithDelta(0.0, JaroWinkler::distance("🚀", "🚀"), 1e-10); } - public function testSimilarityMultibytePrefix() + public function testSimilarityMultibytePrefix(): void { $withoutPrefix = JaroWinkler::similarity("ébcdef", "éxydef"); $withPrefix = JaroWinkler::similarity("éébcdef", "ééxydef"); $this->assertGreaterThan($withoutPrefix, $withPrefix); } - public function testSimilarityMultibytePrefixScale() + public function testSimilarityMultibytePrefixScale(): void { $defaultScale = JaroWinkler::similarity("café", "cafe"); $customScale = JaroWinkler::similarity("café", "cafe", 0.2); $this->assertNotEquals($defaultScale, $customScale); } - public function testSimilarityPrefixScaleValidation() + public function testSimilarityPrefixScaleValidation(): void { $base = Jaro::similarity("martha", "marhta"); $scale1 = JaroWinkler::similarity("martha", "marhta", 0.1); @@ -286,21 +292,21 @@ public function testSimilarityPrefixScaleValidation() $this->assertGreaterThan($scale1, $scale2); } - public function testDistanceMultibytePrefix() + public function testDistanceMultibytePrefix(): void { $withoutPrefix = JaroWinkler::distance("ébcdef", "éxydef"); $withPrefix = JaroWinkler::distance("éébcdef", "ééxydef"); - $this->assertLessThan($withoutPrefix, $withPrefix); + $this->assertLessThan($withoutPrefix, $withPrefix); } - public function testDistanceMultibytePrefixScale() + public function testDistanceMultibytePrefixScale(): void { $defaultScale = JaroWinkler::distance("café", "cafe"); $customScale = JaroWinkler::distance("café", "cafe", 0.2); $this->assertNotEquals($defaultScale, $customScale); } - public function testDistancePrefixScaleValidation() + public function testDistancePrefixScaleValidation(): void { $base = Jaro::similarity("martha", "marhta"); $scale1 = JaroWinkler::distance("martha", "marhta", 0.1); @@ -309,10 +315,10 @@ public function testDistancePrefixScaleValidation() $expected2 = 1.0 - ($base + 3 * 0.2 * (1 - $base)); $this->assertEqualsWithDelta($expected1, $scale1, 1e-6); $this->assertEqualsWithDelta($expected2, $scale2, 1e-6); - $this->assertLessThan($scale1, $scale2); + $this->assertLessThan($scale1, $scale2); } - public function testDistanceWindowBoundaryCases() + public function testDistanceWindowBoundaryCases(): void { $this->assertEqualsWithDelta(1.0, JaroWinkler::distance("a", "b"), 1e-10); $this->assertEqualsWithDelta(0.0, JaroWinkler::distance("a", "a"), 1e-10); @@ -321,7 +327,7 @@ public function testDistanceWindowBoundaryCases() $this->assertEqualsWithDelta(1.0, $result, 1e-10); } - public function testInvalidUtf8InputDistance() + public function testInvalidUtf8InputDistance(): void { $this->expectException(\InvalidArgumentException::class); $this->expectExceptionMessage("Input strings must be valid UTF-8."); @@ -329,11 +335,11 @@ public function testInvalidUtf8InputDistance() JaroWinkler::distance($invalid, "test"); } - public function testInvalidUtf8InputSimilarity() + public function testInvalidUtf8InputSimilarity(): void { $this->expectException(\InvalidArgumentException::class); $this->expectExceptionMessage("Input strings must be valid UTF-8."); $invalid = "\xFF\xFF"; JaroWinkler::similarity($invalid, "test"); } -} \ No newline at end of file +} diff --git a/tests/LCSTest.php b/tests/LCSTest.php index 16d1606..efbfd70 100644 --- a/tests/LCSTest.php +++ b/tests/LCSTest.php @@ -1,132 +1,136 @@ assertSame(6, LCS::length("abcdef", "abcdef")); } - public function testCompletelyDifferentStrings() + public function testCompletelyDifferentStrings(): void { $this->assertSame(0, LCS::length("abc", "xyz")); } - public function testPartiallyMatchingStrings() + public function testPartiallyMatchingStrings(): void { - $this->assertSame(4, LCS::length("AGGTAB", "GXTXAYB")); + $this->assertSame(4, LCS::length("AGGTAB", "GXTXAYB")); } - public function testReorderedCharacters() + public function testReorderedCharacters(): void { - $this->assertSame(2, LCS::length("abc", "cab")); + $this->assertSame(2, LCS::length("abc", "cab")); } - public function testSingleCharacterMatch() + public function testSingleCharacterMatch(): void { $this->assertSame(1, LCS::length("a", "a")); } - public function testSingleCharacterMismatch() + public function testSingleCharacterMismatch(): void { $this->assertSame(0, LCS::length("a", "b")); } - public function testEmptyStrings() + public function testEmptyStrings(): void { $this->assertSame(0, LCS::length("", "")); } - public function testOneEmptyString() + public function testOneEmptyString(): void { $this->assertSame(0, LCS::length("abc", "")); $this->assertSame(0, LCS::length("", "def")); } - public function testLongerExample() + public function testLongerExample(): void { $a = "ABCBDAB"; $b = "BDCAB"; - $this->assertSame(4, LCS::length($a, $b)); + $this->assertSame(4, LCS::length($a, $b)); } - public function testMultibyteIdenticalStrings() + public function testMultibyteIdenticalStrings(): void { $this->assertSame(4, LCS::length("café", "café")); $this->assertSame(2, LCS::length("🚀🌟", "🚀🌟")); } - public function testMultibyteCompletelyDifferent() + public function testMultibyteCompletelyDifferent(): void { $this->assertSame(0, LCS::length("café", "🚀🌟")); } - public function testMultibytePartialMatch() + public function testMultibytePartialMatch(): void { $this->assertSame(3, LCS::length("café", "cafe")); $this->assertSame(1, LCS::length("🚀🌟", "🚀⭐")); } - public function testJapaneseCharacters() + public function testJapaneseCharacters(): void { $this->assertSame(4, LCS::length("こんにちは", "こんにちわ")); } - public function testCyrillicCharacters() + public function testCyrillicCharacters(): void { $this->assertSame(6, LCS::length("собака", "собака")); } - public function testHebrewCharacters() + public function testHebrewCharacters(): void { $this->assertSame(4, LCS::length("עברית", "עבדית")); } - public function testMixedAsciiMultibyte() + public function testMixedAsciiMultibyte(): void { $this->assertSame(9, LCS::length("hello café", "hello cafe")); $this->assertSame(5, LCS::length("test 🚀", "test 🌟")); } - public function testMultibyteWithEmptyString() + public function testMultibyteWithEmptyString(): void { $this->assertSame(0, LCS::length("café", "")); $this->assertSame(0, LCS::length("", "🚀🌟")); } - public function testSingleMultibyteCharacter() + public function testSingleMultibyteCharacter(): void { $this->assertSame(1, LCS::length("é", "é")); $this->assertSame(0, LCS::length("é", "ö")); $this->assertSame(1, LCS::length("🚀", "🚀")); } - public function testMultibyteReorderedCharacters() + public function testMultibyteReorderedCharacters(): void { $this->assertSame(2, LCS::length("éöü", "üéö")); } - public function testLongMultibyteStrings() + public function testLongMultibyteStrings(): void { $longMb1 = str_repeat("ä", 100); $longMb2 = str_repeat("ö", 100); $this->assertSame(0, LCS::length($longMb1, $longMb2)); - + $partialMatch = str_repeat("ä", 50) . str_repeat("ö", 50); $this->assertSame(50, LCS::length($longMb1, $partialMatch)); } - public function testNormalizationCombiningMarks() + public function testNormalizationCombiningMarks(): void { $this->assertSame(0, LCS::length("é", "\u{0065}\u{0301}")); } - public function testZWJFamilyEmojiLCS() + public function testZWJFamilyEmojiLCS(): void { $this->assertSame(4, LCS::length("👨‍👩‍👧‍👦", "👨👩👧👦")); } diff --git a/tests/LevenshteinTest.php b/tests/LevenshteinTest.php index 8d67ef6..450cdd3 100644 --- a/tests/LevenshteinTest.php +++ b/tests/LevenshteinTest.php @@ -1,136 +1,140 @@ assertSame(0, Levenshtein::distance("kitten", "kitten")); } - public function testBasicDistance() + public function testBasicDistance(): void { $this->assertSame(3, Levenshtein::distance("kitten", "sitting")); $this->assertSame(3, Levenshtein::distance("saturday", "sunday")); } - public function testEmptyStrings() + public function testEmptyStrings(): void { $this->assertSame(0, Levenshtein::distance("", "")); } - public function testOneEmptyString() + public function testOneEmptyString(): void { $this->assertSame(3, Levenshtein::distance("abc", "")); $this->assertSame(5, Levenshtein::distance("", "hello")); } - public function testSingleCharacters() + public function testSingleCharacters(): void { $this->assertSame(0, Levenshtein::distance("a", "a")); $this->assertSame(1, Levenshtein::distance("a", "b")); } - public function testCompletelyDifferentStrings() + public function testCompletelyDifferentStrings(): void { $this->assertSame(3, Levenshtein::distance("abc", "xyz")); } - public function testInsertionDeletion() + public function testInsertionDeletion(): void { $this->assertSame(1, Levenshtein::distance("abc", "ab")); $this->assertSame(1, Levenshtein::distance("ab", "abc")); } - public function testSubstitution() + public function testSubstitution(): void { $this->assertSame(1, Levenshtein::distance("abc", "axc")); } - public function testMultibyteIdentical() + public function testMultibyteIdentical(): void { $this->assertSame(0, Levenshtein::distance("café", "café")); $this->assertSame(0, Levenshtein::distance("こんにちは", "こんにちは")); } - public function testMultibyteDistance() + public function testMultibyteDistance(): void { $this->assertSame(1, Levenshtein::distance("café", "cafe")); $this->assertSame(1, Levenshtein::distance("naïve", "naive")); } - public function testEmojiSupport() + public function testEmojiSupport(): void { $this->assertSame(0, Levenshtein::distance("🚀🌟", "🚀🌟")); $this->assertSame(1, Levenshtein::distance("🚀🌟", "🚀⭐")); $this->assertSame(2, Levenshtein::distance("🚀🌟", "⭐🌙")); } - public function testJapaneseCharacters() + public function testJapaneseCharacters(): void { $this->assertSame(1, Levenshtein::distance("こんにちは", "こんにちわ")); $this->assertSame(5, Levenshtein::distance("こんにちは", "さようなら")); } - public function testCyrillicCharacters() + public function testCyrillicCharacters(): void { $this->assertSame(1, Levenshtein::distance("собака", "собаки")); $this->assertSame(7, Levenshtein::distance("собака", "медведь")); } - public function testHebrewCharacters() + public function testHebrewCharacters(): void { $this->assertSame(1, Levenshtein::distance("עברית", "עבדית")); } - public function testMixedAsciiMultibyte() + public function testMixedAsciiMultibyte(): void { $this->assertSame(1, Levenshtein::distance("hello café", "hello cafe")); $this->assertSame(1, Levenshtein::distance("test 🚀", "test 🌟")); } - public function testLongStrings() + public function testLongStrings(): void { $longString1 = str_repeat("a", 1000); $longString2 = str_repeat("b", 1000); $this->assertSame(1000, Levenshtein::distance($longString1, $longString2)); } - public function testLongMultibyteStrings() + public function testLongMultibyteStrings(): void { $longMb1 = str_repeat("ä", 100); $longMb2 = str_repeat("ö", 100); $this->assertSame(100, Levenshtein::distance($longMb1, $longMb2)); } - public function testNormalizationCombiningMarks() + public function testNormalizationCombiningMarks(): void { $this->assertSame(2, Levenshtein::distance("é", "\u{0065}\u{0301}")); } - public function testNFKCCompatibilityNormalization() + public function testNFKCCompatibilityNormalization(): void { $this->assertSame(1, Levenshtein::distance("①", "1")); $this->assertSame(2, Levenshtein::distance("ff", "ff")); $this->assertSame(1, Levenshtein::distance("Å", "Å")); } - public function testNFKDDecomposedNormalization() + public function testNFKDDecomposedNormalization(): void { $this->assertSame(2, Levenshtein::distance("Å", "A\u{030A}")); $this->assertSame(2, Levenshtein::distance("ñ", "n\u{0303}")); } - public function testZWJFamilyEmojiDistance() + public function testZWJFamilyEmojiDistance(): void { $this->assertSame(3, Levenshtein::distance("👨‍👩‍👧‍👦", "👨👩👧👦")); } - public function testSkinToneModifierDistance() + public function testSkinToneModifierDistance(): void { $this->assertSame(1, Levenshtein::distance("👍", "👍🏽")); } diff --git a/tests/MongeElkanTest.php b/tests/MongeElkanTest.php index b1c15b6..0940a88 100644 --- a/tests/MongeElkanTest.php +++ b/tests/MongeElkanTest.php @@ -1,18 +1,27 @@ assertEqualsWithDelta(1.0, MongeElkan::similarity("john smith", "john smith"), 1e-10); } - public function testPartialMatch() + public function testPartialMatch(): void { $a = "john smith"; $b = "jon smythe"; @@ -22,82 +31,82 @@ public function testPartialMatch() $this->assertLessThan(1.0, $result); } - public function testDifferentWords() + public function testDifferentWords(): void { $this->assertEqualsWithDelta(0.0, MongeElkan::similarity("abc def", "xyz uvw"), 1e-10); } - public function testSingleWordMatch() + public function testSingleWordMatch(): void { $this->assertEqualsWithDelta(1.0, MongeElkan::similarity("hello", "hello"), 1e-10); } - public function testSingleWordMismatch() + public function testSingleWordMismatch(): void { $similarity = MongeElkan::similarity("hello", "world"); $this->assertGreaterThan(0.0, $similarity); $this->assertLessThan(1.0, $similarity); } - public function testEmptyStrings() + public function testEmptyStrings(): void { $this->assertEqualsWithDelta(1.0, MongeElkan::similarity("", ""), 1e-10); } - public function testOneEmpty() + public function testOneEmpty(): void { $this->assertEqualsWithDelta(0.0, MongeElkan::similarity("test", ""), 1e-10); } - public function testMultibyteIdenticalSentences() + public function testMultibyteIdenticalSentences(): void { $this->assertEqualsWithDelta(1.0, MongeElkan::similarity("café latte", "café latte"), 1e-10); $this->assertEqualsWithDelta(1.0, MongeElkan::similarity("🚀 🌟", "🚀 🌟"), 1e-10); } - public function testMultibytePartialMatch() + public function testMultibytePartialMatch(): void { $result = MongeElkan::similarity("café latte", "cafe latte"); $this->assertGreaterThan(0.0, $result); $this->assertLessThan(1.0, $result); } - public function testJapaneseWords() + public function testJapaneseWords(): void { $result = MongeElkan::similarity("こんにちは 世界", "こんにちは せかい"); $this->assertGreaterThan(0.0, $result); $this->assertLessThan(1.0, $result); } - public function testCyrillicWords() + public function testCyrillicWords(): void { $result = MongeElkan::similarity("собака кошка", "собака медведь"); $this->assertGreaterThan(0.0, $result); $this->assertLessThan(1.0, $result); } - public function testHebrewWords() + public function testHebrewWords(): void { $result = MongeElkan::similarity("עברית טוב", "עבדית טוב"); $this->assertGreaterThan(0.0, $result); $this->assertLessThan(1.0, $result); } - public function testEmojiWords() + public function testEmojiWords(): void { $result = MongeElkan::similarity("🚀 🌟", "🚀 ⭐"); $this->assertGreaterThan(0.0, $result); $this->assertLessThan(1.0, $result); } - public function testMixedAsciiMultibyte() + public function testMixedAsciiMultibyte(): void { $result = MongeElkan::similarity("hello café", "hello cafe"); $this->assertGreaterThan(0.0, $result); $this->assertLessThan(1.0, $result); } - public function testSingleMultibyteWord() + public function testSingleMultibyteWord(): void { $this->assertEqualsWithDelta(1.0, MongeElkan::similarity("café", "café"), 1e-10); $result = MongeElkan::similarity("café", "cafe"); @@ -105,33 +114,33 @@ public function testSingleMultibyteWord() $this->assertLessThan(1.0, $result); } - public function testMultibyteDifferentWords() + public function testMultibyteDifferentWords(): void { $result = MongeElkan::similarity("café latte", "🚀 🌟"); $this->assertGreaterThanOrEqual(0.0, $result); $this->assertLessThan(1.0, $result); } - public function testMultibyteWithEmptyString() + public function testMultibyteWithEmptyString(): void { $this->assertEqualsWithDelta(0.0, MongeElkan::similarity("café latte", ""), 1e-10); } - public function testMultipleMultibyteWords() + public function testMultipleMultibyteWords(): void { $result = MongeElkan::similarity("こんにちは 世界 コンピューター", "こんにちわ 世界 コンピュータ"); $this->assertGreaterThan(0.0, $result); $this->assertLessThan(1.0, $result); } - public function testAsymmetryBehavior() + public function testAsymmetryBehavior(): void { $a = MongeElkan::similarity("john", "john smith"); $b = MongeElkan::similarity("john smith", "john"); $this->assertNotEquals($a, $b); } - public function testMultipleSpacesAndEmptyTokens() + public function testMultipleSpacesAndEmptyTokens(): void { $result = MongeElkan::similarity("hello world", "hello world"); $this->assertGreaterThan(0.0, $result); diff --git a/tests/NeedlemanWunschTest.php b/tests/NeedlemanWunschTest.php index 407a07e..fbad8e9 100644 --- a/tests/NeedlemanWunschTest.php +++ b/tests/NeedlemanWunschTest.php @@ -1,54 +1,58 @@ assertSame(7, NeedlemanWunsch::score("GATTACA", "GATTACA")); } - public function testCompletelyDifferentStrings() - { + public function testCompletelyDifferentStrings(): void + { $this->assertSame(-7, NeedlemanWunsch::score("AAAAAAA", "GGGGGGG")); } - public function testPartialMatch() - { + public function testPartialMatch(): void + { $this->assertSame(0, NeedlemanWunsch::score("GATTACA", "GCATGCU")); } - public function testEmptyStrings() + public function testEmptyStrings(): void { $this->assertSame(0, NeedlemanWunsch::score("", "")); } - public function testOneEmpty() - { + public function testOneEmpty(): void + { $this->assertSame(-4, NeedlemanWunsch::score("ACGT", "")); $this->assertSame(-4, NeedlemanWunsch::score("", "ACGT")); } - public function testCustomScoring() - { + public function testCustomScoring(): void + { $this->assertSame(20, NeedlemanWunsch::score("AAAA", "AAAA", match: 5, mismatch: -1, gap: -2)); } - public function testGapOnly() - { + public function testGapOnly(): void + { $this->assertSame(-1, NeedlemanWunsch::score("A", "AAA")); } - public function testValidUtf8Passes() + public function testValidUtf8Passes(): void { $this->assertSame(3, NeedlemanWunsch::score("ありがとう", "ありがと")); } - public function testInvalidUtf8Input() + public function testInvalidUtf8Input(): void { $this->expectException(\InvalidArgumentException::class); $invalid = "\xFF\xFF"; diff --git a/tests/SmithWatermanTest.php b/tests/SmithWatermanTest.php index f577bf7..51fb21e 100644 --- a/tests/SmithWatermanTest.php +++ b/tests/SmithWatermanTest.php @@ -1,55 +1,59 @@ assertSame(14, SmithWaterman::score("GATTACA", "GATTACA")); } - public function testCompletelyDifferentStrings() - { + public function testCompletelyDifferentStrings(): void + { $this->assertSame(0, SmithWaterman::score("AAAAAAA", "GGGGGGG")); } - public function testPartialOverlap() + public function testPartialOverlap(): void { $this->assertSame(5, SmithWaterman::score("GATTACA", "GCATGCU")); } - public function testEmptyStrings() + public function testEmptyStrings(): void { $this->assertSame(0, SmithWaterman::score("", "")); } - public function testOneEmpty() + public function testOneEmpty(): void { $this->assertSame(0, SmithWaterman::score("ACGT", "")); $this->assertSame(0, SmithWaterman::score("", "ACGT")); } - public function testCustomScoring() - { + public function testCustomScoring(): void + { $this->assertSame(20, SmithWaterman::score("AAAA", "AAAA", match: 5, mismatch: -1, gap: -2)); } - public function testSubstringAlignment() - { + public function testSubstringAlignment(): void + { $this->assertSame(4, SmithWaterman::score("ACGT", "CG")); } - public function testUtf8MultibyteCharacters() - { + public function testUtf8MultibyteCharacters(): void + { $score = SmithWaterman::score("あ", "い"); $this->assertSame(0, $score); } - public function testInvalidUtf8Input() + public function testInvalidUtf8Input(): void { $this->expectException(\InvalidArgumentException::class); $invalid = "\xFF\xFF";