From 260fcbea3475459052c3ed3338c880a4d365ff5f Mon Sep 17 00:00:00 2001 From: Sergey Biryukov Date: Thu, 25 Jun 2026 19:43:05 +0000 Subject: [PATCH 01/13] Code Modernization: Use `array_any()` in `WP_REST_Comments_Controller`. This commit replaces a `foreach` loop in `::check_post_type_supports_notes()` that iterates the editor supports, returns `true` as soon as an element has non-empty notes, and otherwise falls through to `false`. That is exactly what PHP 8.4's `array_any()` expresses in a single, more readable call. WordPress core includes a polyfill for `array_any()` on PHP < 8.4 as of WordPress 6.8, so the change works on every supported PHP version without raising the minimum requirement. Follow-up to [59783], [62550], [62553]. Props Soean. See #65519. git-svn-id: https://develop.svn.wordpress.org/trunk@62564 602fd350-edb4-49c9-b593-d223f7449a82 --- .../endpoints/class-wp-rest-comments-controller.php | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/rest-api/endpoints/class-wp-rest-comments-controller.php b/src/wp-includes/rest-api/endpoints/class-wp-rest-comments-controller.php index f462928847c77..d14aefb1f6308 100644 --- a/src/wp-includes/rest-api/endpoints/class-wp-rest-comments-controller.php +++ b/src/wp-includes/rest-api/endpoints/class-wp-rest-comments-controller.php @@ -2049,17 +2049,15 @@ protected function check_is_comment_content_allowed( $prepared_comment ) { */ private function check_post_type_supports_notes( $post_type ) { $supports = get_all_post_type_supports( $post_type ); + if ( ! isset( $supports['editor'] ) ) { return false; } + if ( ! is_array( $supports['editor'] ) ) { return false; } - foreach ( $supports['editor'] as $item ) { - if ( ! empty( $item['notes'] ) ) { - return true; - } - } - return false; + + return array_any( $supports['editor'], fn( $item ) => ! 
empty( $item['notes'] ) ); } } From 752d544e9ab7bb5bd9948203dc65a2f97bc43015 Mon Sep 17 00:00:00 2001 From: Sergey Biryukov Date: Fri, 26 Jun 2026 22:49:12 +0000 Subject: [PATCH 02/13] Docs: Correct `$format` default in `get_next_post_link()` and `next_post_link()`. Follow-up to [37254]. Props ishihara-takashi, sabernhardt, khokansardar, mindctrl, SergeyBiryukov. Fixes #65541. git-svn-id: https://develop.svn.wordpress.org/trunk@62565 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-includes/link-template.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/link-template.php b/src/wp-includes/link-template.php index cfff8b6525e10..223d6b5548fc6 100644 --- a/src/wp-includes/link-template.php +++ b/src/wp-includes/link-template.php @@ -2291,7 +2291,7 @@ function previous_post_link( $format = '&laquo; %link', $link = '%title', $in_sa * * @since 3.7.0 * - * @param string $format Optional. Link anchor format. Default '&laquo; %link'. + * @param string $format Optional. Link anchor format. Default '%link &raquo;'. * @param string $link Optional. Link permalink format. Default '%title'. * @param bool $in_same_term Optional. Whether link should be in the same taxonomy term. * Default false. @@ -2311,7 +2311,7 @@ function get_next_post_link( $format = '%link &raquo;', $link = '%title', $in_sa * * @see get_next_post_link() * - * @param string $format Optional. Link anchor format. Default '&laquo; %link'. + * @param string $format Optional. Link anchor format. Default '%link &raquo;'. * @param string $link Optional. Link permalink format. Default '%title'. * @param bool $in_same_term Optional. Whether link should be in the same taxonomy term. * Default false. From 2a5a37e54b1474f24e3f49a9345b22c49ff6e2e4 Mon Sep 17 00:00:00 2001 From: Sergey Biryukov Date: Sat, 27 Jun 2026 22:17:32 +0000 Subject: [PATCH 03/13] Build/Test Tools: Correct `git pull` command for syncing with upstream. Follow-up to [61202]. Props mkrndmane, mukesh27, khokansardar, dhruvang21, SergeyBiryukov. 
Fixes #65540. git-svn-id: https://develop.svn.wordpress.org/trunk@62566 602fd350-edb4-49c9-b593-d223f7449a82 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5201a5180c1da..820b54759c907 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ You can get started using the local development environment with these steps: 1. Then clone the forked repository to your computer using `git clone https://github.com/<your-username>/wordpress-develop.git`. 1. Navigate into the directory for the cloned repository using `cd wordpress-develop`. 1. Add the origin repo as an `upstream` remote via `git remote add upstream https://github.com/WordPress/wordpress-develop.git`. -1. Then you can keep your branches up to date via `git pull --ff upstream/trunk`, for example. +1. Then you can keep your branches up to date via `git pull --ff upstream trunk`, for example. Alternatively, if you have the [GitHub CLI](https://cli.github.com/) installed, you can simply run `gh repo fork WordPress/wordpress-develop --clone --remote` ([docs](https://cli.github.com/manual/gh_repo_fork)). This command will: 1. Fork the repository to your account (use the `--org` flag to clone into an organization). From e0e6680d69097e330921898e1747030e7d204f48 Mon Sep 17 00:00:00 2001 From: Joe Dolson Date: Sun, 28 Jun 2026 20:38:14 +0000 Subject: [PATCH 04/13] Administration: Fix selected/active buttons in High Contrast Mode. Follow up to [62467]. Replaces original fix, which turned out to be insufficient. Replaces pseudo-elements with more standard outlines, shifted in scale for visibility. Props sabernhardt, wildworks, apermo, joedolson. Fixes #65153. 
git-svn-id: https://develop.svn.wordpress.org/trunk@62567 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-includes/css/buttons.css | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/wp-includes/css/buttons.css b/src/wp-includes/css/buttons.css index 09457ce6a4dc5..967970a9ba461 100644 --- a/src/wp-includes/css/buttons.css +++ b/src/wp-includes/css/buttons.css @@ -194,7 +194,8 @@ TABLE OF CONTENTS: color: var(--wp-admin-theme-color-darker-20, #183ad6); border-color: var(--wp-admin-theme-color, #3858e9); box-shadow: inset 0 2px 6px -2px var(--wp-admin-theme-color-darker-20); - position: relative; + outline: 3px solid transparent; + outline-offset: -3px; } .wp-core-ui .button.active:focus { @@ -202,19 +203,7 @@ TABLE OF CONTENTS: color: var(--wp-admin-theme-color-darker-20, #183ad6); border-color: var(--wp-admin-theme-color-darker-20, #183ad6); box-shadow: inset 0 2px 6px -2px var(--wp-admin-theme-color-darker-20), 0 0 0 var(--wp-admin-border-width-focus, 1.5px) var(--wp-admin-theme-color, #3858e9); -} - -/* Only visible in Windows High Contrast mode */ -.wp-core-ui .button.active:before { - content: ""; - display: block; - position: absolute; - width: 100%; - height: 0; - border-top: 3px solid transparent; - bottom: 0; - left: 0; - box-sizing: border-box; + outline-width: 4px; } .wp-core-ui .button[disabled], From 8b36984e643a15ca94a9bffff3f9e92d29ac12e9 Mon Sep 17 00:00:00 2001 From: Joe Dolson Date: Sun, 28 Jun 2026 21:41:22 +0000 Subject: [PATCH 05/13] Administration: Fix cursor on first submenu list item in admin menu. The first submenu item in the collapsed view of the admin menu accepts a click event to navigate, but does not have `cursor: pointer` to indicate that it's interactive. These were removed in [51684], but this specific case (JS activate, menu collapsed, first list item) should remain. Props mazharulrobeen, sumitsingh, sabernhardt, swapnil1010, joedolson. Fixes #65250. 
git-svn-id: https://develop.svn.wordpress.org/trunk@62568 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-admin/css/admin-menu.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/wp-admin/css/admin-menu.css b/src/wp-admin/css/admin-menu.css index c4b32ac4b9e87..2b665f583484f 100644 --- a/src/wp-admin/css/admin-menu.css +++ b/src/wp-admin/css/admin-menu.css @@ -417,6 +417,10 @@ ul#adminmenu > li.current > a.current:after { border-color: transparent; } +.js #adminmenu .wp-submenu .wp-submenu-head { + cursor: pointer; +} + #adminmenu li.current, .folded #adminmenu li.wp-menu-open { border: 0 none; From cdf2433e06650ac5d007ccbffbe03c089bd7573a Mon Sep 17 00:00:00 2001 From: Sergey Biryukov Date: Sun, 28 Jun 2026 23:21:09 +0000 Subject: [PATCH 06/13] Build/Test Tools: Update GitHub CLI fork command in `README.md`. This resolves an error when running the documented command as of GitHub CLI 2.88.0: {{{ the `--remote` flag is unsupported when a repository argument is provided. }}} Reference: [https://github.com/cli/cli/pull/12375 GitHub CLI: fix: error when --remote flag used with repo argument]. Follow-up to [61202]. Props mkrndmane, khokansardar. Fixes #65542. git-svn-id: https://develop.svn.wordpress.org/trunk@62569 602fd350-edb4-49c9-b593-d223f7449a82 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 820b54759c907..bb6d06c034651 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ You can get started using the local development environment with these steps: 1. Add the origin repo as an `upstream` remote via `git remote add upstream https://github.com/WordPress/wordpress-develop.git`. 1. Then you can keep your branches up to date via `git pull --ff upstream trunk`, for example. -Alternatively, if you have the [GitHub CLI](https://cli.github.com/) installed, you can simply run `gh repo fork WordPress/wordpress-develop --clone --remote` ([docs](https://cli.github.com/manual/gh_repo_fork)). 
This command will: +Alternatively, if you have the [GitHub CLI](https://cli.github.com/) installed, you can simply run `gh repo fork WordPress/wordpress-develop --clone` ([docs](https://cli.github.com/manual/gh_repo_fork)). This command will: 1. Fork the repository to your account (use the `--org` flag to clone into an organization). 1. Clone the repository to your machine. 1. Add `WordPress/wordpress-develop` as `upstream` and set it to the default `remote` repository From f5523819aa419d97a9bade437a7bbe7d685e9505 Mon Sep 17 00:00:00 2001 From: Carlos Bravo Date: Mon, 29 Jun 2026 08:34:46 +0000 Subject: [PATCH 07/13] Build Tools: Replace deprecated browserslist --update-db command. Replaces the deprecated `--update-db` command in the `browserslist:update` Grunt task with `update-browserslist-db@latest`. Props ekla, sergeybiryukov, masteradhoc. Fixes #64900. git-svn-id: https://develop.svn.wordpress.org/trunk@62570 602fd350-edb4-49c9-b593-d223f7449a82 --- Gruntfile.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gruntfile.js b/Gruntfile.js index dae8c3e972e4c..ab56358b8f60d 100644 --- a/Gruntfile.js +++ b/Gruntfile.js @@ -2370,7 +2370,7 @@ module.exports = function(grunt) { grunt.registerTask( 'browserslist:update', 'Update the local database of browser supports', function() { grunt.log.writeln( `Updating browsers list` ); - spawn( 'npx', [ 'browserslist@latest', '--update-db' ], { + spawn( 'npx', [ 'update-browserslist-db@latest' ], { cwd: __dirname, stdio: 'inherit', } ); From d8490eb216694f793df6d078a99eadf71ab948ca Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 10 Jun 2026 23:19:37 +0200 Subject: [PATCH 08/13] Fix WP_Token_Map array export key length --- src/wp-includes/class-wp-token-map.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/class-wp-token-map.php b/src/wp-includes/class-wp-token-map.php index fc223b187f8c5..108f28475241a 100644 --- a/src/wp-includes/class-wp-token-map.php +++ 
b/src/wp-includes/class-wp-token-map.php @@ -662,7 +662,7 @@ public function to_array(): array { } foreach ( $this->large_words as $index => $group ) { - $prefix = substr( $this->groups, $index * ( $this->key_length + 1 ), 2 ); + $prefix = substr( $this->groups, $index * ( $this->key_length + 1 ), $this->key_length ); $group_length = strlen( $group ); $at = 0; while ( $at < $group_length ) { From f15c6d05d8954b27aaecd733ef3e79e7f198a183 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 10 Jun 2026 23:30:41 +0200 Subject: [PATCH 09/13] Fix WP_Token_Map read_token bounds --- src/wp-includes/class-wp-token-map.php | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/class-wp-token-map.php b/src/wp-includes/class-wp-token-map.php index 108f28475241a..da038556146fe 100644 --- a/src/wp-includes/class-wp-token-map.php +++ b/src/wp-includes/class-wp-token-map.php @@ -536,14 +536,16 @@ public function read_token( string $text, int $offset = 0, &$matched_token_byte_ $text_length = strlen( $text ); // Search for a long word first, if the text is long enough, and if that fails, a short one. - if ( $text_length > $this->key_length ) { + if ( $text_length - $offset > $this->key_length ) { /* * Keys cannot contain null bytes, which is taken care of for the full words, * but here it’s required to reject group keys with null bytes so that the * lookup doesn’t get off track when scanning the group string. */ if ( strcspn( $text, "\x00", $offset, $this->key_length ) < $this->key_length ) { - return null; + return strlen( $this->small_words ) > 0 + ? 
$this->read_small_token( $text, $offset, $matched_token_byte_length, $case_sensitivity ) + : null; } $group_key = substr( $text, $offset, $this->key_length ); @@ -596,6 +598,10 @@ private function read_small_token( string $text, int $offset = 0, &$matched_toke $ignore_case = 'ascii-case-insensitive' === $case_sensitivity; $small_length = strlen( $this->small_words ); $search_text = substr( $text, $offset, $this->key_length ); + if ( '' === $search_text ) { + return null; + } + if ( $ignore_case ) { $search_text = strtoupper( $search_text ); } @@ -617,6 +623,11 @@ private function read_small_token( string $text, int $offset = 0, &$matched_toke return $this->small_mappings[ $at / ( $this->key_length + 1 ) ]; } + if ( ! isset( $search_text[ $adjust ] ) ) { + $at += $this->key_length + 1; + continue 2; + } + if ( $search_text[ $adjust ] !== $this->small_words[ $at + $adjust ] && ( ! $ignore_case || strtoupper( $this->small_words[ $at + $adjust ] !== $search_text[ $adjust ] ) ) From 8f28fbb5b3bb7266d2fc21b0b5d5028c5f8e489a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 10 Jun 2026 23:31:53 +0200 Subject: [PATCH 10/13] Fix WP_Token_Map ASCII matching --- src/wp-includes/class-wp-token-map.php | 74 +++++++++++++++++++++----- 1 file changed, 62 insertions(+), 12 deletions(-) diff --git a/src/wp-includes/class-wp-token-map.php b/src/wp-includes/class-wp-token-map.php index da038556146fe..3409d5519b13c 100644 --- a/src/wp-includes/class-wp-token-map.php +++ b/src/wp-includes/class-wp-token-map.php @@ -451,13 +451,20 @@ public function contains( string $word, string $case_sensitivity = 'case-sensiti return false; } - $term = str_pad( $word, $this->key_length + 1, "\x00", STR_PAD_RIGHT ); - $word_at = $ignore_case ? stripos( $this->small_words, $term ) : strpos( $this->small_words, $term ); - if ( false === $word_at ) { - return false; + $term = str_pad( $word, $this->key_length + 1, "\x00", STR_PAD_RIGHT ); + if ( ! 
$ignore_case ) { + return false !== strpos( $this->small_words, $term ); + } + + $small_length = strlen( $this->small_words ); + $record_length = $this->key_length + 1; + for ( $at = 0; $at < $small_length; $at += $record_length ) { + if ( self::matches_at( $this->small_words, $term, $at, $record_length, $ignore_case ) ) { + return true; + } } - return true; + return false; } $group_key = substr( $word, 0, $this->key_length ); @@ -478,7 +485,7 @@ public function contains( string $word, string $case_sensitivity = 'case-sensiti $mapping_length = unpack( 'C', $group[ $at++ ] )[1]; $mapping_at = $at; - if ( $token_length === $length && 0 === substr_compare( $group, $slug, $token_at, $token_length, $ignore_case ) ) { + if ( $token_length === $length && self::matches_at( $group, $slug, $token_at, $token_length, $ignore_case ) ) { return true; } @@ -567,7 +574,7 @@ public function read_token( string $text, int $offset = 0, &$matched_token_byte_ $mapping_length = unpack( 'C', $group[ $at++ ] )[1]; $mapping_at = $at; - if ( 0 === substr_compare( $text, $token, $offset + $this->key_length, $token_length, $ignore_case ) ) { + if ( self::matches_at( $text, $token, $offset + $this->key_length, $token_length, $ignore_case ) ) { $matched_token_byte_length = $this->key_length + $token_length; return substr( $group, $mapping_at, $mapping_length ); } @@ -603,15 +610,18 @@ private function read_small_token( string $text, int $offset = 0, &$matched_toke } if ( $ignore_case ) { - $search_text = strtoupper( $search_text ); + $search_text = self::ascii_lowercase( $search_text ); } $starting_char = $search_text[0]; $at = 0; while ( $at < $small_length ) { + $stored_starting_char = $ignore_case + ? self::ascii_lowercase( $this->small_words[ $at ] ) + : $this->small_words[ $at ]; + if ( - $starting_char !== $this->small_words[ $at ] && - ( ! 
$ignore_case || strtoupper( $this->small_words[ $at ] ) !== $starting_char ) + $starting_char !== $stored_starting_char ) { $at += $this->key_length + 1; continue; @@ -628,9 +638,12 @@ private function read_small_token( string $text, int $offset = 0, &$matched_toke continue 2; } + $stored_char = $ignore_case + ? self::ascii_lowercase( $this->small_words[ $at + $adjust ] ) + : $this->small_words[ $at + $adjust ]; + if ( - $search_text[ $adjust ] !== $this->small_words[ $at + $adjust ] && - ( ! $ignore_case || strtoupper( $this->small_words[ $at + $adjust ] !== $search_text[ $adjust ] ) ) + $search_text[ $adjust ] !== $stored_char ) { $at += $this->key_length + 1; continue 2; @@ -840,4 +853,41 @@ private static function longest_first_then_alphabetical( string $a, string $b ): return strcmp( $a, $b ); } + + /** + * Checks whether a substring matches at a given offset. + * + * @since 6.6.0 + * + * @param string $haystack String to search within. + * @param string $needle String to match. + * @param int $offset Offset into the haystack. + * @param int $length Number of bytes to compare. + * @param bool $ignore_case Whether to fold ASCII case while matching. + * @return bool Whether the substring matched. + */ + private static function matches_at( string $haystack, string $needle, int $offset, int $length, bool $ignore_case ): bool { + $candidate = substr( $haystack, $offset, $length ); + if ( strlen( $candidate ) !== $length ) { + return false; + } + + if ( ! $ignore_case ) { + return $candidate === $needle; + } + + return self::ascii_lowercase( $candidate ) === self::ascii_lowercase( $needle ); + } + + /** + * Lowercases ASCII bytes only. + * + * @since 6.6.0 + * + * @param string $text Text to lowercase. + * @return string Text with only ASCII uppercase bytes folded to lowercase. 
+ */ + private static function ascii_lowercase( string $text ): string { + return strtr( $text, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz' ); + } } From 7f68409f371c986c1b3d02c6e49983eee8768eee Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 10 Jun 2026 23:33:13 +0200 Subject: [PATCH 11/13] Handle WP_Token_Map folded group keys --- src/wp-includes/class-wp-token-map.php | 142 +++++++++++++++++++------ 1 file changed, 108 insertions(+), 34 deletions(-) diff --git a/src/wp-includes/class-wp-token-map.php b/src/wp-includes/class-wp-token-map.php index 3409d5519b13c..6e38fd9c05774 100644 --- a/src/wp-includes/class-wp-token-map.php +++ b/src/wp-includes/class-wp-token-map.php @@ -467,29 +467,33 @@ public function contains( string $word, string $case_sensitivity = 'case-sensiti return false; } - $group_key = substr( $word, 0, $this->key_length ); - $group_at = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key ); - if ( false === $group_at ) { + $group_key = substr( $word, 0, $this->key_length ); + $group_indexes = $this->find_group_indexes( $group_key, $ignore_case ); + if ( empty( $group_indexes ) ) { return false; } - $group = $this->large_words[ $group_at / ( $this->key_length + 1 ) ]; - $group_length = strlen( $group ); - $slug = substr( $word, $this->key_length ); - $length = strlen( $slug ); - $at = 0; - while ( $at < $group_length ) { - $token_length = unpack( 'C', $group[ $at++ ] )[1]; - $token_at = $at; - $at += $token_length; - $mapping_length = unpack( 'C', $group[ $at++ ] )[1]; - $mapping_at = $at; + $slug = substr( $word, $this->key_length ); + $length = strlen( $slug ); - if ( $token_length === $length && self::matches_at( $group, $slug, $token_at, $token_length, $ignore_case ) ) { - return true; - } + foreach ( $group_indexes as $group_index ) { + $group = $this->large_words[ $group_index ]; + $group_length = strlen( $group ); + $at = 0; + + while ( $at < $group_length ) { + $token_length = 
unpack( 'C', $group[ $at++ ] )[1]; + $token_at = $at; + $at += $token_length; + $mapping_length = unpack( 'C', $group[ $at++ ] )[1]; + $mapping_at = $at; + + if ( $token_length === $length && self::matches_at( $group, $slug, $token_at, $token_length, $ignore_case ) ) { + return true; + } - $at = $mapping_at + $mapping_length; + $at = $mapping_at + $mapping_length; + } } return false; @@ -555,31 +559,67 @@ public function read_token( string $text, int $offset = 0, &$matched_token_byte_ : null; } - $group_key = substr( $text, $offset, $this->key_length ); - $group_at = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key ); - if ( false === $group_at ) { + $group_key = substr( $text, $offset, $this->key_length ); + $group_indexes = $this->find_group_indexes( $group_key, $ignore_case ); + if ( empty( $group_indexes ) ) { // Perhaps a short word then. return strlen( $this->small_words ) > 0 ? $this->read_small_token( $text, $offset, $matched_token_byte_length, $case_sensitivity ) : null; } - $group = $this->large_words[ $group_at / ( $this->key_length + 1 ) ]; - $group_length = strlen( $group ); - $at = 0; - while ( $at < $group_length ) { - $token_length = unpack( 'C', $group[ $at++ ] )[1]; - $token = substr( $group, $at, $token_length ); - $at += $token_length; - $mapping_length = unpack( 'C', $group[ $at++ ] )[1]; - $mapping_at = $at; + if ( ! 
$ignore_case ) { + $group = $this->large_words[ $group_indexes[0] ]; + $group_length = strlen( $group ); + $at = 0; + while ( $at < $group_length ) { + $token_length = unpack( 'C', $group[ $at++ ] )[1]; + $token = substr( $group, $at, $token_length ); + $at += $token_length; + $mapping_length = unpack( 'C', $group[ $at++ ] )[1]; + $mapping_at = $at; + + if ( 0 === substr_compare( $text, $token, $offset + $this->key_length, $token_length ) ) { + $matched_token_byte_length = $this->key_length + $token_length; + return substr( $group, $mapping_at, $mapping_length ); + } + + $at = $mapping_at + $mapping_length; + } - if ( self::matches_at( $text, $token, $offset + $this->key_length, $token_length, $ignore_case ) ) { - $matched_token_byte_length = $this->key_length + $token_length; - return substr( $group, $mapping_at, $mapping_length ); + return strlen( $this->small_words ) > 0 + ? $this->read_small_token( $text, $offset, $matched_token_byte_length, $case_sensitivity ) + : null; + } + + $best_match_length = null; + $best_mapping = null; + foreach ( $group_indexes as $group_index ) { + $group = $this->large_words[ $group_index ]; + $group_length = strlen( $group ); + $at = 0; + while ( $at < $group_length ) { + $token_length = unpack( 'C', $group[ $at++ ] )[1]; + $token = substr( $group, $at, $token_length ); + $at += $token_length; + $mapping_length = unpack( 'C', $group[ $at++ ] )[1]; + $mapping_at = $at; + + if ( self::matches_at( $text, $token, $offset + $this->key_length, $token_length, $ignore_case ) ) { + $match_length = $this->key_length + $token_length; + if ( null === $best_match_length || $match_length > $best_match_length ) { + $best_match_length = $match_length; + $best_mapping = substr( $group, $mapping_at, $mapping_length ); + } + } + + $at = $mapping_at + $mapping_length; } + } - $at = $mapping_at + $mapping_length; + if ( null !== $best_match_length ) { + $matched_token_byte_length = $best_match_length; + return $best_mapping; } } @@ -854,6 +894,40 @@ 
private static function longest_first_then_alphabetical( string $a, string $b ): return strcmp( $a, $b ); } + /** + * Finds group indexes that match a lookup key. + * + * @since 6.6.0 + * + * @param string $group_key Group key to find. + * @param bool $ignore_case Whether to fold ASCII case while searching. + * @return int[] Matching group indexes. + */ + private function find_group_indexes( string $group_key, bool $ignore_case ): array { + if ( ! $ignore_case ) { + $group_at = strpos( $this->groups, $group_key ); + + return false === $group_at + ? array() + : array( $group_at / ( $this->key_length + 1 ) ); + } + + $group_indexes = array(); + $record_length = $this->key_length + 1; + $groups_length = strlen( $this->groups ); + $group_index = 0; + + for ( $at = 0; $at < $groups_length; $at += $record_length ) { + if ( self::matches_at( $this->groups, $group_key, $at, $this->key_length, $ignore_case ) ) { + $group_indexes[] = $group_index; + } + + ++$group_index; + } + + return $group_indexes; + } + /** * Checks whether a substring matches at a given offset. 
* From f082e73d973a0cc570a0d948c3d90ef0404ca069 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 10 Jun 2026 23:33:30 +0200 Subject: [PATCH 12/13] Escape WP_Token_Map precomputed source --- src/wp-includes/class-wp-token-map.php | 96 ++++++++++++++++++-------- 1 file changed, 68 insertions(+), 28 deletions(-) diff --git a/src/wp-includes/class-wp-token-map.php b/src/wp-includes/class-wp-token-map.php index 6e38fd9c05774..1818c3d700b11 100644 --- a/src/wp-includes/class-wp-token-map.php +++ b/src/wp-includes/class-wp-token-map.php @@ -785,7 +785,7 @@ public function precomputed_php_source_table( string $indent = "\t" ): string { $output .= "{$i2}\"storage_version\" => \"{$class_version}\",\n"; $output .= "{$i2}\"key_length\" => {$this->key_length},\n"; - $group_line = str_replace( "\x00", "\\x00", $this->groups ); + $group_line = self::escape_precomputed_php_string( $this->groups ); $output .= "{$i2}\"groups\" => \"{$group_line}\",\n"; $output .= "{$i2}\"large_words\" => array(\n"; @@ -798,7 +798,7 @@ public function precomputed_php_source_table( string $indent = "\t" ): string { $group = $this->large_words[ $index ]; $group_length = strlen( $group ); $comment_line = "{$i3}//"; - $data_line = "{$i3}\""; + $group_data = ''; $at = 0; while ( $at < $group_length ) { $token_length = unpack( 'C', $group[ $at++ ] )[1]; @@ -808,32 +808,11 @@ public function precomputed_php_source_table( string $indent = "\t" ): string { $mapping = substr( $group, $at, $mapping_length ); $at += $mapping_length; - $token_digits = str_pad( dechex( $token_length ), 2, '0', STR_PAD_LEFT ); - $mapping_digits = str_pad( dechex( $mapping_length ), 2, '0', STR_PAD_LEFT ); - - $mapping = preg_replace_callback( - "~[\\x00-\\x1f\\x22\\x5c]~", - static function ( $match_result ) { - switch ( $match_result[0] ) { - case '"': - return '\\"'; - - case '\\': - return '\\\\'; - - default: - $hex = dechex( ord( $match_result[0] ) ); - return "\\x{$hex}"; - } - }, - $mapping - ); - - $comment_line .= " 
{$prefix}{$token}[{$mapping}]"; - $data_line .= "\\x{$token_digits}{$token}\\x{$mapping_digits}{$mapping}"; + $group_data .= pack( 'C', $token_length ) . $token . pack( 'C', $mapping_length ) . $mapping; + $comment_line .= ' ' . self::escape_precomputed_php_comment( "{$prefix}{$token}" ) . '[' . self::escape_precomputed_php_comment( $mapping ) . ']'; } $comment_line .= ".\n"; - $data_line .= "\",\n"; + $data_line = "{$i3}\"" . self::escape_precomputed_php_string( $group_data ) . "\",\n"; $output .= $comment_line; $output .= $data_line; @@ -849,12 +828,12 @@ static function ( $match_result ) { $at += $this->key_length + 1; } - $small_text = str_replace( "\x00", '\x00', implode( '', $small_words ) ); + $small_text = self::escape_precomputed_php_string( implode( '', $small_words ) ); $output .= "{$i2}\"small_words\" => \"{$small_text}\",\n"; $output .= "{$i2}\"small_mappings\" => array(\n"; foreach ( $this->small_mappings as $mapping ) { - $output .= "{$i3}\"{$mapping}\",\n"; + $output .= "{$i3}\"" . self::escape_precomputed_php_string( $mapping ) . "\",\n"; } $output .= "{$i2})\n"; $output .= "{$i1})\n"; @@ -964,4 +943,65 @@ private static function matches_at( string $haystack, string $needle, int $offse private static function ascii_lowercase( string $text ): string { return strtr( $text, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz' ); } + + /** + * Escapes text for use inside a double-quoted PHP string literal. + * + * @since 6.6.0 + * + * @param string $text Text to escape. + * @return string Escaped string literal body. + */ + private static function escape_precomputed_php_string( string $text ): string { + $escaped = ''; + $length = strlen( $text ); + + for ( $i = 0; $i < $length; $i++ ) { + $byte = ord( $text[ $i ] ); + switch ( $text[ $i ] ) { + case '"': + $escaped .= '\\"'; + break; + + case '\\': + $escaped .= '\\\\'; + break; + + case '$': + $escaped .= '\\$'; + break; + + default: + $escaped .= ( $byte < 0x20 || $byte >= 0x7f ) + ? 
sprintf( '\\x%02x', $byte ) + : $text[ $i ]; + } + } + + return $escaped; + } + + /** + * Escapes text for use inside generated PHP comments. + * + * @since 6.6.0 + * + * @param string $text Text to escape. + * @return string Escaped comment text. + */ + private static function escape_precomputed_php_comment( string $text ): string { + $escaped = ''; + $length = strlen( $text ); + + for ( $i = 0; $i < $length; $i++ ) { + $byte = ord( $text[ $i ] ); + $char = $text[ $i ]; + + $escaped .= ( $byte < 0x20 || $byte >= 0x7f || '?' === $char || '\\' === $char ) + ? sprintf( '\\x%02x', $byte ) + : $char; + } + + return $escaped; + } } From 59e7b4966f6696e5b98dac53cddffe4e1e1b9d5d Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 10 Jun 2026 23:33:47 +0200 Subject: [PATCH 13/13] Add WP_Token_Map property tests --- .../wp-token-map/wpTokenMapProperties.php | 876 ++++++++++++++++++ 1 file changed, 876 insertions(+) create mode 100644 tests/phpunit/tests/wp-token-map/wpTokenMapProperties.php diff --git a/tests/phpunit/tests/wp-token-map/wpTokenMapProperties.php b/tests/phpunit/tests/wp-token-map/wpTokenMapProperties.php new file mode 100644 index 0000000000000..a2db89db04fce --- /dev/null +++ b/tests/phpunit/tests/wp-token-map/wpTokenMapProperties.php @@ -0,0 +1,876 @@ +assertInstanceOf( WP_Token_Map::class, $map ); + + foreach ( self::contains_probes( $mappings, $seed ) as $probe ) { + foreach ( self::case_sensitivities() as $case_sensitivity ) { + $this->assert_contains_matches_reference( $map, $mappings, $probe, $key_length, $seed, $case_sensitivity, 'contains' ); + } + } + } + + /** + * Ensure generated read_token() probes agree with a naive reference lookup. + * + * @ticket 60698 + * + * @dataProvider data_generated_token_sets + * + * @param array $mappings Generated token mappings. + * @param int $key_length Group key length for the generated map. + * @param int $seed Seed used to generate the token set. 
+ */ + public function test_read_token_matches_reference_for_generated_documents( $mappings, $key_length, $seed ) { + $map = WP_Token_Map::from_array( $mappings, $key_length ); + $this->assertInstanceOf( WP_Token_Map::class, $map ); + + foreach ( self::generated_documents( $mappings, $seed ) as $document_index => $document ) { + $document_length = strlen( $document ); + + for ( $offset = 0; $offset <= $document_length; $offset++ ) { + foreach ( self::case_sensitivities() as $case_sensitivity ) { + $this->assert_read_token_matches_reference( + $map, + $mappings, + $document, + $offset, + $key_length, + $seed, + $case_sensitivity, + "read_token document {$document_index}" + ); + } + } + } + } + + /** + * Ensure generated nested-prefix families match greedily. + * + * @ticket 60698 + * + * @dataProvider data_key_lengths + * + * @param int $key_length Group key length for the generated map. + */ + public function test_generated_nested_prefix_families_match_longest_token( $key_length ) { + $mappings = array(); + $token = ''; + foreach ( array( 'a', 'b', 'c', 'D', ';', "\x80", 'e', 'f' ) as $chunk ) { + $token .= $chunk; + $mappings[ $token ] = 'value-' . strlen( $token ); + } + + $map = WP_Token_Map::from_array( $mappings, $key_length ); + $this->assertInstanceOf( WP_Token_Map::class, $map ); + + $document = "{$token} suffix"; + $length = null; + $this->assertSame( $mappings[ $token ], $map->read_token( $document, 0, $length ) ); + $this->assertSame( strlen( $token ), $length ); + } + + /** + * Ensure generated maps preserve behavior after to_array()/from_array(). + * + * @ticket 60698 + * + * @dataProvider data_generated_token_sets + * + * @param array $mappings Generated token mappings. + * @param int $key_length Group key length for the generated map. + * @param int $seed Seed used to generate the token set. 
+	 */
+	public function test_generated_maps_round_trip_through_array_export( $mappings, $key_length, $seed ) {
+		$original = WP_Token_Map::from_array( $mappings, $key_length );
+		$this->assertInstanceOf( WP_Token_Map::class, $original );
+
+		// Export to a plain array, then build a second map from that export.
+		$exported = $original->to_array();
+		$rebuilt  = WP_Token_Map::from_array( $exported, $key_length );
+		$this->assertInstanceOf( WP_Token_Map::class, $rebuilt );
+
+		$this->assert_map_behavior_matches_reference( $rebuilt, $mappings, $key_length, $seed, 'to_array round-trip' );
+	}
+
+	/**
+	 * Ensure a map rebuilt from its precomputed PHP source table behaves like the reference.
+	 *
+	 * @ticket 60698
+	 *
+	 * @dataProvider data_generated_token_sets
+	 *
+	 * @param array $mappings   Generated token mappings.
+	 * @param int   $key_length Group key length for the generated map.
+	 * @param int   $seed       Seed used to generate the token set.
+	 */
+	public function test_generated_maps_round_trip_through_precomputed_source_table( $mappings, $key_length, $seed ) {
+		$original = WP_Token_Map::from_array( $mappings, $key_length );
+		$this->assertInstanceOf( WP_Token_Map::class, $original );
+
+		// The exported source is evaluated as a PHP expression to rebuild the map.
+		$source_table = $original->precomputed_php_source_table();
+		// phpcs:ignore Squiz.PHP.Eval.Discouraged -- This verifies generated source round-trips.
+		$rebuilt = eval( "return {$source_table};" );
+		$this->assertInstanceOf( WP_Token_Map::class, $rebuilt );
+
+		$this->assert_map_behavior_matches_reference( $rebuilt, $mappings, $key_length, $seed, 'precomputed table round-trip' );
+	}
+
+	/**
+	 * Ensure ASCII-insensitive matching leaves non-ASCII bytes literal.
+	 *
+	 * @ticket 60698
+	 */
+	public function test_ascii_case_insensitive_matching_keeps_non_ascii_bytes_literal() {
+		$mode = 'ascii-case-insensitive';
+		$map  = WP_Token_Map::from_array(
+			array(
+				"alpha\xE9"     => 'latin-1-lower',
+				"bravo\xC3\xA9" => 'utf-8-lower',
+				"charlie\x80Z"  => 'raw-byte',
+			),
+			2
+		);
+
+		// ASCII letters may differ in case; the trailing high bytes must match exactly.
+		$this->assertTrue( $map->contains( "ALPHA\xE9", $mode ) );
+		$this->assertFalse( $map->contains( "ALPHA\xC9", $mode ) );
+		$this->assertTrue( $map->contains( "BRAVO\xC3\xA9", $mode ) );
+		$this->assertFalse( $map->contains( "BRAVO\xC3\x89", $mode ) );
+
+		// Same raw byte (0x80) with the ASCII letters case-swapped: expected to match.
+		$matched_length = null;
+		$this->assertSame( 'raw-byte', $map->read_token( "CHARLIE\x80z", 0, $matched_length, $mode ) );
+		$this->assertSame( strlen( "charlie\x80Z" ), $matched_length );
+
+		// Different raw byte (0x81): expected to miss and leave the length untouched.
+		$unmatched_length = null;
+		$this->assertNull( $map->read_token( "CHARLIE\x81z", 0, $unmatched_length, $mode ) );
+		$this->assertNull( $unmatched_length );
+	}
+
+	/**
+	 * Ensure to_array() returns every token when the group key is a single byte.
+	 *
+	 * Minimized regression case for generated key_length=1 maps.
+	 *
+	 * @ticket 60698
+	 */
+	public function test_array_export_preserves_single_byte_group_keys() {
+		$expected = array(
+			'a'  => 'short',
+			'ab' => 'long',
+			'ac' => 'sibling',
+		);
+		$map      = WP_Token_Map::from_array( $expected, 1 );
+		$actual   = $map->to_array();
+
+		// Compare independent of export order.
+		ksort( $expected );
+		ksort( $actual );
+
+		$this->assertSame( $expected, $actual );
+	}
+
+	/**
+	 * Ensure ASCII-insensitive reads work for short tokens.
+	 *
+	 * This is the minimized regression for generated case-insensitive short
+	 * token probes.
+	 *
+	 * @ticket 60698
+	 */
+	public function test_ascii_case_insensitive_reads_short_tokens() {
+		$matched_length = null;
+		$map            = WP_Token_Map::from_array( array( 'ab' => 'short-token' ), 2 );
+		$matched_value  = $map->read_token( 'AB', 0, $matched_length, 'ascii-case-insensitive' );
+
+		$this->assertSame( 'short-token', $matched_value );
+		$this->assertSame( 2, $matched_length );
+	}
+
+	/**
+	 * Ensure ASCII-insensitive lookups find a token whose group key differs from
+	 * another group key only by ASCII case.
+	 *
+	 * @ticket 60698
+	 *
+	 * @dataProvider data_ascii_case_insensitive_group_key_collisions
+	 *
+	 * @param array  $mappings   Token mappings with folded-equivalent group keys.
+	 * @param int    $key_length Group key length for the generated map.
+	 * @param string $probe      Probe text.
+	 * @param string $expected   Expected mapping.
+	 */
+	public function test_ascii_case_insensitive_reads_folded_group_key_collisions( $mappings, $key_length, $probe, $expected ) {
+		$mode           = 'ascii-case-insensitive';
+		$matched_length = null;
+		$map            = WP_Token_Map::from_array( $mappings, $key_length );
+
+		$this->assertTrue( $map->contains( $probe, $mode ) );
+		$this->assertSame( $expected, $map->read_token( $probe, 0, $matched_length, $mode ) );
+		$this->assertSame( strlen( $probe ), $matched_length );
+	}
+
+	/**
+	 * Ensure the generated PHP source survives tokens and mappings containing
+	 * quotes, backslashes, dollar signs, control bytes, a PHP closing tag, and
+	 * high bytes.
+	 *
+	 * @ticket 60698
+	 */
+	public function test_precomputed_source_table_escapes_php_string_and_comment_bytes() {
+		$original = WP_Token_Map::from_array(
+			array(
+				'quote"token'       => 'quote"value',
+				'slash\\token'      => 'slash\\value',
+				'dollar$token'      => 'dollar$value',
+				"control\ntoken"    => "control\nvalue",
+				'close?>tag'        => 'close?>value',
+				"high\x80\xFFtoken" => "high\x80\xFFvalue",
+			),
+			2
+		);
+
+		$source_table = $original->precomputed_php_source_table();
+		// phpcs:ignore Squiz.PHP.Eval.Discouraged -- This verifies generated source round-trips.
+		$rebuilt = eval( "return {$source_table};" );
+
+		$this->assertInstanceOf( WP_Token_Map::class, $rebuilt );
+		$this->assertSame( $original->to_array(), $rebuilt->to_array() );
+	}
+
+	/**
+	 * Ensure a two-byte token is not matched against text shorter than the token.
+	 *
+	 * @ticket 60698
+	 */
+	public function test_short_token_reads_ignore_text_shorter_than_token() {
+		$map = WP_Token_Map::from_array( array( 'ab' => 'short-token' ), 2 );
+
+		// First a one-byte prefix of the token, then the empty string.
+		foreach ( array( 'a', '' ) as $too_short ) {
+			$length = null;
+			$this->assertNull( $map->read_token( $too_short, 0, $length ) );
+			$this->assertNull( $length );
+		}
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * Yields one dataset per seed: the generated mappings, the key length, and the seed.
+	 *
+	 * @return Generator
+	 */
+	public static function data_generated_token_sets() {
+		// Each entry: seed, key length, target token count.
+		$seeded_cases = array(
+			'seed 539231511 key_length 1' => array( 539231511, 1, 70 ),
+			'seed 539231512 key_length 2' => array( 539231512, 2, 90 ),
+			'seed 867530901 key_length 1' => array( 867530901, 1, 60 ),
+			'seed 867530902 key_length 2' => array( 867530902, 2, 80 ),
+		);
+
+		foreach ( $seeded_cases as $name => list( $seed, $key_length, $target_count ) ) {
+			$mappings = self::generate_token_set( $seed, $key_length, $target_count );
+
+			yield $name => array( $mappings, $key_length, $seed );
+		}
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_key_lengths() {
+		$key_lengths = array(
+			'key length 1' => array( 1 ),
+			'key length 2' => array( 2 ),
+		);
+
+		return $key_lengths;
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * Each dataset holds mappings, key length, probe text, and the expected mapping.
+	 *
+	 * @return array[]
+	 */
+	public static function data_ascii_case_insensitive_group_key_collisions() {
+		// Group keys 'A' and 'a' differ only by ASCII case.
+		$single_byte_groups = array(
+			'Ab' => 'upper-group',
+			'aa' => 'lower-group',
+		);
+
+		// Group keys 'Ab' and 'aB' differ only by ASCII case.
+		$double_byte_groups = array(
+			'Abc' => 'mixed-group-one',
+			'aBd' => 'mixed-group-two',
+		);
+
+		return array(
+			'key length 1' => array( $single_byte_groups, 1, 'aa', 'lower-group' ),
+			'key length 2' => array( $double_byte_groups, 2, 'abd', 'mixed-group-two' ),
+		);
+	}
+
+	/**
+	 * Assert that a token map behaves like the reference implementation.
+	 *
+	 * @param WP_Token_Map $map        Token map under test.
+	 * @param array        $mappings   Generated token mappings.
+	 * @param int          $key_length Group key length for the generated map.
+	 * @param int          $seed       Seed used to generate the token set.
+	 * @param string       $label      Describes the map under test.
+	 */
+	private function assert_map_behavior_matches_reference( $map, $mappings, $key_length, $seed, $label ) {
+		$modes = self::case_sensitivities();
+
+		// Phase one: whole-word membership for every probe in every mode.
+		$contains_operation = "{$label} contains";
+		foreach ( self::contains_probes( $mappings, $seed ) as $probe ) {
+			foreach ( $modes as $case_sensitivity ) {
+				$this->assert_contains_matches_reference( $map, $mappings, $probe, $key_length, $seed, $case_sensitivity, $contains_operation );
+			}
+		}
+
+		// Phase two: token reads at every offset, including the end of the document.
+		foreach ( self::generated_documents( $mappings, $seed ) as $document_index => $document ) {
+			$read_operation = "{$label} read_token document {$document_index}";
+
+			foreach ( range( 0, strlen( $document ) ) as $offset ) {
+				foreach ( $modes as $case_sensitivity ) {
+					$this->assert_read_token_matches_reference( $map, $mappings, $document, $offset, $key_length, $seed, $case_sensitivity, $read_operation );
+				}
+			}
+		}
+	}
+
+	/**
+	 * Assert contains() behavior against the reference implementation.
+	 *
+	 * @param WP_Token_Map $map              Token map under test.
+	 * @param array        $mappings         Generated token mappings.
+	 * @param string       $probe            Probe word.
+	 * @param int          $key_length       Group key length for the generated map.
+	 * @param int          $seed             Seed used to generate the token set.
+	 * @param string       $case_sensitivity Case sensitivity mode.
+	 * @param string       $operation        Operation being tested.
+	 */
+	private function assert_contains_matches_reference( $map, $mappings, $probe, $key_length, $seed, $case_sensitivity, $operation ) {
+		$expected = self::reference_contains( $mappings, $probe, $case_sensitivity );
+		$actual   = $map->contains( $probe, $case_sensitivity );
+
+		// On agreement nothing is asserted and the failure message is never built.
+		if ( $expected === $actual ) {
+			return;
+		}
+
+		$message = self::failure_context( $mappings, $key_length, $seed, $case_sensitivity, $operation, $probe );
+		$this->assertSame( $expected, $actual, $message );
+	}
+
+	/**
+	 * Assert that read_token() returns the same value and matched length as the
+	 * reference implementation.
+	 *
+	 * @param WP_Token_Map $map              Token map under test.
+	 * @param array        $mappings         Generated token mappings.
+	 * @param string       $document         Document to probe.
+	 * @param int          $offset           Offset at which to probe.
+	 * @param int          $key_length       Group key length for the generated map.
+	 * @param int          $seed             Seed used to generate the token set.
+	 * @param string       $case_sensitivity Case sensitivity mode.
+	 * @param string       $operation        Operation being tested.
+	 */
+	private function assert_read_token_matches_reference( $map, $mappings, $document, $offset, $key_length, $seed, $case_sensitivity, $operation ) {
+		$reference      = self::reference_read_token( $mappings, $document, $offset, $case_sensitivity );
+		$matched_length = null;
+		$matched_value  = $map->read_token( $document, $offset, $matched_length, $case_sensitivity );
+
+		// Message suffix => ( expected, actual ); the returned value is checked before the length.
+		$checks = array(
+			'; response'       => array( $reference['value'], $matched_value ),
+			'; matched length' => array( $reference['length'], $matched_length ),
+		);
+
+		foreach ( $checks as $suffix => list( $expected, $actual ) ) {
+			if ( $expected !== $actual ) {
+				$message = self::failure_context( $mappings, $key_length, $seed, $case_sensitivity, $operation, $document, $offset ) . $suffix;
+				$this->assertSame( $expected, $actual, $message );
+			}
+		}
+	}
+
+	/**
+	 * Return case-sensitivity modes used by the public API.
+	 *
+	 * @return string[] Case-sensitivity modes.
+	 */
+	private static function case_sensitivities() {
+		return array( 'case-sensitive', 'ascii-case-insensitive' );
+	}
+
+	/**
+	 * Generate a deterministic token set.
+	 *
+	 * Fixed tokens are added first, then a nested-prefix family, then 24 tokens
+	 * sharing one group key, and finally random tokens until the target count is
+	 * reached or the attempt budget (40 attempts per targeted token) runs out.
+	 *
+	 * NUL is excluded from generated tokens because the implementation treats
+	 * lookup words containing NUL as invalid. Probe words and documents include
+	 * NUL so failed lookups still exercise that byte.
+	 *
+	 * @param int $seed         Seed used to generate the token set.
+	 * @param int $key_length   Group key length for the generated map.
+	 * @param int $target_count Number of generated tokens to target.
+	 * @return array Generated token mappings.
+	 */
+	private static function generate_token_set( $seed, $key_length, $target_count ) {
+		$state    = $seed;
+		$mappings = array();
+
+		self::add_token( $mappings, 'a', $seed );
+		self::add_token( $mappings, 'B', $seed );
+		if ( $key_length > 1 ) {
+			self::add_token( $mappings, 'c', $seed );
+		}
+		self::add_token( $mappings, str_repeat( 'k', $key_length ), $seed );
+		self::add_token( $mappings, str_repeat( 'L', 255 ), $seed );
+		self::add_token( $mappings, "hi\x80A;", $seed );
+		self::add_token( $mappings, "jo\xFFb;", $seed );
+		self::add_token( $mappings, "utf\xC3\xA9;", $seed );
+		self::add_token( $mappings, "euro\xE2\x82\xAC;", $seed );
+		if ( 1 === $key_length ) {
+			self::add_token( $mappings, 'Ab', $seed );
+			self::add_token( $mappings, 'aa', $seed );
+		} else {
+			self::add_token( $mappings, 'Abc', $seed );
+			self::add_token( $mappings, 'aBd', $seed );
+		}
+
+		// Nested-prefix family: every step is a prefix of the next.
+		$nested = '';
+		foreach ( array( 'p', 'r', 'e', 'F', 'i', 'x', ';', "\x80", 'z' ) as $chunk ) {
+			$nested .= $chunk;
+			self::add_token( $mappings, $nested, $seed );
+		}
+
+		// Many tokens that share a single group key.
+		$group_key = 1 === $key_length ? 'g' : 'gy';
+		for ( $i = 0; $i < 24; $i++ ) {
+			self::add_token( $mappings, $group_key . self::random_token_suffix( $state, 2 + ( $i % 7 ) ), $seed );
+		}
+
+		// Rejected candidates still count as attempts, so this loop always terminates.
+		$attempts = 0;
+		while ( count( $mappings ) < $target_count && $attempts < $target_count * 40 ) {
+			self::add_token( $mappings, self::random_token( $state, $key_length, $attempts ), $seed );
+			++$attempts;
+		}
+
+		return $mappings;
+	}
+
+	/**
+	 * Add a token to the generated map if it is unambiguous.
+	 *
+	 * A token is skipped when it is empty, contains a NUL byte, is at least
+	 * WP_Token_Map::MAX_LENGTH bytes long, or equals an existing token after
+	 * ASCII case folding.
+	 *
+	 * @param array  $mappings Generated token mappings.
+	 * @param string $token    Token to add.
+	 * @param int    $seed     Seed used to generate the token set.
+	 */
+	private static function add_token( &$mappings, $token, $seed ) {
+		if ( '' === $token || false !== strpos( $token, "\x00" ) || WP_Token_Map::MAX_LENGTH <= strlen( $token ) ) {
+			return;
+		}
+
+		// Fold the candidate once instead of once per existing token.
+		$folded_token = self::ascii_lowercase( $token );
+
+		foreach ( array_keys( $mappings ) as $existing_token ) {
+			if ( self::ascii_lowercase( $existing_token ) === $folded_token ) {
+				return;
+			}
+		}
+
+		$mappings[ $token ] = 'value-' . $seed . '-' . count( $mappings );
+	}
+
+	/**
+	 * Generate a token from the allowed byte classes.
+	 *
+	 * The target length is chosen from four bands: shorter than the group key
+	 * (only when the key is longer than one byte), exactly the key length,
+	 * up to 24 bytes, or 48 to 96 bytes. The first byte cycles through ten
+	 * lowercase letters starting at 'm', based on the index.
+	 *
+	 * @param int $state      Pseudo-random generator state.
+	 * @param int $key_length Group key length for the generated map.
+	 * @param int $index      Token index.
+	 * @return string Generated token.
+	 */
+	private static function random_token( &$state, $key_length, $index ) {
+		$choice = self::random_int( $state, 0, 9 );
+		if ( $choice < 3 && $key_length > 1 ) {
+			$target_length = self::random_int( $state, 1, $key_length - 1 );
+		} elseif ( $choice < 6 ) {
+			$target_length = $key_length;
+		} elseif ( $choice < 9 ) {
+			$target_length = self::random_int( $state, $key_length + 1, 24 );
+		} else {
+			$target_length = self::random_int( $state, 48, 96 );
+		}
+
+		$token = chr( ord( 'm' ) + ( $index % 10 ) );
+		while ( strlen( $token ) < $target_length ) {
+			$token .= self::random_token_chunk( $state );
+		}
+
+		// Chunks may be multi-byte, so trim any overshoot.
+		return substr( $token, 0, $target_length );
+	}
+
+	/**
+	 * Generate a random suffix.
+	 *
+	 * @param int $state         Pseudo-random generator state.
+	 * @param int $target_length Target byte length.
+	 * @return string Generated suffix.
+	 */
+	private static function random_token_suffix( &$state, $target_length ) {
+		$suffix = '';
+		while ( strlen( $suffix ) < $target_length ) {
+			$suffix .= self::random_token_chunk( $state );
+		}
+
+		// Chunks may be multi-byte, so trim any overshoot.
+		return substr( $suffix, 0, $target_length );
+	}
+
+	/**
+	 * Generate a random token chunk.
+	 *
+	 * Chunks are ASCII letters, digits, a semicolon, lone high bytes, and two
+	 * multi-byte sequences. None contains a NUL byte.
+	 *
+	 * @param int $state Pseudo-random generator state.
+	 * @return string Generated chunk.
+	 */
+	private static function random_token_chunk( &$state ) {
+		$chunks = array(
+			'a',
+			'b',
+			'C',
+			'D',
+			'0',
+			'9',
+			';',
+			"\x80",
+			"\xFF",
+			"\xC2\xA9",
+			"\xE2\x82\xAC",
+		);
+
+		return $chunks[ self::random_int( $state, 0, count( $chunks ) - 1 ) ];
+	}
+
+	/**
+	 * Generate contains() probe words.
+	 *
+	 * For every token the probes include the token itself, its ASCII case swap,
+	 * the token with one extra byte appended, the token with one byte mutated,
+	 * and every proper non-empty prefix. Fixed NUL-containing words and 400
+	 * random words are added, and duplicates are removed.
+	 *
+	 * @param array $mappings Generated token mappings.
+	 * @param int   $seed     Seed used to generate the token set.
+	 * @return string[] Probe words.
+	 */
+	private static function contains_probes( $mappings, $seed ) {
+		$state  = $seed ^ 0x5A5A5A5A;
+		$probes = array( '', "\x00", "a\x00", "z\x00z" );
+
+		foreach ( array_keys( $mappings ) as $token ) {
+			$probes[] = $token;
+			$probes[] = self::swap_ascii_case( $token );
+			$probes[] = $token . self::random_probe_byte( $state );
+			$probes[] = self::mutate_one_byte( $token, $state );
+
+			// Every proper prefix, which already covers the token minus its last byte.
+			$token_length = strlen( $token );
+			for ( $length = 1; $length < $token_length; $length++ ) {
+				$probes[] = substr( $token, 0, $length );
+			}
+		}
+
+		for ( $i = 0; $i < 400; $i++ ) {
+			$probes[] = self::random_probe_word( $state, self::random_int( $state, 0, 32 ) );
+		}
+
+		return array_values( array_unique( $probes, SORT_STRING ) );
+	}
+
+	/**
+	 * Generate documents for read_token() probes.
+	 *
+	 * @param array $mappings Generated token mappings.
+	 * @param int   $seed     Seed used to generate the token set.
+	 * @return string[] Generated documents.
+	 */
+	private static function generated_documents( $mappings, $seed ) {
+		$state  = $seed ^ 0x13572468;
+		$tokens = array_keys( $mappings );
+		usort( $tokens, array( __CLASS__, 'longest_first_then_alphabetical' ) );
+
+		// Three fixed documents: empty, the first sorted token embedded in text, and
+		// that token case-swapped, followed by a NUL byte and the last sorted token.
+		$documents = array(
+			'',
+			'prefix' . $tokens[0] . 'suffix',
+			self::swap_ascii_case( $tokens[0] ) . "\x00" . $tokens[ count( $tokens ) - 1 ],
+		);
+
+		// Ten random documents, each built from 32 segments derived from random tokens.
+		for ( $i = 0; $i < 10; $i++ ) {
+			$document = '';
+			for ( $j = 0; $j < 32; $j++ ) {
+				$token = $tokens[ self::random_int( $state, 0, count( $tokens ) - 1 ) ];
+				switch ( self::random_int( $state, 0, 5 ) ) {
+					case 0:
+						// The token verbatim.
+						$document .= $token;
+						break;
+
+					case 1:
+						// The token with ASCII case swapped.
+						$document .= self::swap_ascii_case( $token );
+						break;
+
+					case 2:
+						// A random-length prefix of the token (possibly empty or whole).
+						$document .= substr( $token, 0, self::random_int( $state, 0, strlen( $token ) ) );
+						break;
+
+					case 3:
+						// The token with one byte replaced.
+						$document .= self::mutate_one_byte( $token, $state );
+						break;
+
+					case 4:
+						// The token followed by one to four random bytes.
+						$document .= $token . self::random_probe_word( $state, self::random_int( $state, 1, 4 ) );
+						break;
+
+					default:
+						// One to eight random bytes unrelated to the chosen token.
+						$document .= self::random_probe_word( $state, self::random_int( $state, 1, 8 ) );
+						break;
+				}
+			}
+
+			$documents[] = $document;
+		}
+
+		return $documents;
+	}
+
+	/**
+	 * Reference implementation for contains().
+	 *
+	 * Case-sensitive lookups are an exact key check. Otherwise the word and each
+	 * token are compared after folding ASCII uppercase letters to lowercase.
+	 *
+	 * @param array  $mappings         Generated token mappings.
+	 * @param string $word             Probe word.
+	 * @param string $case_sensitivity Case sensitivity mode.
+	 * @return bool Whether the generated set contains the probe word.
+	 */
+	private static function reference_contains( $mappings, $word, $case_sensitivity ) {
+		if ( 'case-sensitive' === $case_sensitivity ) {
+			return array_key_exists( $word, $mappings );
+		}
+
+		// Fold the probe once instead of once per token.
+		$folded_word = self::ascii_lowercase( $word );
+
+		foreach ( array_keys( $mappings ) as $token ) {
+			if ( self::ascii_lowercase( $token ) === $folded_word ) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Reference implementation for read_token().
+	 *
+	 * @param array  $mappings         Generated token mappings.
+	 * @param string $document         Document to probe.
+	 * @param int    $offset           Offset at which to probe.
+	 * @param string $case_sensitivity Case sensitivity mode.
+	 * @return array Expected response and matched token length.
+	 */
+	private static function reference_read_token( $mappings, $document, $offset, $case_sensitivity ) {
+		$is_folding = 'ascii-case-insensitive' === $case_sensitivity;
+		$remaining  = strlen( $document ) - $offset;
+		$candidates = array_keys( $mappings );
+
+		// Longest candidates are tried first, so the first hit is the longest match.
+		usort( $candidates, array( __CLASS__, 'longest_first_then_alphabetical' ) );
+
+		foreach ( $candidates as $candidate ) {
+			$candidate_length = strlen( $candidate );
+
+			// The candidate cannot fit in what is left of the document.
+			if ( $candidate_length > $remaining ) {
+				continue;
+			}
+
+			$slice  = substr( $document, $offset, $candidate_length );
+			$needle = $candidate;
+			if ( $is_folding ) {
+				$slice  = self::ascii_lowercase( $slice );
+				$needle = self::ascii_lowercase( $needle );
+			}
+
+			if ( $slice !== $needle ) {
+				continue;
+			}
+
+			return array(
+				'value'  => $mappings[ $candidate ],
+				'length' => $candidate_length,
+			);
+		}
+
+		return array(
+			'value'  => null,
+			'length' => null,
+		);
+	}
+
+	/**
+	 * Comparison callback: longer strings sort first, ties are broken with strcmp().
+	 *
+	 * @param string $a First string to compare.
+	 * @param string $b Second string to compare.
+	 * @return int Sort order.
+	 */
+	private static function longest_first_then_alphabetical( $a, $b ) {
+		$length_difference = strlen( $b ) - strlen( $a );
+
+		// Equal-length strings fall through to strcmp(), which yields 0 for identical strings.
+		return 0 !== $length_difference ? $length_difference : strcmp( $a, $b );
+	}
+
+	/**
+	 * Mutate one byte in a token.
+	 *
+	 * @param string $token Token to mutate.
+	 * @param int    $state Pseudo-random generator state.
+	 * @return string Mutated token.
+	 */
+	private static function mutate_one_byte( $token, &$state ) {
+		if ( '' === $token ) {
+			return self::random_probe_byte( $state );
+		}
+
+		$offset = self::random_int( $state, 0, strlen( $token ) - 1 );
+
+		// Redraw until the new byte differs from the one being replaced.
+		do {
+			$replacement = self::random_probe_byte( $state );
+		} while ( $replacement === $token[ $offset ] );
+
+		return substr_replace( $token, $replacement, $offset, 1 );
+	}
+
+	/**
+	 * Swap ASCII case in a byte string.
+	 *
+	 * Bytes outside A-Z and a-z are left unchanged.
+	 *
+	 * @param string $text Text whose ASCII case should be swapped.
+	 * @return string Text with ASCII case swapped.
+	 */
+	private static function swap_ascii_case( $text ) {
+		$upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
+		$lower = 'abcdefghijklmnopqrstuvwxyz';
+
+		// strtr() translates byte by byte in one pass, so a swapped letter is never swapped back.
+		return strtr( $text, $upper . $lower, $lower . $upper );
+	}
+
+	/**
+	 * Lowercase ASCII bytes only.
+	 *
+	 * @param string $text Text to lowercase.
+	 * @return string Text with only ASCII uppercase bytes folded to lowercase.
+	 */
+	private static function ascii_lowercase( $text ) {
+		$from = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
+		$to   = 'abcdefghijklmnopqrstuvwxyz';
+
+		return strtr( $text, $from, $to );
+	}
+
+	/**
+	 * Generate a random probe word.
+	 *
+	 * @param int $state  Pseudo-random generator state.
+	 * @param int $length Target byte length.
+	 * @return string Generated word.
+	 */
+	private static function random_probe_word( &$state, $length ) {
+		$word = '';
+
+		// Every probe byte is exactly one byte long, so one draw per requested byte.
+		for ( $drawn = 0; $drawn < $length; $drawn++ ) {
+			$word .= self::random_probe_byte( $state );
+		}
+
+		return $word;
+	}
+
+	/**
+	 * Generate one random probe byte.
+	 *
+	 * @param int $state Pseudo-random generator state.
+	 * @return string Generated byte.
+	 */
+	private static function random_probe_byte( &$state ) {
+		// NUL, ASCII letters, a digit, punctuation, and lone high bytes.
+		$bytes = array(
+			"\x00",
+			'a',
+			'Z',
+			'4',
+			';',
+			'_',
+			"\x80",
+			"\xFF",
+			"\xC3",
+			"\xA9",
+			"\xE2",
+			"\x82",
+			"\xAC",
+		);
+
+		$last_index = count( $bytes ) - 1;
+		$picked     = self::random_int( $state, 0, $last_index );
+
+		return $bytes[ $picked ];
+	}
+
+	/**
+	 * Deterministic pseudo-random integer.
+	 *
+	 * Advances the state with a linear congruential step and maps the new state
+	 * into the inclusive range from $min to $max.
+	 *
+	 * @param int $state Pseudo-random generator state.
+	 * @param int $min   Minimum value.
+	 * @param int $max   Maximum value.
+	 * @return int Generated integer.
+	 */
+	private static function random_int( &$state, $min, $max ) {
+		$multiplier = 1103515245;
+		$increment  = 12345;
+		$modulus    = 2147483648;
+
+		$state = ( ( $multiplier * $state ) + $increment ) % $modulus;
+		$span  = $max - $min + 1;
+
+		return $min + ( $state % $span );
+	}
+
+	/**
+	 * Build an assertion failure message that carries everything needed to replay the case.
+	 *
+	 * The probe is hex-encoded and the token set is serialized and base64-encoded
+	 * so that binary bytes survive in the message.
+	 *
+	 * @param array    $mappings         Generated token mappings.
+	 * @param int      $key_length       Group key length for the generated map.
+	 * @param int      $seed             Seed used to generate the token set.
+	 * @param string   $case_sensitivity Case sensitivity mode.
+	 * @param string   $operation        Operation being tested.
+	 * @param string   $probe            Probe word or document.
+	 * @param int|null $offset           Optional offset into the probe.
+	 * @return string Assertion failure context.
+	 */
+	private static function failure_context( $mappings, $key_length, $seed, $case_sensitivity, $operation, $probe, $offset = null ) {
+		$offset_part = null === $offset ? '' : "; offset {$offset}";
+		$token_set   = base64_encode( serialize( $mappings ) );
+
+		return "Seed {$seed}; key_length {$key_length}; {$operation}; case {$case_sensitivity}; probe " . bin2hex( $probe )
+			. $offset_part
+			. '; token_set ' . $token_set;
+	}
+}