From 2482ea1ea7819c861b04c2beb75d39f0a4a2cf97 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 3 Jul 2026 14:55:59 +0200 Subject: [PATCH 1/5] Tests: Update html5lib test data from WPT --- tests/phpunit/data/html5lib-tests/AUTHORS.rst | 2 +- tests/phpunit/data/html5lib-tests/README.md | 18 +- .../tree-construction/README.md | 2 +- .../tree-construction/adoption02.dat | 17 + .../tree-construction/html5test-com.dat | 2 +- .../tree-construction/menuitem-element.dat | 3 +- .../processing-instructions.dat | 987 ++++++++++++++++++ .../tree-construction/scripted_adoption01.dat | 16 + .../tree-construction/scripted_ark.dat | 27 + .../tree-construction/scripted_webkit01.dat | 30 + .../tree-construction/tables01.dat | 31 +- .../tree-construction/tests1.dat | 34 +- .../tree-construction/tests10.dat | 46 +- .../tree-construction/tests17.dat | 10 +- .../tree-construction/tests18.dat | 44 +- .../tree-construction/tests2.dat | 2 +- .../tree-construction/tests7.dat | 10 +- .../tree-construction/tests9.dat | 52 +- .../tree-construction/tests_innerHTML_1.dat | 9 +- .../tree-construction/void-in-phrasing.dat | 151 +++ .../tree-construction/webkit01.dat | 32 +- .../tree-construction/webkit02.dat | 235 ++++- .../includes/build-visual-html-tree.php | 2 +- .../html-api/wpHtmlProcessorHtml5lib.php | 67 +- 24 files changed, 1672 insertions(+), 157 deletions(-) create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/processing-instructions.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/scripted_adoption01.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/scripted_ark.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/scripted_webkit01.dat create mode 100644 tests/phpunit/data/html5lib-tests/tree-construction/void-in-phrasing.dat diff --git a/tests/phpunit/data/html5lib-tests/AUTHORS.rst b/tests/phpunit/data/html5lib-tests/AUTHORS.rst index 4a7de17ad456c..293d7fde18831 100644 --- a/tests/phpunit/data/html5lib-tests/AUTHORS.rst +++ b/tests/phpunit/data/html5lib-tests/AUTHORS.rst @@ -1,7 +1,7 @@ Credits ======= -The ``html5lib`` test data is maintained by: +The ``html5lib`` test data was originally maintained by: - James Graham - Geoffrey Sneddon diff --git a/tests/phpunit/data/html5lib-tests/README.md b/tests/phpunit/data/html5lib-tests/README.md index be775c8b497b5..f843703db8e0c 100644 --- a/tests/phpunit/data/html5lib-tests/README.md +++ b/tests/phpunit/data/html5lib-tests/README.md @@ -2,24 +2,28 @@ This directory contains a third-party test suite used for testing the WordPress HTML API. -`html5lib-tests` can be found on GitHub at [html5lib/html5lib-tests](https://github.com/html5lib/html5lib-tests). +The html5lib tree-construction tests are maintained by the Web Platform Tests project. +The raw `.dat` fixtures can be found on GitHub at +[web-platform-tests/wpt/html/syntax/parsing/resources](https://github.com/web-platform-tests/wpt/tree/master/html/syntax/parsing/resources). The necessary files have been copied to this directory: - `AUTHORS.rst` - `LICENSE` -- `README.md` - `tree-construction/README.md` - `tree-construction/*.dat` -The version of these files was taken from the git commit with -SHA [`a9f44960a9fedf265093d22b2aa3c7ca123727b9`](https://github.com/html5lib/html5lib-tests/commit/a9f44960a9fedf265093d22b2aa3c7ca123727b9). +The version of the WPT-copied files was taken from the git commit with +SHA [`c469a8a72a2ce58c04601255a45504ab9f5cc763`](https://github.com/web-platform-tests/wpt/commit/c469a8a72a2ce58c04601255a45504ab9f5cc763). + +The `AUTHORS.rst` and `LICENSE` files document the original `html5lib-tests` +attribution and license. ## Updating -If there have been changes to the html5lib-tests repository, this test suite can be updated. In +If there have been changes to the Web Platform Tests repository, this test suite can be updated. In order to update: 1. Check out the latest version of git repository mentioned above. -1. Copy the files listed above into this directory. -1. Update the SHA mentioned in this README file with the new html5lib-tests SHA. +1. Copy `README.md` and `*.dat` from `html/syntax/parsing/resources/` into `tree-construction/`. +1. Update the SHA mentioned in this README file with the new Web Platform Tests SHA. diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/README.md b/tests/phpunit/data/html5lib-tests/tree-construction/README.md index 4737a3a867e86..5005d4ec99367 100644 --- a/tests/phpunit/data/html5lib-tests/tree-construction/README.md +++ b/tests/phpunit/data/html5lib-tests/tree-construction/README.md @@ -31,7 +31,7 @@ errors. Then there \*may\* be a line that says "\#document-fragment", which must be followed by a newline (LF), followed by a string of characters that -indicates the context element, followed by a newline (LF). If the string +indicates the context element, followed by a newline (LF). If the string of characters starts with "svg ", the context element is in the SVG namespace and the substring after "svg " is the local name. If the string of characters starts with "math ", the context element is in the diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/adoption02.dat b/tests/phpunit/data/html5lib-tests/tree-construction/adoption02.dat index e54d8033bac71..acd388547a1ec 100644 --- a/tests/phpunit/data/html5lib-tests/tree-construction/adoption02.dat +++ b/tests/phpunit/data/html5lib-tests/tree-construction/adoption02.dat @@ -37,3 +37,20 @@ |
| | + +#data +
+#errors +(1,6): expected-doctype-but-got-start-tag +(1,22): unexpected-start-tag-implies-table-voodoo +(1,30): end-tag-too-early-named +(1,36): unexpected-start-tag-implies-end-tag +(1,36): expected-closing-tag-but-got-eof +#document +| +| +| +| +| +| +| diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/html5test-com.dat b/tests/phpunit/data/html5lib-tests/tree-construction/html5test-com.dat index 48d0bf95b5572..203c77ef7a693 100644 --- a/tests/phpunit/data/html5lib-tests/tree-construction/html5test-com.dat +++ b/tests/phpunit/data/html5lib-tests/tree-construction/html5test-com.dat @@ -134,7 +134,7 @@ #new-errors (1:2) unexpected-question-mark-instead-of-tag-name #document -| +| | | | diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/menuitem-element.dat b/tests/phpunit/data/html5lib-tests/tree-construction/menuitem-element.dat index fb13c3c33b0ab..f7c8e2c3ca74e 100644 --- a/tests/phpunit/data/html5lib-tests/tree-construction/menuitem-element.dat +++ b/tests/phpunit/data/html5lib-tests/tree-construction/menuitem-element.dat @@ -161,13 +161,14 @@ #data #errors -33: Stray start tag “menuitem”. +1:34: ERROR: End tag 'select' isn't allowed here. Currently open tags: html, body, select, menuitem. #document | | | | |
+#document +| +| +| +| +| + +#data +
+#document +| +| +| +| +| +| +| + +#data + +#document +| +| +|

TEXT +#errors +#script-on +#document +| +| +| +|

+| +| id="B" +|

X +#errors +#script-on +#document +| +| +| +|

+| +| size="4" +| +| size="4" +| +| size="5" +| 3 +#errors +#script-on +#document +| +| +| +| "1" +| 4 +#errors +#script-on +#document +| +| +| +| "1" +| abc #errors -(1,30): unexpected-start-tag-implies-table-voodoo -(1,58): eof-in-select +1:23: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, table. +1:56: ERROR: Character tokens aren't legal here +1:57: ERROR: Character tokens aren't legal here +1:58: ERROR: Character tokens aren't legal here +1:59: ERROR: Premature end of file. Currently open tags: html, body, table, select. #document | | @@ -462,8 +473,11 @@ #data

BC
#data #errors -(1,8): expected-doctype-but-got-start-tag -(1,16): unexpected-input-in-select +1:1: ERROR: Expected a doctype token +1:17: ERROR: Premature end of file. Currently open tags: html, body, select. #document | | | | +| diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/tests9.dat b/tests/phpunit/data/html5lib-tests/tree-construction/tests9.dat index f8d04b23bc0be..1456324ffc21f 100644 --- a/tests/phpunit/data/html5lib-tests/tree-construction/tests9.dat +++ b/tests/phpunit/data/html5lib-tests/tree-construction/tests9.dat @@ -48,20 +48,17 @@ #data #errors -(1,35) unexpected-start-tag-in-select -(1,42) unexpected-end-tag-in-select #document | | | | | #errors -(1,43) unexpected-start-tag-in-select -(1,50) unexpected-end-tag-in-select #document | | @@ -69,6 +66,7 @@ | | foobar

baz

quux #errors -(1,50) unexpected-start-tag-in-select -(1,54) unexpected-start-tag-in-select -(1,62) unexpected-end-tag-in-select -(1,66) unexpected-start-tag-in-select -(1,74) unexpected-end-tag-in-select -(1,77) unexpected-start-tag-in-select -(1,88) unexpected-table-element-end-tag-in-select-in-table +1:75: ERROR: Start tag 'p' isn't allowed here. Currently open tags: html, body, table, tbody, tr, td, select, math. +1:81: ERROR: End tag 'table' isn't allowed here. Currently open tags: html, body, table, tbody, tr, td, select. #document | | @@ -318,28 +311,45 @@ | | | foobar

baz

quux #errors -(1,36) unexpected-start-tag-implies-table-voodoo -(1,42) unexpected-start-tag-in-select -(1,46) unexpected-start-tag-in-select -(1,54) unexpected-end-tag-in-select -(1,58) unexpected-start-tag-in-select -(1,66) unexpected-end-tag-in-select -(1,69) unexpected-start-tag-in-select -(1,80) unexpected-table-element-end-tag-in-select-in-table +1:29: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, table. +1:37: ERROR: Start tag 'math' isn't allowed here. Currently open tags: html, body, table, select. +1:47: ERROR: Character tokens aren't legal here +1:48: ERROR: Character tokens aren't legal here +1:49: ERROR: Character tokens aren't legal here +1:59: ERROR: Character tokens aren't legal here +1:60: ERROR: Character tokens aren't legal here +1:61: ERROR: Character tokens aren't legal here +1:67: ERROR: Start tag 'p' isn't allowed here. Currently open tags: html, body, table, select, math. +1:67: ERROR: Start tag 'p' isn't allowed here. Currently open tags: html, body, table, select. +1:70: ERROR: Character tokens aren't legal here +1:71: ERROR: Character tokens aren't legal here +1:72: ERROR: Character tokens aren't legal here #document | | | | |