From d130c92041cb79146d4eeb0097141b553a786595 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 5 Feb 2026 12:54:33 +0100 Subject: [PATCH 1/4] Fix newlines disappearing inside PRE,TEXTAREA elements Add serialize + normalize tests Add more PRE tests Fix newlines disappearing inside PRE,TEXTAREA elements Handline PRE,LISTING lints Simpler fix Remove overly-specific tests Add LISTING tests Add explanatory comment lints Remove accidental import --- .../html-api/class-wp-html-processor.php | 26 +++++++++ .../html-api/wpHtmlProcessor-serialize.php | 57 +++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 55f955f2c1a9a..d5b073f65cddf 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1412,6 +1412,32 @@ public function serialize_token(): string { $html .= '>'; + /* + * The HTML parser strips a leading newline immediately after the start + * tag of TEXTAREA, PRE, and LISTING elements. When serializing, prepend + * a leading newline to ensure the semantic HTML content is preserved. + * + * For example, `
<pre>\n\nX</pre>` must not become `<pre>\nX</pre>` because its content + * has changed. However, `<pre>X</pre>` and `<pre>\nX</pre>
` are _equivalent_. + * + * > A start tag whose tag name is "textarea" + * > … + * > If the next token is a U+000A LINE FEED (LF) character token, then ignore + * > that token and move on to the next one. (Newlines at the start of textarea + * > elements are ignored as an authoring convenience.) + * + * > A start tag whose tag name is one of: "pre", "listing" + * > … + * > If the next token is a U+000A LINE FEED (LF) character token, then ignore + * > that token and move on to the next one. (Newlines at the start of pre blocks + * > are ignored as an authoring convenience.) + * + * @see https://html.spec.whatwg.org/multipage/parsing.html + */ + if ( $tag_name === 'TEXTAREA' || $tag_name === 'PRE' || $tag_name === 'LISTING' ) { + $html .= "\n"; + } + // Flush out self-contained elements. if ( $in_html && in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) { $text = $this->get_modifiable_text(); diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php index e2b5a79c2de2f..cb9f4abce9535 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php @@ -321,4 +321,61 @@ public static function data_provider_serialize_doctype() { 'Double quotes in system ID' => array( '', '' ), ); } + + /** + * @ticket TBD + * + * @dataProvider data_provider_normalize_special_leading_newline_cases + */ + public function test_normalize_special_leading_newline_handling( string $input, string $expected ) { + $normalized = WP_HTML_Processor::normalize( $input ); + $this->assertEqualHTML( $expected, $normalized ); + $normalized_twice = WP_HTML_Processor::normalize( $normalized ); + $this->assertEqualHTML( $expected, $normalized_twice ); + } + + public static function data_provider_normalize_special_leading_newline_cases() { + return array( + 'Leading newline in PRE' => array( + "
<pre>\nline 1\nline 2</pre>", + "<pre>line 1\nline 2</pre>
", + ), + 'Double leading newline in PRE' => array( + "
<pre>\n\nline 2\nline 3</pre>", + "<pre>\n\nline 2\nline 3</pre>
", + ), + 'Multiple text nodes inside PRE' => array( + "
<pre>\nline 1 still line 1</pre>", + '<pre>line 1 still line 1</pre>
', + ), + 'Multiple text nodes inside PRE with leading newlines' => array( + "
<pre>\n\nline 2 still line 2</pre>", + "<pre>\n\nline 2 still line 2</pre>
", + ), + 'Leading newline in LISTING' => array( + "\nline 1\nline 2", + "line 1\nline 2", + ), + 'Double leading newline in LISTING' => array( + "\n\nline 2\nline 3", + "\n\nline 2\nline 3", + ), + 'Multiple text nodes inside LISTING' => array( + "\nline 1 still line 1", + 'line 1 still line 1', + ), + 'Multiple text nodes inside LISTING with leading newlines' => array( + "\n\nline 2 still line 2", + "\n\nline 2 still line 2", + ), + 'Leading newline in TEXTAREA' => array( + "", + "", + ), + 'Double leading newline in TEXTAREA' => array( + "", + "", + ), + ); + } } From 21b48eac066a43a79c868c3d319308882b96158c Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 5 Feb 2026 17:57:12 +0100 Subject: [PATCH 2/4] Ticket number, data provider --- tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php index cb9f4abce9535..cef916436ead6 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php @@ -323,7 +323,7 @@ public static function data_provider_serialize_doctype() { } /** - * @ticket TBD + * @ticket 64607 * * @dataProvider data_provider_normalize_special_leading_newline_cases */ @@ -334,6 +334,11 @@ public function test_normalize_special_leading_newline_handling( string $input, $this->assertEqualHTML( $expected, $normalized_twice ); } + /** + * Data provider. 
+ * + * @return array[] + */ public static function data_provider_normalize_special_leading_newline_cases() { return array( 'Leading newline in PRE' => array( From 03fb7f01c710c873f449497d5ec09f09d90e539a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 5 Feb 2026 19:50:25 +0100 Subject: [PATCH 3/4] YODA --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index d5b073f65cddf..502cc2ed3ee69 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1434,7 +1434,7 @@ public function serialize_token(): string { * * @see https://html.spec.whatwg.org/multipage/parsing.html */ - if ( $tag_name === 'TEXTAREA' || $tag_name === 'PRE' || $tag_name === 'LISTING' ) { + if ( 'TEXTAREA' === $tag_name || 'PRE' === $tag_name || 'LISTING' === $tag_name ) { $html .= "\n"; } From 0acab11958604124e56543931ffbfc8849e87304 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 6 Feb 2026 13:53:36 +0100 Subject: [PATCH 4/4] Apply suggestion from @mukeshpanchal27 Co-authored-by: Mukesh Panchal --- tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php index cef916436ead6..175bb3845d554 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php @@ -323,9 +323,15 @@ public static function data_provider_serialize_doctype() { } /** + * Ensures that leading newlines in PRE, LISTING, and TEXTAREA elements are preserved upon normalization, + * and that normalization is idempotent in these cases. 
+ * * @ticket 64607 * * @dataProvider data_provider_normalize_special_leading_newline_cases + * + * @param string $input HTML input containing leading newlines in PRE, LISTING, or TEXTAREA elements. + * @param string $expected Expected output after normalization, which should preserve leading newlines. */ public function test_normalize_special_leading_newline_handling( string $input, string $expected ) { $normalized = WP_HTML_Processor::normalize( $input );