Skip to content
29 changes: 24 additions & 5 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -934,12 +934,29 @@ private function step_in_body() {
$this->run_adoption_agency_algorithm();
return true;


/*
* > An end tag whose tag name is "br"
* > Parse error. Drop the attributes from the token, and act as described in the next
* > entry; i.e. act as if this was a "br" start tag token with no attributes, rather
* > than the end tag token that it actually is.
*/
case '-BR':
$this->last_error = self::ERROR_UNSUPPORTED;
throw new WP_HTML_Unsupported_Exception( "Closing BR tags require unimplemented special handling." );

/*
* > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr"
*/
case '+AREA':
case '+BR':
case '+EMBED':
case '+IMG':
case '+KEYGEN':
case '+WBR':
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
$this->state->frameset_ok = false;
return true;
}

Expand All @@ -966,13 +983,11 @@ private function step_in_body() {
case 'BASEFONT':
case 'BGSOUND':
case 'BODY':
case 'BR':
case 'CAPTION':
case 'COL':
case 'COLGROUP':
case 'DD':
case 'DT':
case 'EMBED':
case 'FORM':
case 'FRAME':
case 'FRAMESET':
Expand All @@ -981,7 +996,6 @@ private function step_in_body() {
case 'HTML':
case 'IFRAME':
case 'INPUT':
case 'KEYGEN':
case 'LI':
case 'LINK':
case 'LISTING':
Expand Down Expand Up @@ -1021,7 +1035,6 @@ private function step_in_body() {
case 'TR':
case 'TRACK':
case 'UL':
case 'WBR':
case 'XMP':
$this->last_error = self::ERROR_UNSUPPORTED;
throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." );
Expand Down Expand Up @@ -1685,7 +1698,13 @@ public static function is_void( $tag_name ) {
'META' === $tag_name ||
'SOURCE' === $tag_name ||
'TRACK' === $tag_name ||
'WBR' === $tag_name
'WBR' === $tag_name ||

// Obsolete element.
//
// KEYGEN does not appear in https://html.spec.whatwg.org/#void-elements,
// but it was a void tag and browsers treat it as such.
'KEYGEN' === $tag_name
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

);
}

Expand Down
65 changes: 60 additions & 5 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,66 @@ public function test_fails_to_reconstruct_formatting_elements() {
$this->assertFalse( $p->next_tag( 'EM' ), 'Should have aborted before finding second EM as it required reconstructing the first EM.' );
}

/**
 * Ensures that void elements never become the parent of the markup that follows them.
 *
 * Each dataset supplies the markup of a single void tag. A `<div>` opener is appended
 * to it, the result is parsed as a fragment, and the test verifies that both the void
 * element and the `<div>` sit directly inside BODY.
 *
 * Tags which the processor reports as unsupported are skipped rather than failed.
 *
 * @ticket 60283
 *
 * @covers WP_HTML_Processor::step_in_body
 * @covers WP_HTML_Processor::is_void
 *
 * @dataProvider data_void_tags
 *
 * @param string $tag Markup of a single void tag, e.g. `<br>`.
 */
public function test_cannot_nest_void_tags( $tag ) {
	$p = WP_HTML_Processor::create_fragment( "{$tag}<div>" );

	/*
	 * The input should produce this structure:
	 *
	 *     <html>
	 *         <body>
	 *             <$TAG>
	 *             <div>
	 *         </body>
	 *     </html>
	 *
	 * The breadcrumbs should therefore be:
	 *
	 *     HTML > BODY > $TAG    when stopped on the void tag, and
	 *     HTML > BODY > DIV     when stopped on the DIV that follows it.
	 */
	$tag_name = strtoupper( trim( $tag, '<>' ) );

	$result = $p->next_tag();

	// Checked before asserting on the result so an unsupported tag is reported as skipped, not failed.
	if ( WP_HTML_Processor::ERROR_UNSUPPORTED === $p->get_last_error() ) {
		$this->markTestSkipped( "{$tag} is unsupported." );
	}

	$this->assertTrue( $result, "Could not find first {$tag}." );

	// Compare the full path, not only its depth, so stopping on the wrong element cannot pass.
	$this->assertSame(
		array( 'HTML', 'BODY', $tag_name ),
		$p->get_breadcrumbs(),
		"{$tag} was not nested correctly."
	);

	$this->assertTrue( $p->next_tag( 'DIV' ), "Could not find <div> tag." );
	$this->assertSame(
		array( 'HTML', 'BODY', 'DIV' ),
		$p->get_breadcrumbs(),
		"Following <div> was not nested correctly."
	);
}

/**
 * Data provider supplying the markup of each void tag under test.
 *
 * @return array[] Datasets keyed by upper-case tag name, each holding the
 *                 matching lower-case tag opener as its only argument.
 */
public function data_void_tags() {
	$tag_names = array(
		'AREA',
		'BASE',
		'BR',
		'COL',
		'EMBED',
		'HR',
		'IMG',
		'INPUT',
		'KEYGEN',
		'LINK',
		'META',
		'SOURCE',
		'TRACK',
		'WBR',
	);

	$datasets = array();
	foreach ( $tag_names as $tag_name ) {
		// E.g. 'AREA' => array( '<area>' ).
		$datasets[ $tag_name ] = array( '<' . strtolower( $tag_name ) . '>' );
	}

	return $datasets;
}

/**
* Ensures that special handling of unsupported tags is cleaned up
* as handling is implemented. Otherwise there's risk of leaving special
Expand Down Expand Up @@ -159,16 +219,13 @@ public function test_step_in_body_fails_on_unsupported_tags( $tag_name ) {
public function data_unsupported_special_in_body_tags() {
return array(
'APPLET' => array( 'APPLET' ),
'AREA' => array( 'AREA' ),
'BASE' => array( 'BASE' ),
'BASEFONT' => array( 'BASEFONT' ),
'BGSOUND' => array( 'BGSOUND' ),
'BODY' => array( 'BODY' ),
'BR' => array( 'BR' ),
'CAPTION' => array( 'CAPTION' ),
'COL' => array( 'COL' ),
'COLGROUP' => array( 'COLGROUP' ),
'EMBED' => array( 'EMBED' ),
'FORM' => array( 'FORM' ),
'FRAME' => array( 'FRAME' ),
'FRAMESET' => array( 'FRAMESET' ),
Expand All @@ -177,7 +234,6 @@ public function data_unsupported_special_in_body_tags() {
'HTML' => array( 'HTML' ),
'IFRAME' => array( 'IFRAME' ),
'INPUT' => array( 'INPUT' ),
'KEYGEN' => array( 'KEYGEN' ),
'LINK' => array( 'LINK' ),
'LISTING' => array( 'LISTING' ),
'MARQUEE' => array( 'MARQUEE' ),
Expand Down Expand Up @@ -214,7 +270,6 @@ public function data_unsupported_special_in_body_tags() {
'TITLE' => array( 'TITLE' ),
'TR' => array( 'TR' ),
'TRACK' => array( 'TRACK' ),
'WBR' => array( 'WBR' ),
'XMP' => array( 'XMP' ),
);
}
Expand Down
52 changes: 52 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php
Original file line number Diff line number Diff line change
Expand Up @@ -376,4 +376,56 @@ public function test_in_body_any_other_end_tag_with_unclosed_non_special_element
$this->assertSame( 'DIV', $p->get_tag(), "Expected to find DIV element, but found {$p->get_tag()} instead." );
$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'DIV' ), $p->get_breadcrumbs(), 'Failed to produce expected DOM nesting: SPAN should be closed and DIV should be its sibling.' );
}

/**
 * Confirms that a `</br>` end tag found "in body" is reported as unsupported.
 *
 * The HTML specification gives this token special treatment:
 *
 * > An end tag whose tag name is "br"
 * > Parse error. Drop the attributes from the token, and act as described in the next entry;
 * > i.e. act as if this was a "br" start tag token with no attributes, rather than the end
 * > tag token that it actually is.
 *
 * Once that handling is implemented, delete this test and stop marking
 * `test_br_end_tag_special_behavior` as incomplete.
 *
 * @covers WP_HTML_Processor::step_in_body
 *
 * @ticket 60283
 *
 * @since 6.4.0
 */
public function test_br_end_tag_unsupported() {
	$processor = WP_HTML_Processor::create_fragment( '</br>' );
	$found_tag = $processor->next_tag();

	$this->assertFalse( $found_tag, 'Found a BR tag that should not be handled.' );

	// The failure to match must be due to the unsupported markup.
	$this->assertSame( WP_HTML_Processor::ERROR_UNSUPPORTED, $processor->get_last_error() );
}

/**
 * Describes the intended handling of a BR end tag `</br …>` found "in body":
 *
 * > An end tag whose tag name is "br"
 * > Parse error. Drop the attributes from the token, and act as described in the next entry;
 * > i.e. act as if this was a "br" start tag token with no attributes, rather than the end
 * > tag token that it actually is.
 *
 * The test is marked incomplete as its first step, so the assertions below
 * only document the expected behavior until the handling is implemented.
 *
 * @covers WP_HTML_Processor::step_in_body
 *
 * @ticket 60283
 *
 * @since 6.4.0
 */
public function test_br_end_tag_special_behavior() {
	$this->markTestIncomplete( 'BR end tag special handling is unimplemented' );

	$p = WP_HTML_Processor::create_fragment( '</br attribute="must be removed">' );

	// The end tag must surface as a BR opener with its attributes dropped.
	$this->assertTrue( $p->next_tag(), 'No BR tag found.' );
	$this->assertFalse( $p->is_tag_closer(), '</br> should not be treated as an end tag.' );
	$this->assertNull( $p->get_attribute_names_with_prefix( '' ), 'BR end tag had attributes.' );

	// As an opener the tag must accept new attributes; the assertions below depend on this succeeding.
	$this->assertTrue( $p->set_attribute( 'new-attribute', 'added' ), 'BR end tag becomes an opener' );
	$this->assertCount( 1, $p->get_attribute_names_with_prefix( '' ), 'Tag should have 1 attribute.' );
	$this->assertSame( 'added', $p->get_attribute( 'new-attribute' ), 'Tag did not set attribute value correctly.' );
	$this->assertSame( '<br new-attribute="added">', $p->get_updated_html(), 'Tag HTML was not updated correctly.' );
}
}