Skip to content
Closed
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
80c5983
Add test setup for running Core tests.
dmsnell Jun 16, 2024
0e1a917
Add quirks mode parameter to create_fragment
sirreal Jul 18, 2024
5267774
Add standards mode tests for class name case sensitivity
sirreal Jul 19, 2024
ed8f34f
Implement has_class
sirreal Jul 19, 2024
f2fa469
Add test with null bytes in class attribute
sirreal Jul 19, 2024
c103410
Remove lower-casing behavior of class_list
sirreal Jul 19, 2024
1435cf9
Update to use document_mode
sirreal Aug 12, 2024
4f18368
Replace null bytes in class_list class names
sirreal Aug 13, 2024
2540406
Fix tests
sirreal Aug 13, 2024
9be0a32
Handle all the comparison stuff with a protected comparable_class_nam…
sirreal Aug 13, 2024
cd43ef9
Lint
sirreal Aug 13, 2024
00e8aff
Improve phpdoc explanations
sirreal Aug 13, 2024
eb07339
Remove comment about styling (HTML structure is affected)
sirreal Aug 13, 2024
a9e924d
Revert "Replace null bytes in class_list class names"
sirreal Aug 13, 2024
1dc1752
Remove null-byte has_class test
sirreal Aug 13, 2024
8dc79bc
Merge branch 'trunk' into html-api/css-class-name-method-audit
sirreal Aug 23, 2024
be6091b
Revert ::create_fragment changes
sirreal Aug 23, 2024
07726b1
Adjust tests to use full parser in quirks/no-quirks
sirreal Aug 23, 2024
ec05abb
Move quirks mode to tag processor
sirreal Aug 23, 2024
176604c
Make comparable_class_name internal
sirreal Aug 23, 2024
bb6d772
Add information about how quirks mode changes behavior
sirreal Aug 26, 2024
15d479e
Remove comparable_class_name function call from has_class loop
sirreal Aug 26, 2024
8dbf6ab
Remove comparable_class_name method entirely
sirreal Aug 26, 2024
926e7d6
Add another test for quirks-mode add_class
sirreal Aug 26, 2024
f1f4224
Remove since tag from class_list
sirreal Aug 26, 2024
559315d
Lowerclass yielded class_list class names in quirks mode
sirreal Sep 2, 2024
df91415
Fix modifying class case when removing another class
sirreal Sep 2, 2024
21534f9
Merge branch 'trunk' into html-api/css-class-name-method-audit
sirreal Sep 3, 2024
0eaa9af
Fix equals sign alignment lint
sirreal Sep 3, 2024
3eaecd1
Merge branch 'trunk' into html-api/css-class-name-method-audit
dmsnell Sep 3, 2024
ab02ca4
Reintroduce an explanatory comment on `compat_mode` property
dmsnell Sep 3, 2024
cad5d62
Revert: Have `has_class()` return `null` for unsupported tokens.
dmsnell Sep 3, 2024
62cbb1d
Truncate explanatory comment on quirks mode constants and cite MDN.
dmsnell Sep 3, 2024
8217303
Modify tests
dmsnell Sep 3, 2024
b417e11
Preserve given casing of added and removed class names.
dmsnell Sep 4, 2024
618eec5
Merge branch 'trunk' into html-api/css-class-name-method-audit
dmsnell Sep 4, 2024
4c8db57
Merge branch 'test-everything' into html-api/css-class-name-method-audit
dmsnell Sep 4, 2024
48e00c9
Fix issue with classname updates, and change test to assert it.
dmsnell Sep 4, 2024
b099026
Remove test helpers accidentally added (can't force-push)
dmsnell Sep 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions src/wp-includes/html-api/class-wp-html-processor-state.php
Original file line number Diff line number Diff line change
Expand Up @@ -393,9 +393,6 @@ class WP_HTML_Processor_State {
* - When not in `QUIRKS_MODE`, a TABLE start tag implicitly closes an open P tag
* if one is in scope and open, otherwise the TABLE becomes a child of the P.
*
* `QUIRKS_MODE` impacts many styling-related aspects of an HTML document, but
* none of the other changes modifies how the HTML is parsed or selected.
Comment on lines -396 to -397
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The lines immediately above this about P > TABLE handling in quirks/no-quirks modes seem contradictory. That's directly related to tree-construction.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've updated the wording. It indeed was self-contradictory

*
* @see self::QUIRKS_MODE
* @see self::NO_QUIRKS_MODE
*
Expand Down
41 changes: 36 additions & 5 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -287,12 +287,14 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
* @since 6.4.0
* @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances.
*
* @param string $html Input HTML fragment to process.
* @param string $context Context element for the fragment, must be default of `<body>`.
* @param string $encoding Text encoding of the document; must be default of 'UTF-8'.
* @param string $html Input HTML fragment to process.
* @param string $context Context element for the fragment, must be default of `<body>`.
* @param string $encoding Optional. Text encoding of the document; must be default of 'UTF-8'.
* @param string $document_mode Optional. Set document compatibility mode (quirks). Should be
* 'quirks-mode' or 'no-quirks-mode'. Default: 'no-quirks-mode'.
* @return static|null The created processor if successful, otherwise null.
*/
public static function create_fragment( $html, $context = '<body>', $encoding = 'UTF-8' ) {
public static function create_fragment( $html, $context = '<body>', $encoding = 'UTF-8', $document_mode = WP_HTML_Processor_State::NO_QUIRKS_MODE ) {
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A fragment parser would typically inherit the context element's document's compatibility mode.

I suspect we'll need to change how the context element is passed, but I don't think it will include information about it's document mode and this argument will remain helpful.

See #7141.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you think this is worth supporting right now? do we have enough warrant to include it? in what cases would we want to create a document fragment in quirks mode, and in those cases, how would we know?

I can definitely see value in having this, but also I wonder if this inclusion will help people understand better what they need to be doing or add confusion.

what do you think the consequences would be of simply not supporting a quirks-mode fragment parser? or at least of not having it in the primary function signature for creating the class?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was helpful and important for testing while working on these changes.

In what cases would we want to create a document fragment in quirks mode, and in those cases, how would we know?

The fragment should use the same mode as the document for the context element. This should become clear when we work on set_{inner,outer}_html where we'll be creating fragment parsers that use the parent parser's document mode.

Another option would be to adjust the full parser to handle doctype declarations and set the document_mode correctly according to the full HTML document. Then we could certainly omit these changes.

I'm also reluctant to eagerly add more to this method signature.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what do you think the consequences would be of simply not supporting a quirks-mode fragment parser?

If the full parser supports quirks mode and we have set_{inner,outer}_html methods, I think the fragment parser must support quirks mode.

or at least of not having it in the primary function signature for creating the class?

The mode should be based on the context element's document's mode. There's no way to provide that information to the fragment parser right now. The context element is currently passed to the fragment parser as an HTML-like string (only <body> is allowed right now) which seems insufficient to pass all the information the fragment parser requires.

There are other ways to handle this, for example an instance method could create a fragment from a node and set quirks mode appropriately as well as handle things like reading attributes, namespace, etc. This is all best discussed in #7141.

I am going to explore a change to handle quirks mode in the full parser based on the doctype declaration, that would be sufficient and we could remove this change.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could see some value in having a method like ->set_quirks_mode()

I still suspect that it'll be basically universal that we don't work in the full parser mode within WordPress. almost no HTML-processing code has access to the full document, and so that's what I meant when assuming no-quirks mode. We just don't know even when doing inner_html operations if the parent document is in quirks mode or not. "We can only assume UTF-8, no-quirks, <body> context."

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There may be value in allowing the compat mode to be changed on a processor. I'd like to leave those changes for consideration in their own PR. There was no way to change the compat mode before and I don't think we need to address that in this PR.

if ( '<body>' !== $context || 'UTF-8' !== $encoding ) {
return null;
}
Expand All @@ -303,6 +305,10 @@ public static function create_fragment( $html, $context = '<body>', $encoding =
$processor->state->encoding = $encoding;
$processor->state->encoding_confidence = 'certain';

if ( WP_HTML_Processor_State::QUIRKS_MODE === $document_mode ) {
$processor->state->document_mode = WP_HTML_Processor_State::QUIRKS_MODE;
}

// @todo Create "fake" bookmarks for non-existent but implied nodes.
$processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 );
$processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 );
Expand Down Expand Up @@ -4636,6 +4642,30 @@ public function remove_class( $class_name ): bool {
return $this->is_virtual() ? false : parent::remove_class( $class_name );
}

/**
* Transform a class name string to a comparable form.
*
* When the document is in quirks mode, class names are transformed to
* ASCII lowercase for comparison. In no quirks mode, class names are
* compared as they are (case-sensitive).
*
* > When matching against a document which is in quirks mode, class names must be matched
* > ASCII case-insensitively; class selectors are otherwise case-sensitive, only matching
* > class names they are identical to.
*
* @see https://www.w3.org/TR/selectors-4/#class-html
*
* @since 6.7.0
*
* @param string $class_name The class name to transform.
* @return string The transformed class name.
*/
protected function comparable_class_name( string $class_name ): string {
return WP_HTML_Processor_State::QUIRKS_MODE === $this->state->document_mode
? strtolower( $class_name )
: $class_name;
}

/**
* Returns if a matched tag contains the given ASCII case-insensitive class name.
*
Expand All @@ -4645,7 +4675,7 @@ public function remove_class( $class_name ): bool {
* @return bool|null Whether the matched tag contains the given class name, or null if not matched.
*/
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sirreal I've reverted this change so we can consider it separately. I know it will be important during active format reconstruction, but I think that null is a kind of partially-implemented escape hatch that communicates that this isn't supported rather than indicating that a class definitively doesn't exist on the tag.

public function has_class( $wanted_class ): ?bool {
return $this->is_virtual() ? null : parent::has_class( $wanted_class );
return $this->is_virtual() ? false : parent::has_class( $wanted_class );
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This appears to be a small bug and isn't necessary in this PR. I'd be happy to make another PR if desired. is_virtual would suggest we're stopped on a tag, but the tag can't have any attributes. This should return false.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hm. I would consider this more of a bug in the type signature. with active format reconstruction it is possible for virtual nodes to have a class, but the null was meant to convey exactly what you inferred: "This tag can have no classes" rather than "this has has no classes."

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug introduced in #6753

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That seems different from what the tag processor does, where "not on a tag" returns null, "on a tag" always returns false with no special handling for tags with no classes.

/**
* Returns if a matched tag contains the given ASCII case-insensitive class name.
*
* @since 6.4.0
*
* @param string $wanted_class Look for this CSS class name, ASCII case-insensitive.
* @return bool|null Whether the matched tag contains the given class name, or null if not matched.
*/
public function has_class( $wanted_class ): ?bool {
if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
return null;
}
$wanted_class = strtolower( $wanted_class );
foreach ( $this->class_list() as $class_name ) {
if ( $class_name === $wanted_class ) {
return true;
}
}
return false;
}

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug introduced in #6753

I think this was a confusion, there is no bug with types and #6753 did not introduce a bug here that I'm aware of. This is simply a question of what null and false mean here as return values.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for following up on these comments. null has been used to mean "cannot potentially contain that attribute or any others." maybe what we need is a comment update since we created virtual nodes.

at some point, if we allow reading these this might change, as nodes created during active format reconstruction contain the attributes of their original tags. for now, though, I prefer having a distinction between "this tag does not have this class" and "it's not possible to answer if this tag has this class"

}

/**
Expand All @@ -4663,6 +4693,7 @@ public function has_class( $wanted_class ): ?bool {
* // Outputs: "free <egg> lang-en "
*
* @since 6.6.0 Subclassed for the HTML Processor.
* @since 6.7.0 Null bytes are replaced with the replacement character (U+FFFD).
*/
public function class_list() {
return $this->is_virtual() ? null : parent::class_list();
Expand Down
34 changes: 23 additions & 11 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1126,6 +1126,8 @@ public function paused_at_incomplete_token(): bool {
* // Outputs: "free <egg> lang-en "
*
* @since 6.4.0
* @since 6.7.0 Class names are no longer force lower-cased.
Comment thread
sirreal marked this conversation as resolved.
Outdated
* @since 6.7.0 Null bytes are replaced with the replacement character (U+FFFD).
*/
public function class_list() {
if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
Expand Down Expand Up @@ -1155,12 +1157,7 @@ public function class_list() {
return;
}

/*
* CSS class names are case-insensitive in the ASCII range.
*
* @see https://www.w3.org/TR/CSS2/syndata.html#x1
*/
$name = strtolower( substr( $class, $at, $length ) );
$name = str_replace( "\x00", "\u{FFFD}", substr( $class, $at, $length ) );
Comment thread
sirreal marked this conversation as resolved.
$at += $length;

/*
Expand Down Expand Up @@ -1191,10 +1188,10 @@ public function has_class( $wanted_class ): ?bool {
return null;
}

$wanted_class = strtolower( $wanted_class );
$wanted_class = $this->comparable_class_name( $wanted_class );

foreach ( $this->class_list() as $class_name ) {
if ( $class_name === $wanted_class ) {
if ( $this->comparable_class_name( $class_name ) === $wanted_class ) {
Comment thread
sirreal marked this conversation as resolved.
Outdated
return true;
}
}
Expand Down Expand Up @@ -2296,7 +2293,7 @@ private function class_name_updates_to_attributes_updates(): void {
break;
}

$name = substr( $existing_class, $at, $name_length );
$name = $this->comparable_class_name( substr( $existing_class, $at, $name_length ) );
$at += $name_length;

// If this class is marked for removal, start processing the next one.
Expand Down Expand Up @@ -3831,7 +3828,7 @@ public function add_class( $class_name ): bool {
return false;
}

$this->classname_updates[ $class_name ] = self::ADD_CLASS;
$this->classname_updates[ $this->comparable_class_name( $class_name ) ] = self::ADD_CLASS;

return true;
}
Expand All @@ -3853,12 +3850,27 @@ public function remove_class( $class_name ): bool {
}

if ( null !== $this->tag_name_starts_at ) {
$this->classname_updates[ $class_name ] = self::REMOVE_CLASS;
$this->classname_updates[ $this->comparable_class_name( $class_name ) ] = self::REMOVE_CLASS;
}

return true;
}

/**
* Transform a class name string to a comparable form.
*
* This method may be subclassed to customize class names for comparison. For example, this
* allows for subclasses to support case-insensitive class name comparison.
*
* @since 6.7.0
*
* @param string $class_name The class name to transform.
* @return string The transformed class name.
*/
protected function comparable_class_name( string $class_name ): string {
Comment thread
sirreal marked this conversation as resolved.
Outdated
return $class_name;
}

/**
* Returns the string representation of the HTML Tag Processor.
*
Expand Down
137 changes: 137 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -531,4 +531,141 @@ public function test_foreign_content_script_self_closing() {
$processor = WP_HTML_Processor::create_fragment( '<svg><script />' );
$this->assertTrue( $processor->next_tag( 'script' ) );
}

/**
* Ensures that the tag processor is case sensitive when removing CSS classes in no-quirks mode.
*
* @ticket 61531
*
* @covers ::remove_class
*/
public function test_remove_class_no_quirks_mode() {
$processor = WP_HTML_Processor::create_fragment( '<span class="UPPER"></span>' );
$processor->next_tag();
$processor->remove_class( 'upper' );
$this->assertSame( '<span class="UPPER"></span>', $processor->get_updated_html() );

$processor->remove_class( 'UPPER' );
$this->assertSame( '<span ></span>', $processor->get_updated_html() );
}

/**
* Ensures that the tag processor is case sensitive when adding CSS classes in no-quirks mode.
*
* @ticket 61531
*
* @covers ::add_class
*/
public function test_add_class_no_quirks_mode() {
$processor = WP_HTML_Processor::create_fragment( '<span class="UPPER"></span>' );
$processor->next_tag();
$processor->add_class( 'UPPER' );
$this->assertSame( '<span class="UPPER"></span>', $processor->get_updated_html() );

$processor->add_class( 'upper' );
$this->assertSame( '<span class="UPPER upper"></span>', $processor->get_updated_html() );
}

/**
* Ensures that the tag processor is case sensitive when checking has CSS classes in no-quirks mode.
*
* @ticket 61531
*
* @covers ::has_class
*/
public function test_has_class_no_quirks_mode() {
$processor = WP_HTML_Processor::create_fragment( '<span class="UPPER"></span>' );
$processor->next_tag();
$this->assertFalse( $processor->has_class( 'upper' ) );
$this->assertTrue( $processor->has_class( 'UPPER' ) );
}

/**
* Ensures that the tag processor lists unique CSS class names in no-quirks mode.
*
* @ticket 61531
*
* @covers ::class_list
*/
public function test_class_list_no_quirks_mode() {
$processor = WP_HTML_Processor::create_fragment( '<span class="A A a B b É É é"></span>' );
$processor->next_tag();
$class_list = iterator_to_array( $processor->class_list() );
$this->assertSame(
array( 'A', 'a', 'B', 'b', 'É', 'é' ),
$class_list
);
}

/**
* Ensures that the tag processor is case insensitive when removing CSS classes in quirks mode.
*
* @ticket 61531
*
* @covers ::remove_class
*/
public function test_remove_class_quirks_mode() {
$processor = WP_HTML_Processor::create_fragment( '<span class="UPPER"></span>', '<body>', 'UTF-8', WP_HTML_Processor_State::QUIRKS_MODE );
$processor->next_tag();
$processor->remove_class( 'upper' );
$this->assertSame( '<span ></span>', $processor->get_updated_html() );
}

/**
* Ensures that the tag processor is case insensitive when adding CSS classes in quirks mode.
*
* @ticket 61531
*
* @covers ::add_class
*/
public function test_add_class_quirks_mode() {
$processor = WP_HTML_Processor::create_fragment( '<span class="UPPER"></span>', '<body>', 'UTF-8', WP_HTML_Processor_State::QUIRKS_MODE );
$processor->next_tag();
$processor->add_class( 'upper' );
$this->assertSame( '<span class="UPPER"></span>', $processor->get_updated_html() );
}

/**
* Ensures that the tag processor is case sensitive when checking has CSS classes in quirks mode.
*
* @ticket 61531
*
* @covers ::has_class
*/
public function test_has_class_quirks_mode() {
$processor = WP_HTML_Processor::create_fragment( '<span class="UPPER"></span>', '<body>', 'UTF-8', WP_HTML_Processor_State::QUIRKS_MODE );
$processor->next_tag();
$this->assertTrue( $processor->has_class( 'upper' ) );
$this->assertTrue( $processor->has_class( 'UPPER' ) );
}

/**
* Ensures that the tag processor lists unique CSS class names in quirks mode.
*
* @ticket 61531
*
* @covers ::class_list
*/
public function test_class_list_quirks_mode() {
$processor = WP_HTML_Processor::create_fragment( '<span class="A A a B b É É é"></span>', '<body>', 'UTF-8', WP_HTML_Processor_State::QUIRKS_MODE );
$processor->next_tag();
$class_list = iterator_to_array( $processor->class_list() );
$this->assertSame(
array( 'A', 'a', 'B', 'b', 'É', 'é' ),
$class_list
);
}

/**
* Ensures that the tag processor matches class names with null bytes correctly.
*
* @ticket 61531
*
* @covers ::has_class
*/
public function test_has_class_null_byte_class_name() {
$processor = WP_HTML_Processor::create_fragment( "<span class='null-byte-\0-there'></span>" );
$processor->next_tag();
$this->assertTrue( $processor->has_class( 'null-byte-�-there' ) );
}
}
16 changes: 16 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -2211,6 +2211,22 @@ public function test_class_list_visits_unique_class_names_only_once() {
$this->assertSame( array( 'one' ), $found_classes, 'Visited multiple copies of the same class name when it should have skipped the duplicates.' );
}

/**
* Ensures that null bytes are replaced with the replacement character (U+FFFD) in class_list.
*
* @ticket 61531
*
* @covers ::class_list
*/
public function test_class_list_null_bytes_replaced() {
$processor = new WP_HTML_Tag_Processor( "<div class='a \0 b\0 \0c\0'>" );
$processor->next_tag();

$found_classes = iterator_to_array( $processor->class_list() );

$this->assertSame( array( 'a', "\u{FFFD}", "b\u{FFFD}", "\u{FFFD}c\u{FFFD}" ), $found_classes );
}

/**
* @ticket 59209
*
Expand Down