Skip to content
Closed
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion src/wp-includes/html-api/class-wp-html-open-elements.php
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ public function current_node() {
*
* @see https://html.spec.whatwg.org/#has-an-element-in-the-specific-scope
*
* @param string $tag_name Name of tag check.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I missed this; this looks accidental, no?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, thanks. Not sure what happened here.
added back in 6400821

* @param string[] $termination_list List of elements that terminate the search.
* @return bool Whether the element was found in a specific scope.
*/
Expand All @@ -116,6 +115,13 @@ public function has_element_in_specific_scope( $tag_name, $termination_list ) {
return true;
}

if (
'(internal: H1 through H6 - do not use)' === $tag_name &&
in_array( $node->node_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true )
) {
return true;
}

switch ( $node->node_name ) {
case 'HTML':
return false;
Expand Down Expand Up @@ -270,6 +276,13 @@ public function pop_until( $tag_name ) {
foreach ( $this->walk_up() as $item ) {
$this->pop();

if (
'(internal: H1 through H6 - do not use)' === $tag_name &&
in_array( $item->node_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true )
) {
return true;
}

if ( $tag_name === $item->node_name ) {
return true;
}
Expand Down
56 changes: 55 additions & 1 deletion src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@
* - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY.
* - Form elements: BUTTON, FIELDSET, SEARCH.
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
* - Heading elements: HGROUP.
* - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
* - Links: A.
* - Lists: DL.
* - Media elements: FIGCAPTION, FIGURE, IMG.
Expand Down Expand Up @@ -697,6 +697,60 @@ private function step_in_body() {
$this->state->stack_of_open_elements->pop_until( $tag_name );
return true;

/*
* > A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
*/
case '+H1':
case '+H2':
case '+H3':
case '+H4':
case '+H5':
case '+H6':
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
$this->close_a_p_element();
}

if (
in_array(
$this->state->stack_of_open_elements->current_node()->node_name,
array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ),
true
)
) {
// @TODO: Indicate a parse error once it's possible.
$this->state->stack_of_open_elements->pop();
}

$this->insert_html_element( $this->state->current_token );
return true;

/*
* > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
*/
case '-H1':
case '-H2':
case '-H3':
case '-H4':
case '-H5':
case '-H6':
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As an alternative to the string literal, we could spell out the whole list

Suggested change
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) {
if (
! $this->state->stack_of_open_elements->has_element_in_scope( 'H1' ) &&
! $this->state->stack_of_open_elements->has_element_in_scope( 'H2' ) &&
/* etc */
) {

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thank you. as we discussed, I think this is going to be very difficult to do in practice for now, specifically because of the pop_until() behavior. using each element individually requires looping in a way that we don't need to do. I'd prefer we leave this a bit awkward internally for now until we have a better solution that doesn't tradeoff efficiency to get there.

/*
* This is a parse error; ignore the token.
*
* @TODO: Indicate a parse error once it's possible.
*/
return $this->step();
}

$this->generate_implied_end_tags();

if ( $this->state->stack_of_open_elements->current_node()->node_name !== $tag_name ) {
// @TODO: Record parse error: this error doesn't impact parsing.
}

$this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' );
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one's a bit trickier than the other one; we could change pop_until's signature to accept either a string or an array.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see above: if we don't need array passing I'd prefer that so we can avoid creating the array

return true;

/*
* > An end tag whose tag name is "p"
*/
Expand Down
2 changes: 0 additions & 2 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@ public function test_stops_processing_after_unsupported_elements() {
*
* @covers WP_HTML_Processor::next_tag
* @covers WP_HTML_Processor::seek
*
* @throws WP_HTML_Unsupported_Exception
*/
public function test_clear_to_navigate_after_seeking() {
$p = WP_HTML_Processor::create_fragment( '<div one><strong></strong></div><p><strong two></strong></p>' );
Expand Down
54 changes: 31 additions & 23 deletions tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ public function data_single_tag_of_supported_elements() {
'FIGURE',
'FONT',
'FOOTER',
'H1',
'H2',
'H3',
'H4',
'H5',
'H6',
'HEADER',
'HGROUP',
'I',
Expand Down Expand Up @@ -142,12 +148,6 @@ public function data_unsupported_elements() {
'FORM',
'FRAME',
'FRAMESET',
'H1',
'H2',
'H3',
'H4',
'H5',
'H6',
'HEAD',
'HR',
'HTML',
Expand Down Expand Up @@ -352,6 +352,14 @@ public function data_html_target_with_breadcrumbs() {
),
'MAIN inside MAIN inside SPAN' => array( '<span><main><main target>', array( 'HTML', 'BODY', 'SPAN', 'MAIN', 'MAIN' ), 1 ),
'MAIN next to unclosed P' => array( '<p><main target>', array( 'HTML', 'BODY', 'MAIN' ), 1 ),

// H1 - H6 close out _any_ H1 - H6 when encountering _any_ of H1 - H6, making this section surprising.
'EM inside H3 after unclosed P' => array( '<p><h3><em target>Important Message</em></h3>', array( 'HTML', 'BODY', 'H3', 'EM' ), 1 ),
'H4 after H2' => array( '<h2>Major</h2><h4 target>Minor</h4>', array( 'HTML', 'BODY', 'H4' ), 1 ),
'H4 after unclosed H2' => array( '<h2>Major<h4 target>Minor</h3>', array( 'HTML', 'BODY', 'H4' ), 1 ),
'H4 inside H2' => array( '<h2><span>Major<h4 target>Minor</h3></span>', array( 'HTML', 'BODY', 'H2', 'SPAN', 'H4' ), 1 ),
'H5 after unclosed H4 inside H2' => array( '<h2><span>Major<h4>Minor</span></h3><h5 target>', array( 'HTML', 'BODY', 'H2', 'SPAN', 'H5' ), 1 ),
'H5 after H4 inside H2' => array( '<h2><span>Major<h4>Minor</h4></span></h3><h5 target>', array( 'HTML', 'BODY', 'H5' ), 1 ),
);
}

Expand Down Expand Up @@ -387,29 +395,29 @@ public function test_reports_if_tag_matches_breadcrumbs_of_various_specificity(
public function data_html_with_breadcrumbs_of_various_specificity() {
return array(
// Test with void elements.
'Inner IMG' => array( '<div><span><figure><img target></figure></span></div>', array( 'span', 'figure', 'img' ), true ),
'Inner IMG wildcard' => array( '<div><span><figure><img target></figure></span></div>', array( 'span', '*', 'img' ), true ),
'Inner IMG no wildcard' => array( '<div><span><figure><img target></figure></span></div>', array( 'span', 'img' ), false ),
'Full specification' => array( '<div><span><figure><img target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'img' ), true ),
'Invalid Full specification' => array( '<div><span><figure><img target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'img' ), false ),
'Inner IMG' => array( '<div><span><figure><img target></figure></span></div>', array( 'span', 'figure', 'img' ), true ),
'Inner IMG wildcard' => array( '<div><span><figure><img target></figure></span></div>', array( 'span', '*', 'img' ), true ),
'Inner IMG no wildcard' => array( '<div><span><figure><img target></figure></span></div>', array( 'span', 'img' ), false ),
'Full specification' => array( '<div><span><figure><img target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'img' ), true ),
'Invalid Full specification' => array( '<div><span><figure><img target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'img' ), false ),

// Test also with non-void elements that open and close.
'Inner P' => array( '<div><span><figure><p target></figure></span></div>', array( 'span', 'figure', 'p' ), true ),
'Inner P wildcard' => array( '<div><span><figure><p target></figure></span></div>', array( 'span', '*', 'p' ), true ),
'Inner P no wildcard' => array( '<div><span><figure><p target></figure></span></div>', array( 'span', 'p' ), false ),
'Full specification (P)' => array( '<div><span><figure><p target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), true ),
'Invalid Full specification (P)' => array( '<div><span><figure><p target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'p' ), false ),
'Inner P' => array( '<div><span><figure><p target></figure></span></div>', array( 'span', 'figure', 'p' ), true ),
'Inner P wildcard' => array( '<div><span><figure><p target></figure></span></div>', array( 'span', '*', 'p' ), true ),
'Inner P no wildcard' => array( '<div><span><figure><p target></figure></span></div>', array( 'span', 'p' ), false ),
'Full specification (P)' => array( '<div><span><figure><p target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), true ),
'Invalid Full specification (P)' => array( '<div><span><figure><p target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'p' ), false ),

// Ensure that matches aren't on tag closers.
'Inner P' => array( '<div><span><figure></p target></figure></span></div>', array( 'span', 'figure', 'p' ), false ),
'Inner P wildcard' => array( '<div><span><figure></p target></figure></span></div>', array( 'span', '*', 'p' ), false ),
'Inner P no wildcard' => array( '<div><span><figure></p target></figure></span></div>', array( 'span', 'p' ), false ),
'Full specification (P)' => array( '<div><span><figure></p target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), false ),
'Invalid Full specification (P)' => array( '<div><span><figure></p target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'p' ), false ),
'Inner P (Closer)' => array( '<div><span><figure></p target></figure></span></div>', array( 'span', 'figure', 'p' ), false ),
'Inner P wildcard (Closer)' => array( '<div><span><figure></p target></figure></span></div>', array( 'span', '*', 'p' ), false ),
'Inner P no wildcard (Closer)' => array( '<div><span><figure></p target></figure></span></div>', array( 'span', 'p' ), false ),
'Full specification (P) (Closer)' => array( '<div><span><figure></p target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), false ),
'Invalid Full specification (P) (Closer)' => array( '<div><span><figure></p target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'p' ), false ),

// Test wildcard behaviors.
'Single wildcard element' => array( '<figure><code><div><p><span><img target></span></p></div></code></figure>', array( '*' ), true ),
'Child of wildcard element' => array( '<figure><code><div><p><span><img target></span></p></div></code></figure>', array( 'SPAN', '*' ), true ),
'Single wildcard element' => array( '<figure><code><div><p><span><img target></span></p></div></code></figure>', array( '*' ), true ),
'Child of wildcard element' => array( '<figure><code><div><p><span><img target></span></p></div></code></figure>', array( 'SPAN', '*' ), true ),
);
}

Expand Down
16 changes: 2 additions & 14 deletions tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,6 @@ public function data_article_container_group() {
* element in scope, that it skips the tag entirely.
*
* @ticket 58961
*
* @since 6.4.0
*
* @throws Exception
*/
public function test_in_body_skips_unexpected_button_closer() {
$p = WP_HTML_Processor::create_fragment( '<div>Test</button></div>' );
Expand All @@ -145,10 +141,6 @@ public function test_in_body_skips_unexpected_button_closer() {
* Verifies insertion of a BUTTON element when no existing BUTTON is already in scope.
*
* @ticket 58961
*
* @since 6.4.0
*
* @throws WP_HTML_Unsupported_Exception
*/
public function test_in_body_button_with_no_button_in_scope() {
$p = WP_HTML_Processor::create_fragment( '<div><p>Click the button <button one>here</button>!</p></div><button two>not here</button>' );
Expand All @@ -174,8 +166,6 @@ public function test_in_body_button_with_no_button_in_scope() {
* @ticket 58961
*
* @since 6.4.0
*
* @throws WP_HTML_Unsupported_Exception
*/
public function test_in_body_button_with_button_in_scope_as_parent() {
$p = WP_HTML_Processor::create_fragment( '<div><p>Click the button <button one>almost<button two>here</button>!</p></div><button three>not here</button>' );
Expand Down Expand Up @@ -209,8 +199,6 @@ public function test_in_body_button_with_button_in_scope_as_parent() {
* @ticket 58961
*
* @since 6.4.0
*
* @throws WP_HTML_Unsupported_Exception
*/
public function test_in_body_button_with_button_in_scope_as_ancestor() {
$p = WP_HTML_Processor::create_fragment( '<div><button one><p>Click the button <span><button two>here</button>!</span></p></div><button three>not here</button>' );
Expand All @@ -236,7 +224,7 @@ public function test_in_body_button_with_button_in_scope_as_ancestor() {
$this->assertSame( array( 'HTML', 'BODY', 'BUTTON' ), $p->get_breadcrumbs(), 'Failed to produce expected DOM nesting for third button.' );
}

/*
/**
* Verifies that when "in body" and encountering "any other end tag"
* that the HTML processor ignores the end tag if there's a special
* element on the stack of open elements before the matching opening.
Expand All @@ -259,7 +247,7 @@ public function test_in_body_any_other_end_tag_with_unclosed_special_element() {
$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN', 'DIV' ), $p->get_breadcrumbs(), 'Failed to produce expected DOM nesting: SPAN should still be open and DIV should be its child.' );
}

/*
/**
* Verifies that when "in body" and encountering "any other end tag"
* that the HTML processor closes appropriate elements on the stack of
* open elements up to the matching opening.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
<?php
/**
* Unit tests covering WP_HTML_Processor compliance with HTML5 semantic parsing rules
* for the H1 - H6 heading elements.
*
* @package WordPress
* @subpackage HTML-API
*
* @since 6.5.0
*
* @group html-api
*
* @coversDefaultClass WP_HTML_Processor
*/
class Tests_HtmlApi_WpHtmlProcessorSemanticRulesHeadingElements extends WP_UnitTestCase {
/*******************************************************************
* RULES FOR "IN BODY" MODE
*******************************************************************/

/**
* Verifies that H1 through H6 elements generate implied end tags.
*
* @ticket 60060
*
* @covers WP_HTML_Processor::step
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it next_tag we're using here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

more specifically this is covering the semantic rules inside step 🤷‍♂️

*
* @dataProvider data_heading_elements
*
* @param string $tag_name Name of H1 - H6 element under test.
*/
public function test_in_body_heading_element_closes_open_p_tag( $tag_name ) {
$processor = WP_HTML_Processor::create_fragment(
"<p>Open<{$tag_name}>Closed P</{$tag_name}><img></p>"
);

$processor->next_tag( $tag_name );
$this->assertSame(
array( 'HTML', 'BODY', $tag_name ),
$processor->get_breadcrumbs(),
"Expected {$tag_name} to be a direct child of the BODY, having closed the open P element."
);

$processor->next_tag( 'IMG' );
$this->assertSame(
array( 'HTML', 'BODY', 'IMG' ),
$processor->get_breadcrumbs(),
'Expected IMG to be a direct child of BODY, having closed the open P element.'
);
}

/**
* Data provider.
*
* @return array[].
*/
public function data_heading_elements() {
return array(
'H1' => array( 'H1' ),
'H2' => array( 'H2' ),
'H3' => array( 'H3' ),
'H4' => array( 'H4' ),
'H5' => array( 'H5' ),
'H6' => array( 'H5' ),
);
}

/**
* Verifies that H1 through H6 elements close an open H1 through H6 element.
*
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing @ticket and @covers:

Suggested change
*
*
* @ticket 60060
*
* @covers WP_HTML_Processor::next_tag
*

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added in 4772a7d

* @dataProvider data_heading_combinations
*
* @param string $first_heading H1 - H6 element appearing (unclosed) before the second.
* @param string $second_heading H1 - H6 element appearing after the first.
*/
public function test_in_body_heading_element_closes_other_heading_elements( $first_heading, $second_heading ) {
$processor = WP_HTML_Processor::create_fragment(
"<div><{$first_heading} first> then <{$second_heading} second> and end </{$second_heading}><img></{$first_heading}></div>"
);

while ( $processor->next_tag() && null === $processor->get_attribute( 'second' ) ) {
continue;
}

$this->assertTrue(
$processor->get_attribute( 'second' ),
"Failed to find expected {$second_heading} tag."
);

$this->assertSame(
array( 'HTML', 'BODY', 'DIV', $second_heading ),
$processor->get_breadcrumbs(),
"Expected {$second_heading} to be a direct child of the DIV, having closed the open {$first_heading} element."
);

$processor->next_tag( 'IMG' );
$this->assertSame(
array( 'HTML', 'BODY', 'DIV', 'IMG' ),
$processor->get_breadcrumbs(),
"Expected IMG to be a direct child of DIV, having closed the open {$first_heading} element."
);
}

/**
* Data provider.
*
* @return array[]
*/
public function data_heading_combinations() {
$headings = array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' );

$combinations = array();

// Create all unique pairs of H1 - H6 elements.
foreach ( $headings as $first_tag ) {
foreach ( $headings as $second_tag ) {
$combinations[ "{$first_tag} then {$second_tag}" ] = array( $first_tag, $second_tag );
}
}

return $combinations;
}
}