Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Avoid WPCS lint nags; skip tests for unsupported input or fragment context.
  • Loading branch information
dmsnell authored and sirreal committed Dec 19, 2023
commit 0bc81b777881ac45d3be33d5253524a486b79d52
129 changes: 91 additions & 38 deletions tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,32 @@
*/
class Tests_HtmlApi_WpHtmlProcessorHtml5lib extends WP_UnitTestCase {
/**
* Verify the parsing results of the HTML Processor against the
* test cases in the Html5lib tests project.
*
* @ticket {TICKET_NUMBER}
*
* @dataProvider data_external_html5lib_tests
*
* @param string $fragment_context Context element in which to parse HTML, such as BODY or SVG.
* @param string $html Given test HTML.
* @param string $result Tree structure of parsed HTML.
*/
public function test_external_html5lib( $html, $result ) {
$processed = self::build_html5_treelike_string( $html );

if ( $processed["error"] === "unsupported" ) {
$this->markTestSkipped();
return;
public function test_external_html5lib( $fragment_context, $html, $result ) {
$processed_tree = self::build_html5_treelike_string( $fragment_context, $html );
if ( null === $processed_tree ) {
$this->markTestSkipped( 'Skipped test because it contains unsupported markup.' );
} else {
$this->assertEquals( $processed_tree, $result );
}

$this->assertEquals( $processed["output"], $result );
}


/**
* Data provider.
*
* Tests from https://github.com/html5lib/html5lib-tests
*
* @return array[]
*/
public function data_external_html5lib_tests() {
$test_dir = __DIR__ . '/html5lib-tests/tree-construction/';
Expand All @@ -50,48 +58,72 @@ public function data_external_html5lib_tests() {
}


static function build_html5_treelike_string( $html ) {
$p = WP_HTML_Processor::create_fragment( $html );
/**
* Generates the tree-like structure represented in the Html5lib tests.
*
* @param string $fragment_context Context element in which to parse HTML, such as BODY or SVG.
* @param string $html Given test HTML.
* @return string|null Tree structure of parsed HTML, if supported, else null.
*/
static function build_html5_treelike_string( $fragment_context, $html ) {
$p = WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" );
if ( null === $p ) {
return null;
}

$output = "<html>\n <head>\n <body>\n";
while ( $p->next_tag() ) {
// breadcrumbs include our tag, so skip 1 nesting level
// Breadcrumbs include this tag, so skip 1 nesting level.
foreach ( $p->get_breadcrumbs() as $index => $_ ) {
if ( $index ) {
$output .= ' ';
}
}
$t = strtolower( $p->get_tag() );
$t = strtolower( $p->get_tag() );
$output .= "<{$t}>\n";
}

return [ "output" => $output, "error" => $p->get_last_error() ];
if ( WP_HTML_Processor::ERROR_UNSUPPORTED === $p->get_last_error() ) {
return null;
}

return $output;
}

/**
* Convert a given Html5lib test file into a test triplet.
*
* @param string $filename Path to `.dat` file with test cases.
*
* @return array|Generator Test triplets of HTML fragment context element,
* HTML, and the DOM structure it represents.
*/
static function parse_html5_dat_testfile( $filename ) {
$handle = fopen( $filename, 'r', false );

/**
* Represents which section of the test case is being parsed.
*
* @var ?string
*/
$state = null;

$test_html = '';
$test_dom = '';
$test_html = '';
$test_dom = '';
$test_context_element = 'body';

while ( false !== ( $line = fgets( $handle ) ) ) {

if ( $line[0] === '#' ) {
// finish section
if ( $line == "#data\n" ) {
// If we're switching from a previous state, yield
if ( '#' === $line[0] ) {
// Finish section.
if ( "#data\n" === $line ) {
// Yield when switching from a previous state.
if ( $state ) {
yield [ $test_html, $test_dom ];
yield array( $test_context_element, $test_html, $test_dom );
}

// finish previous test
$test_html = "";
$test_dom = "";
// Finish previous test.
$test_html = '';
$test_dom = '';
}

$state = trim( substr( $line, 1 ) );
Expand All @@ -100,23 +132,44 @@ static function parse_html5_dat_testfile( $filename ) {
}

switch ( $state ) {
// Each test must begin with a string "#data" followed by a newline (LF). All
// subsequent lines until a line that says "#errors" are the test data and must be
// passed to the system being tested unchanged, except with the final newline (on the
// last line) removed.
/*
* Each test must begin with a string "#data" followed by a newline (LF). All
* subsequent lines until a line that says "#errors" are the test data and must be
* passed to the system being tested unchanged, except with the final newline (on the
* last line) removed.
*/
case 'data':
$test_html .= $line;
break;

// Then there must be a line that says "#document", which must be followed by a dump of
// the tree of the parsed DOM. Each node must be represented by a single line. Each line
// must start with "| ", followed by two spaces per parent node that the node has before
// the root document node.
/*
* Then there *may* be a line that says "#document-fragment", which must
* be followed by a newline (LF), followed by a string of characters that
* indicates the context element, followed by a newline (LF). If the
* string of characters starts with "svg ", the context element is in
* the SVG namespace and the substring after "svg " is the local name.
* If the string of characters starts with "math ", the context element
* is in the MathML namespace and the substring after "math " is the
* local name. Otherwise, the context element is in the HTML namespace
* and the string is the local name. If this line is present the "#data"
* must be parsed using the HTML fragment parsing algorithm with the
* context element as context.
*/
case 'document-fragment':
$test_context_element = explode( ' ', $line )[0];
break;

/*
* Then there must be a line that says "#document", which must be followed by a dump of
* the tree of the parsed DOM. Each node must be represented by a single line. Each line
* must start with "| ", followed by two spaces per parent node that the node has before
* the root document node.
*/
case 'document':
// Ignore everything that doesn't look like an element
// Ignore everything that doesn't look like an element.
if ( '|' === $line[0] ) {
$candidate = substr( $line, 2 );
$trimmed = trim( $candidate );
$trimmed = trim( $candidate );
if ( '<' === $trimmed[0] && '<!DOCTYPE' !== substr( $trimmed, 0, 9 ) ) {
$test_dom .= $candidate;
}
Expand All @@ -125,10 +178,10 @@ static function parse_html5_dat_testfile( $filename ) {
}
}

// EOF - return our last result
return [ $test_html, $test_dom ];

fclose( $handle );

// Return the last result when reaching the end of the file.
return array( $test_context_element, $test_html, $test_dom );
}
}