/**
 * Returns whether a given element is an HTML tag name.
 *
 * The given name is upper-cased with `strtoupper()` and then looked up in a
 * fixed table of element names, so `div`, `Div`, and `DIV` are all matched.
 * Anything which is not a string is reported as not being an HTML tag.
 *
 * @todo Verify this list.
 *
 * @since 6.5.0
 *
 * @param string $tag_name Tag name to check.
 * @return bool Whether the element is defined in the HTML specification.
 */
public static function is_html_tag( $tag_name ) {
	/*
	 * Keyed by upper-case tag name so that membership is a single hash
	 * lookup instead of a long chain of string comparisons. The table
	 * is built once per request because it's a static local.
	 */
	static $html_tags = array(
		'A'          => true,
		'ABBR'       => true,
		'ACRONYM'    => true, // Neutralized.
		'ADDRESS'    => true,
		'APPLET'     => true, // Deprecated.
		'AREA'       => true,
		'ARTICLE'    => true,
		'ASIDE'      => true,
		'AUDIO'      => true,
		'B'          => true,
		'BASE'       => true,
		'BDI'        => true,
		'BDO'        => true,
		'BGSOUND'    => true, // Deprecated; self-closing if self-closing flag provided, otherwise normal.
		'BIG'        => true,
		'BLINK'      => true, // Deprecated.
		'BLOCKQUOTE' => true,
		'BODY'       => true,
		'BR'         => true,
		'BUTTON'     => true,
		'CANVAS'     => true,
		'CAPTION'    => true,
		'CENTER'     => true, // Neutralized.
		'CITE'       => true,
		'CODE'       => true,
		'COL'        => true,
		'COLGROUP'   => true,
		'DATA'       => true,
		'DATALIST'   => true,
		'DD'         => true,
		'DEL'        => true,
		'DETAILS'    => true,
		'DFN'        => true,
		'DIALOG'     => true,
		'DIR'        => true,
		'DIV'        => true,
		'DL'         => true,
		'DT'         => true,
		'EM'         => true,
		'EMBED'      => true,
		'FIELDSET'   => true,
		'FIGCAPTION' => true,
		'FIGURE'     => true,
		'FONT'       => true,
		'FOOTER'     => true,
		'FORM'       => true,
		'FRAME'      => true,
		'FRAMESET'   => true,
		'H1'         => true,
		'H2'         => true,
		'H3'         => true,
		'H4'         => true,
		'H5'         => true,
		'H6'         => true,
		'HEAD'       => true,
		'HEADER'     => true,
		'HGROUP'     => true,
		'HR'         => true,
		'HTML'       => true,
		'I'          => true,
		'IFRAME'     => true,
		'IMAGE'      => true,
		'IMG'        => true,
		'INPUT'      => true,
		'INS'        => true,
		'ISINDEX'    => true, // Deprecated.
		'KBD'        => true,
		'KEYGEN'     => true, // Deprecated; void.
		'LABEL'      => true,
		'LEGEND'     => true,
		'LI'         => true,
		'LINK'       => true,
		'LISTING'    => true, // Deprecated, use PRE instead.
		'MAIN'       => true,
		'MAP'        => true,
		'MARK'       => true,
		'MARQUEE'    => true, // Deprecated.
		'MATH'       => true,
		'MENU'       => true,
		'META'       => true,
		'METER'      => true,
		'MULTICOL'   => true, // Deprecated.
		'NAV'        => true,
		'NEXTID'     => true, // Deprecated.
		'NOBR'       => true, // Neutralized.
		'NOEMBED'    => true, // Neutralized.
		'NOFRAMES'   => true, // Neutralized.
		'NOSCRIPT'   => true,
		'OBJECT'     => true,
		'OL'         => true,
		'OPTGROUP'   => true,
		'OPTION'     => true,
		'OUTPUT'     => true,
		'P'          => true,
		'PARAM'      => true,
		'PICTURE'    => true,
		'PLAINTEXT'  => true, // Neutralized.
		'PRE'        => true,
		'PROGRESS'   => true,
		'Q'          => true,
		'RB'         => true, // Neutralized.
		'RP'         => true,
		'RT'         => true,
		'RTC'        => true, // Neutralized.
		'RUBY'       => true,
		'S'          => true,
		'SAMP'       => true,
		'SCRIPT'     => true,
		'SEARCH'     => true,
		'SECTION'    => true,
		'SELECT'     => true,
		'SLOT'       => true,
		'SMALL'      => true,
		'SOURCE'     => true,
		'SPACER'     => true, // Deprecated.
		'SPAN'       => true,
		'STRIKE'     => true,
		'STRONG'     => true,
		'STYLE'      => true,
		'SUB'        => true,
		'SUMMARY'    => true,
		'SUP'        => true,
		'SVG'        => true,
		'TABLE'      => true,
		'TBODY'      => true,
		'TD'         => true,
		'TEMPLATE'   => true,
		'TEXTAREA'   => true,
		'TFOOT'      => true,
		'TH'         => true,
		'THEAD'      => true,
		'TIME'       => true,
		'TITLE'      => true,
		'TR'         => true,
		'TRACK'      => true,
		'TT'         => true,
		'U'          => true,
		'UL'         => true,
		'VAR'        => true,
		'VIDEO'      => true,
		'WBR'        => true,
		'XMP'        => true, // Deprecated, use PRE instead.
	);

	// Only strings can name a tag; this also keeps null and arrays away from strtoupper().
	if ( ! is_string( $tag_name ) ) {
		return false;
	}

	return isset( $html_tags[ strtoupper( $tag_name ) ] );
}
* diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index ee6209c69e0ae..9aa44353fd97d 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -2286,15 +2286,16 @@ public function is_tag_closer() { * * For boolean attributes special handling is provided: * - When `true` is passed as the value, then only the attribute name is added to the tag. - * - When `false` is passed, the attribute gets removed if it existed before. + * - When `false` or `null` is passed, the attribute gets removed if it existed before. * * For string attributes, the value is escaped using the `esc_attr` function. * * @since 6.2.0 * @since 6.2.1 Fix: Only create a single update for multiple calls with case-variant attribute names. + * @since 6.5.0 Allows passing `null` to remove attribute. * - * @param string $name The attribute name to target. - * @param string|bool $value The new attribute value. + * @param string $name The attribute name to target. + * @param string|bool|null $value The new attribute value. * @return bool Whether an attribute value was set. */ public function set_attribute( $name, $value ) { @@ -2354,7 +2355,7 @@ public function set_attribute( $name, $value ) { * > To represent a false value, the attribute has to be omitted altogether. * - HTML5 spec, https://html.spec.whatwg.org/#boolean-attributes */ - if ( false === $value ) { + if ( null === $value || false === $value ) { return $this->remove_attribute( $name ); } diff --git a/src/wp-includes/html-api/class-wp-html.php b/src/wp-includes/html-api/class-wp-html.php new file mode 100644 index 0000000000000..189ebf44de134 --- /dev/null +++ b/src/wp-includes/html-api/class-wp-html.php @@ -0,0 +1,154 @@ + 'is-safe' ), 'Hello, world!' ); + * //
<div class="is-safe">Hello, world!</div>
+ * + * echo WP_HTML::tag( 'input', array( 'type' => '"></script>', 'disabled' => true ), 'Is this > that?' ); + * // <input type="&quot;&gt;&lt;/script&gt;" disabled> + * + * echo WP_HTML::tag( 'p', null, 'Is this > that?' ); + * //

<p>Is this &gt; that?</p>

/**
 * Generates the HTML for a single element from a tag name, a set of
 * attributes, and optional inner text.
 *
 * Attributes are written through `WP_HTML_Tag_Processor::set_attribute()`.
 * Inner text is passed through `esc_html()` for every element except
 * SCRIPT and STYLE, whose text is emitted verbatim and is rejected when
 * it contains something that looks like its own closing tag.
 *
 * Example:
 *
 *     echo WP_HTML::tag( 'div', array( 'class' => 'is-safe' ), 'Hello, world!' );
 *     // <div class="is-safe">Hello, world!</div>
 *
 *     echo WP_HTML::tag( 'p', null, 'Is this > that?' );
 *     // <p>Is this &gt; that?</p>
 *
 *     echo WP_HTML::tag( 'wp-emoji', array( 'name' => ':smile:' ), null, 'self-closing' );
 *     // A self-closing `wp-emoji` tag carrying the `name` attribute.
 *
 * @since 6.5.0
 *
 * @param string  $tag_name     Name of tag to create.
 * @param ?array  $attributes   Key/value pairs of attribute names and their values.
 *                              Values may be boolean, null, or a string.
 * @param ?string $inner_text   Will always be escaped to preserve the given string in the rendered page.
 *                              Ignored for void and self-closing elements.
 * @param ?string $element_type 'self-closing' to self-close the generated HTML for a custom-element.
 *                              This only generates the self-closing flag for non-HTML tags, as HTML
 *                              itself contains no self-closing tags.
 * @return string|null Generated HTML for the tag if provided valid inputs, otherwise null.
 */
public static function tag( $tag_name, $attributes = null, $inner_text = null, $element_type = 'html' ) {
	if (
		! is_string( $tag_name ) ||
		( null !== $attributes && ! is_array( $attributes ) ) ||
		( null !== $inner_text && ! is_string( $inner_text ) )
	) {
		return null;
	}

	// Validate tag name.
	if ( '' === $tag_name ) {
		return null;
	}

	// The first byte must fall within [a-zA-Z].
	$tag_initial = ord( $tag_name[0] );
	$is_upper    = $tag_initial >= 65 && $tag_initial <= 90;
	$is_lower    = $tag_initial >= 97 && $tag_initial <= 122;
	if ( ! $is_upper && ! $is_lower ) {
		return null;
	}

	// Whitespace, "/" and ">" would each end the tag name early.
	if ( strlen( $tag_name ) !== strcspn( $tag_name, " \t\f\r\n/>" ) ) {
		return null;
	}

	$is_void     = WP_HTML_Processor::is_void( $tag_name );
	$self_closes = (
		! $is_void &&
		'self-closing' === $element_type &&
		! WP_HTML_Processor::is_html_tag( $tag_name )
	);

	$closing_tag = "</{$tag_name}>";

	/*
	 * The closing tag is unexpected here, but it's required for
	 * special tags with modifiable text, such as TEXTAREA.
	 */
	$source_html = $self_closes ? "<{$tag_name}/>" : "<{$tag_name}>{$closing_tag}";

	$processor = new WP_HTML_Tag_Processor( $source_html );
	if ( ! $processor->next_tag() ) {
		// Without a matched opening tag there is nothing to put attributes on.
		return null;
	}

	if ( null !== $attributes ) {
		foreach ( $attributes as $name => $value ) {
			$processor->set_attribute( $name, $value );
		}
	}

	$html = $processor->get_updated_html();

	// No closing tag was added for a self-closing element, so none is removed.
	if ( $self_closes ) {
		return $html;
	}

	/*
	 * Strip off the closing tag which was added above; it will be
	 * re-added if necessary after appending the inner text.
	 */
	$html = substr( $html, 0, -strlen( $closing_tag ) );

	if ( $is_void ) {
		return $html;
	}

	// Compare against the empty string so that the text "0" is not dropped.
	if ( null !== $inner_text && '' !== $inner_text ) {
		$big_tag_name = strtoupper( $tag_name );

		/*
		 * Since HTML PRE and TEXTAREA elements strip a leading newline, if
		 * their inner content contains a leading newline, then they _need_
		 * to begin with a leading newline before the inner text so that it
		 * doesn't confuse the syntax for the content.
		 */
		if (
			( 'PRE' === $big_tag_name || 'TEXTAREA' === $big_tag_name ) &&
			"\n" === $inner_text[0]
		) {
			$html .= "\n";
		}

		switch ( $big_tag_name ) {
			case 'SCRIPT':
			case 'STYLE':
				/*
				 * Over-zealously prevent escaping from SCRIPT and STYLE tags.
				 * It would be more complete to run the Tag Processor and look
				 * for the appropriate closers, but that requires parsing the
				 * contents which could add unexpected cost. This simplification
				 * will reject some rare and valid SCRIPT and STYLE text contents,
				 * but will never allow invalid ones.
				 */
				if ( false !== stripos( $inner_text, "</{$big_tag_name}" ) ) {
					return null;
				}

				// Character references are not decoded in these elements, so the text is not escaped.
				$html .= $inner_text;
				break;

			default:
				$html .= esc_html( $inner_text );
		}
	}

	$html .= $closing_tag;

	return $html;
}
'/class-wp-http-streams.php'; require ABSPATH . WPINC . '/class-wp-http-curl.php'; From a7d6cca4a9498554cfbe357b3f4c9b096cb586e5 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 1 Feb 2024 17:18:29 +0100 Subject: [PATCH 2/2] Add missing HTML elements --- src/wp-includes/html-api/class-wp-html-processor.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index c729161e286e5..a47fcc0260019 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1574,6 +1574,7 @@ public static function is_html_tag( $tag_name ) { 'BGSOUND' === $tag_name || // Deprecated; self-closing if self-closing flag provided, otherwise normal. 'BIG' === $tag_name || 'BLINK' === $tag_name || // Deprecated. + 'BLOCKQUOTE' === $tag_name || 'BODY' === $tag_name || 'BR' === $tag_name || 'BUTTON' === $tag_name || @@ -1618,6 +1619,7 @@ public static function is_html_tag( $tag_name ) { 'HTML' === $tag_name || 'I' === $tag_name || 'IFRAME' === $tag_name || + 'IMAGE' === $tag_name || 'IMG' === $tag_name || 'INPUT' === $tag_name || 'INS' === $tag_name || @@ -1650,6 +1652,7 @@ public static function is_html_tag( $tag_name ) { 'OPTION' === $tag_name || 'OUTPUT' === $tag_name || 'P' === $tag_name || + 'PARAM' === $tag_name || 'PICTURE' === $tag_name || 'PLAINTEXT' === $tag_name || // Neutralized. 'PRE' === $tag_name || @@ -1660,6 +1663,7 @@ public static function is_html_tag( $tag_name ) { 'RT' === $tag_name || 'RTC' === $tag_name || // Neutralized. 'RUBY' === $tag_name || + 'S' === $tag_name || 'SAMP' === $tag_name || 'SCRIPT' === $tag_name || 'SEARCH' === $tag_name ||