diff --git a/src/wp-content/themes/twentytwentyfour/theme.json b/src/wp-content/themes/twentytwentyfour/theme.json index 7988b1af5c15f..2586fcab0a7ec 100644 --- a/src/wp-content/themes/twentytwentyfour/theme.json +++ b/src/wp-content/themes/twentytwentyfour/theme.json @@ -247,7 +247,7 @@ { "fontFamily": "Iowan Old Style, Apple Garamond, Baskerville, Times New Roman, Droid Serif, Times, Source Serif Pro, serif, Apple Color Emoji, Segoe UI Emoji, Segoe UI Symbol", "name": "System Serif", - "slug": "system-Serif" + "slug": "system-serif" } ], "fontSizes": [ diff --git a/src/wp-includes/block-patterns.php b/src/wp-includes/block-patterns.php index 66bdfd68e7caa..e5c770e1d4330 100644 --- a/src/wp-includes/block-patterns.php +++ b/src/wp-includes/block-patterns.php @@ -135,6 +135,20 @@ function _register_core_block_patterns_and_categories() { 'description' => __( 'Different layouts containing video or audio.' ), ) ); + register_block_pattern_category( + 'videos', + array( + 'label' => _x( 'Videos', 'Block pattern category' ), + 'description' => __( 'Different layouts containing videos.' ), + ) + ); + register_block_pattern_category( + 'audio', + array( + 'label' => _x( 'Audio', 'Block pattern category' ), + 'description' => __( 'Different layouts containing audio.' ), + ) + ); register_block_pattern_category( 'posts', array( diff --git a/src/wp-includes/block-template-utils.php b/src/wp-includes/block-template-utils.php index e2cd7c1587b9f..3f912ad0611b6 100644 --- a/src/wp-includes/block-template-utils.php +++ b/src/wp-includes/block-template-utils.php @@ -251,7 +251,7 @@ function _get_block_templates_paths( $base_directory ) { * @param string $template_type 'wp_template' or 'wp_template_part'. * @param string $slug Template slug. * @return array|null { - * Array with template metadata if $template_type is one of 'wp_template' or 'wp_template_part'. + * Array with template metadata if $template_type is one of 'wp_template' or 'wp_template_part', * null otherwise. 
* * @type string $slug Template slug. diff --git a/src/wp-includes/blocks.php b/src/wp-includes/blocks.php index 431e2b015332c..dcb899c9e532d 100644 --- a/src/wp-includes/blocks.php +++ b/src/wp-includes/blocks.php @@ -758,22 +758,28 @@ function get_hooked_blocks() { } /** - * Conditionally returns the markup for a given hooked block type. + * Conditionally returns the markup for a given hooked block. * - * Accepts two arguments: A reference to an anchor block, and the name of a hooked block type. + * Accepts three arguments: A hooked block, its type, and a reference to an anchor block. * If the anchor block has already been processed, and the given hooked block type is in the list * of ignored hooked blocks, an empty string is returned. * + * The hooked block type is specified separately as it's possible that a filter might've modified + * the hooked block such that `$hooked_block['blockName']` no longer reflects the original type. + * * This function is meant for internal use only. * * @since 6.5.0 * @access private * - * @param array $anchor_block The anchor block. Passed by reference. - * @param string $hooked_block_type The name of the hooked block type. - * @return string The markup for the given hooked block type, or an empty string if the block is ignored. + * @param array $hooked_block The hooked block, represented as a parsed block array. + * @param string $hooked_block_type The type of the hooked block. This could be different from + * $hooked_block['blockName'], as a filter might've modified the latter. + * @param array $anchor_block The anchor block, represented as a parsed block array. + * Passed by reference. + * @return string The markup for the given hooked block, or an empty string if the block is ignored. */ -function get_hooked_block_markup( &$anchor_block, $hooked_block_type ) { +function get_hooked_block_markup( $hooked_block, $hooked_block_type, &$anchor_block ) { if ( ! 
isset( $anchor_block['attrs']['metadata']['ignoredHookedBlocks'] ) ) { $anchor_block['attrs']['metadata']['ignoredHookedBlocks'] = array(); } @@ -786,7 +792,70 @@ function get_hooked_block_markup( &$anchor_block, $hooked_block_type ) { // However, its presence does not affect the frontend. $anchor_block['attrs']['metadata']['ignoredHookedBlocks'][] = $hooked_block_type; - return get_comment_delimited_block_content( $hooked_block_type, array(), '' ); + return serialize_block( $hooked_block ); +} + +/** + * Returns the markup for blocks hooked to the given anchor block in a specific relative position. + * + * @since 6.5.0 + * @access private + * + * @param array $parsed_anchor_block The anchor block, in parsed block array format. + * @param string $relative_position The relative position of the hooked blocks. + * Can be one of 'before', 'after', 'first_child', or 'last_child'. + * @param array $hooked_blocks An array of hooked block types, grouped by anchor block and relative position. + * @param WP_Block_Template|array $context The block template, template part, or pattern that the anchor block belongs to. + * @return string + */ +function insert_hooked_blocks( &$parsed_anchor_block, $relative_position, $hooked_blocks, $context ) { + $anchor_block_type = $parsed_anchor_block['blockName']; + $hooked_block_types = isset( $hooked_blocks[ $anchor_block_type ][ $relative_position ] ) + ? $hooked_blocks[ $anchor_block_type ][ $relative_position ] + : array(); + + /** + * Filters the list of hooked block types for a given anchor block type and relative position. + * + * @since 6.4.0 + * + * @param string[] $hooked_block_types The list of hooked block types. + * @param string $relative_position The relative position of the hooked blocks. + * Can be one of 'before', 'after', 'first_child', or 'last_child'. + * @param string $anchor_block_type The anchor block type. 
+ * @param WP_Block_Template|array $context The block template, template part, or pattern that the anchor block belongs to. + */ + $hooked_block_types = apply_filters( 'hooked_block_types', $hooked_block_types, $relative_position, $anchor_block_type, $context ); + + $markup = ''; + foreach ( $hooked_block_types as $hooked_block_type ) { + $parsed_hooked_block = array( + 'blockName' => $hooked_block_type, + 'attrs' => array(), + 'innerBlocks' => array(), + 'innerContent' => array(), + ); + + /** + * Filters the parsed block array for a given hooked block. + * + * The dynamic portion of the hook name, `$hooked_block_type`, refers to the block type name of the specific hooked block. + * + * @since 6.5.0 + * + * @param array $parsed_hooked_block The parsed block array for the given hooked block type. + * @param string $relative_position The relative position of the hooked block. + * @param array $parsed_anchor_block The anchor block, in parsed block array format. + * @param WP_Block_Template|array $context The block template, template part, or pattern that the anchor block belongs to. + */ + $parsed_hooked_block = apply_filters( "hooked_block_{$hooked_block_type}", $parsed_hooked_block, $relative_position, $parsed_anchor_block, $context ); + + // It's possible that the `hooked_block_{$hooked_block_type}` filter returned a block of a different type, + // so we need to pass the original $hooked_block_type as well. + $markup .= get_hooked_block_markup( $parsed_hooked_block, $hooked_block_type, $parsed_anchor_block ); + } + + return $markup; } /** @@ -826,40 +895,10 @@ function make_before_block_visitor( $hooked_blocks, $context ) { if ( $parent_block && ! $prev ) { // Candidate for first-child insertion. - $relative_position = 'first_child'; - $anchor_block_type = $parent_block['blockName']; - $hooked_block_types = isset( $hooked_blocks[ $anchor_block_type ][ $relative_position ] ) - ? 
$hooked_blocks[ $anchor_block_type ][ $relative_position ] - : array(); - - /** - * Filters the list of hooked block types for a given anchor block type and relative position. - * - * @since 6.4.0 - * - * @param string[] $hooked_block_types The list of hooked block types. - * @param string $relative_position The relative position of the hooked blocks. - * Can be one of 'before', 'after', 'first_child', or 'last_child'. - * @param string $anchor_block_type The anchor block type. - * @param WP_Block_Template|array $context The block template, template part, or pattern that the anchor block belongs to. - */ - $hooked_block_types = apply_filters( 'hooked_block_types', $hooked_block_types, $relative_position, $anchor_block_type, $context ); - foreach ( $hooked_block_types as $hooked_block_type ) { - $markup .= get_hooked_block_markup( $parent_block, $hooked_block_type ); - } + $markup .= insert_hooked_blocks( $parent_block, 'first_child', $hooked_blocks, $context ); } - $relative_position = 'before'; - $anchor_block_type = $block['blockName']; - $hooked_block_types = isset( $hooked_blocks[ $anchor_block_type ][ $relative_position ] ) - ? $hooked_blocks[ $anchor_block_type ][ $relative_position ] - : array(); - - /** This filter is documented in wp-includes/blocks.php */ - $hooked_block_types = apply_filters( 'hooked_block_types', $hooked_block_types, $relative_position, $anchor_block_type, $context ); - foreach ( $hooked_block_types as $hooked_block_type ) { - $markup .= get_hooked_block_markup( $block, $hooked_block_type ); - } + $markup .= insert_hooked_blocks( $block, 'before', $hooked_blocks, $context ); return $markup; }; @@ -895,33 +934,11 @@ function make_after_block_visitor( $hooked_blocks, $context ) { * @return string The serialized markup for the given block, with the markup for any hooked blocks appended to it. 
*/ return function ( &$block, &$parent_block = null, $next = null ) use ( $hooked_blocks, $context ) { - $markup = ''; - - $relative_position = 'after'; - $anchor_block_type = $block['blockName']; - $hooked_block_types = isset( $hooked_blocks[ $anchor_block_type ][ $relative_position ] ) - ? $hooked_blocks[ $anchor_block_type ][ $relative_position ] - : array(); - - /** This filter is documented in wp-includes/blocks.php */ - $hooked_block_types = apply_filters( 'hooked_block_types', $hooked_block_types, $relative_position, $anchor_block_type, $context ); - foreach ( $hooked_block_types as $hooked_block_type ) { - $markup .= get_hooked_block_markup( $block, $hooked_block_type ); - } + $markup = insert_hooked_blocks( $block, 'after', $hooked_blocks, $context ); if ( $parent_block && ! $next ) { // Candidate for last-child insertion. - $relative_position = 'last_child'; - $anchor_block_type = $parent_block['blockName']; - $hooked_block_types = isset( $hooked_blocks[ $anchor_block_type ][ $relative_position ] ) - ? $hooked_blocks[ $anchor_block_type ][ $relative_position ] - : array(); - - /** This filter is documented in wp-includes/blocks.php */ - $hooked_block_types = apply_filters( 'hooked_block_types', $hooked_block_types, $relative_position, $anchor_block_type, $context ); - foreach ( $hooked_block_types as $hooked_block_type ) { - $markup .= get_hooked_block_markup( $parent_block, $hooked_block_type ); - } + $markup .= insert_hooked_blocks( $parent_block, 'last_child', $hooked_blocks, $context ); } return $markup; diff --git a/src/wp-includes/blocks/image.php b/src/wp-includes/blocks/image.php index acefd5714bbd4..7df0949c8dfa0 100644 --- a/src/wp-includes/blocks/image.php +++ b/src/wp-includes/blocks/image.php @@ -229,26 +229,32 @@ function block_core_image_render_lightbox( $block_content, $block ) { $body_content = $w->get_updated_html(); // Add a button alongside image in the body content. 
- $img = null; - preg_match( '/]+>/', $body_content, $img ); - - $button = - $img[0] - . ''; - - $body_content = preg_replace( '/]+>/', $button, $body_content ); + $body_content = preg_replace_callback( + '/]+>/', + static function ( $img_match ) use ( $aria_label ) { + $button_html = WP_HTML::render( + <<<'HTML' + +HTML, + array( + 'label' => $aria_label, + 'interactivity' => array( + 'data-wp-on--click' => 'actions.core.image.showLightbox', + 'data-wp-style--right' => 'context.core.image.imageButtonRight', + 'data-wp-style--top' => 'context.core.image.imageButtonTop', + ), + ) + ); + + return $img_match[0] . $button_html; + }, + $body_content, + 1 + ); // We need both a responsive image and an enlarged image to animate // the zoom seamlessly on slow internet connections; the responsive @@ -295,40 +301,46 @@ class="lightbox-trigger" if ( wp_theme_has_theme_json() ) { $global_styles_color = wp_get_global_styles( array( 'color' ) ); if ( ! empty( $global_styles_color['background'] ) ) { - $background_color = esc_attr( $global_styles_color['background'] ); + $background_color = $global_styles_color['background']; } if ( ! 
empty( $global_styles_color['text'] ) ) { - $close_button_color = esc_attr( $global_styles_color['text'] ); + $close_button_color = $global_styles_color['text']; } } - $close_button_icon = ''; - $close_button_label = esc_attr__( 'Close' ); - - $lightbox_html = << - - - - - -HTML; + $lightbox_html = WP_HTML::render( + << + + + + + +HTML, + array( + 'background_color' => $background_color, + 'close_button_color' => $close_button_color, + 'close_label' => __( 'Close' ), + 'lightbox_animation_class' => $lightbox_animation, + 'interactivity' => array( + 'data-wp-bind--role' => 'selectors.core.image.roleAttribute', + 'data-wp-bind--aria-label' => 'selectors.core.image.dialogLabel', + 'data-wp-body' => '', + 'data-wp-class--initialized' => 'context.core.image.initialized', + 'data-wp-class--active' => 'context.core.image.lightboxEnabled', + 'data-wp-class--hideAnimationEnabled' => 'context.core.image.hideAnimationEnabled', + 'data-wp-bind--aria-modal' => 'selectors.core.image.ariaModal', + 'data-wp-effect' => 'effects.core.image.initLightbox', + 'data-wp-on--keydown' => 'actions.core.image.handleKeydown', + 'data-wp-on--touchstart' => 'actions.core.image.handleTouchStart', + 'data-wp-on--touchmove' => 'actions.core.image.handleTouchMove', + 'data-wp-on--touchend' => 'actions.core.image.handleTouchEnd', + 'data-wp-on--click' => 'actions.core.image.hideLightbox', + ), + ) + ); return str_replace( '', $lightbox_html . '', $body_content ); } diff --git a/src/wp-includes/canonical.php b/src/wp-includes/canonical.php index 0b94791fd9b45..093493731f91a 100644 --- a/src/wp-includes/canonical.php +++ b/src/wp-includes/canonical.php @@ -550,13 +550,23 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) { $is_attachment_redirect = false; if ( is_attachment() && ! 
get_option( 'wp_attachment_pages_enabled' ) ) { - $attachment_id = get_query_var( 'attachment_id' ); - - if ( current_user_can( 'read_post', $attachment_id ) ) { - $redirect_url = wp_get_attachment_url( $attachment_id ); - - $is_attachment_redirect = true; + $attachment_id = get_query_var( 'attachment_id' ); + $attachment_post = get_post( $attachment_id ); + $attachment_parent_id = $attachment_post ? $attachment_post->post_parent : 0; + + $attachment_url = wp_get_attachment_url( $attachment_id ); + if ( $attachment_url !== $redirect_url ) { + /* + * If an attachment is attached to a post, it inherits the parent post's status. Fetch the + * parent post to check its status later. + */ + if ( $attachment_parent_id ) { + $redirect_obj = get_post( $attachment_parent_id ); + } + $redirect_url = $attachment_url; } + + $is_attachment_redirect = true; } $redirect['query'] = preg_replace( '#^\??&*?#', '', $redirect['query'] ); diff --git a/src/wp-includes/class-wp-editor.php b/src/wp-includes/class-wp-editor.php index 5d7ba224cc207..7f5ef265c6fbc 100644 --- a/src/wp-includes/class-wp-editor.php +++ b/src/wp-includes/class-wp-editor.php @@ -158,12 +158,8 @@ public static function parse_settings( $editor_id, $settings ) { */ public static function editor( $content, $editor_id, $settings = array() ) { $set = self::parse_settings( $editor_id, $settings ); - $editor_class = ' class="' . trim( esc_attr( $set['editor_class'] ) . ' wp-editor-area' ) . '"'; - $tabindex = $set['tabindex'] ? ' tabindex="' . (int) $set['tabindex'] . '"' : ''; $default_editor = 'html'; $buttons = ''; - $autocomplete = ''; - $editor_id_attr = esc_attr( $editor_id ); if ( $set['drag_drop_upload'] ) { self::$drag_drop_upload = true; @@ -180,8 +176,6 @@ public static function editor( $content, $editor_id, $settings = array() ) { } if ( self::$this_tinymce ) { - $autocomplete = ' autocomplete="off"'; - if ( self::$this_quicktags ) { $default_editor = $set['default_editor'] ? 
$set['default_editor'] : wp_default_editor(); // 'html' is used for the "Text" editor tab. @@ -189,10 +183,16 @@ public static function editor( $content, $editor_id, $settings = array() ) { $default_editor = 'tinymce'; } - $buttons .= '\n"; - $buttons .= '\n"; + $buttons .= WP_HTML::render( << + +HTML, + array( + 'id' => $editor_id, + 'text_label' => _x( 'Text', 'Name for the Text editor tab (formerly HTML)' ), + 'visual_label' => _x( 'Visual', 'Name for the Visual editor tab' ), + ) + ); } else { $default_editor = 'tinymce'; } @@ -201,11 +201,13 @@ public static function editor( $content, $editor_id, $settings = array() ) { $switch_class = 'html' === $default_editor ? 'html-active' : 'tmce-active'; $wrap_class = 'wp-core-ui wp-editor-wrap ' . $switch_class; - if ( $set['_content_editor_dfw'] ) { - $wrap_class .= ' has-dfw'; - } - - echo '
'; + echo WP_HTML::render( + '
', + array( + 'id' => $editor_id, + 'has_dfw' => $set['_content_editor_dfw'] ? 'has-dfw' : '', + ) + ); if ( self::$editor_buttons_css ) { wp_print_styles( 'editor-buttons' ); @@ -217,7 +219,10 @@ public static function editor( $content, $editor_id, $settings = array() ) { } if ( ! empty( $buttons ) || $set['media_buttons'] ) { - echo '
'; + echo WP_HTML::render( + '
', + array( 'id' => $editor_id ) + ); if ( $set['media_buttons'] ) { self::$has_medialib = true; @@ -226,7 +231,10 @@ public static function editor( $content, $editor_id, $settings = array() ) { require ABSPATH . 'wp-admin/includes/media.php'; } - echo '
'; + echo WP_HTML::render( + '
', + array( 'id' => $editor_id ) + ); /** * Fires after the default media button(s) are displayed. @@ -249,10 +257,13 @@ public static function editor( $content, $editor_id, $settings = array() ) { if ( 'content' === $editor_id && ! empty( $GLOBALS['current_screen'] ) && 'post' === $GLOBALS['current_screen']->base ) { $toolbar_id = 'ed_toolbar'; } else { - $toolbar_id = 'qt_' . $editor_id_attr . '_toolbar'; + $toolbar_id = 'qt_' . $editor_id . '_toolbar'; } - $quicktags_toolbar = '
'; + $quicktags_toolbar = WP_HTML::render( + '
', + array( 'id' => $toolbar_id ) + ); } /** @@ -264,10 +275,28 @@ public static function editor( $content, $editor_id, $settings = array() ) { */ $the_editor = apply_filters( 'the_editor', - '
' . + WP_HTML::render( + '
', + array( 'id' => $editor_id ) + ) . $quicktags_toolbar . - '%s
' + WP_HTML::render( + <<<'HTML' + +HTML, + array( + 'autocomplete' => self::$this_tinymce ? 'off' : null, + 'editor_class' => trim( "{$set['editor_class']} wp-editor-area" ), + 'height' => ! empty( $set['editor_height'] ) + ? array( 'style' => "height: {$set['editor_height']}px;" ) + : array( 'rows' => (string) $set['textarea_rows'] ), + 'id' => $editor_id, + 'name' => $set['textarea_name'], + 'tabindex' => $set['tabindex'] ? (string) $set['tabindex'] : null, + ) + ) . + '
' ); // Prepare the content for the Visual or Text editor, only when TinyMCE is used (back-compat). @@ -300,12 +329,16 @@ public static function editor( $content, $editor_id, $settings = array() ) { $content = apply_filters_deprecated( 'richedit_pre', array( $content ), '4.3.0', 'format_for_editor' ); } - if ( false !== stripos( $content, 'textarea' ) ) { - $content = preg_replace( '%next_tag( 'TEXTAREA' ) ) { + if ( $editor_id === $processor->get_attribute( 'id' ) ) { + $processor->set_modifiable_text( $content ); + break; + } } + $the_editor = $processor->get_updated_html(); - printf( $the_editor, $content ); - echo "\n
\n\n"; + echo "{$the_editor}\n
\n\n"; self::editor_settings( $editor_id, $set ); } diff --git a/src/wp-includes/class-wp-locale-switcher.php b/src/wp-includes/class-wp-locale-switcher.php index b3e163014aa90..9f1c4831ed446 100644 --- a/src/wp-includes/class-wp-locale-switcher.php +++ b/src/wp-includes/class-wp-locale-switcher.php @@ -283,7 +283,7 @@ private function change_locale( $locale ) { $wp_locale = new WP_Locale(); - WP_Translation_Controller::instance()->set_locale( $locale ); + WP_Translation_Controller::get_instance()->set_locale( $locale ); /** * Fires when the locale is switched to or restored. diff --git a/src/wp-includes/class-wp-theme-json.php b/src/wp-includes/class-wp-theme-json.php index 4094e115242c1..f73781b2b3181 100644 --- a/src/wp-includes/class-wp-theme-json.php +++ b/src/wp-includes/class-wp-theme-json.php @@ -935,7 +935,7 @@ protected static function get_blocks_metadata() { if ( $duotone_support ) { $root_selector = wp_get_block_css_selector( $block_type ); - $duotone_selector = WP_Theme_JSON::scope_selector( $root_selector, $duotone_support ); + $duotone_selector = static::scope_selector( $root_selector, $duotone_support ); } } @@ -1078,7 +1078,7 @@ public function get_stylesheet( $types = array( 'variables', 'styles', 'presets' $setting_nodes[ $root_settings_key ]['selector'] = $options['root_selector']; } if ( false !== $root_style_key ) { - $setting_nodes[ $root_style_key ]['selector'] = $options['root_selector']; + $style_nodes[ $root_style_key ]['selector'] = $options['root_selector']; } } diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 8a3f981ae6363..4bde9e1c099c8 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -149,17 +149,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { */ const MAX_BOOKMARKS = 100; - /** - * Static query for instructing the Tag Processor to visit every token. 
- * - * @access private - * - * @since 6.4.0 - * - * @var array - */ - const VISIT_EVERYTHING = array( 'tag_closers' => 'visit' ); - /** * Holds the working state of the parser, including the stack of * open elements and the stack of active formatting elements. @@ -424,6 +413,30 @@ public function next_tag( $query = null ) { return false; } + /** + * Ensures internal accounting is maintained for HTML semantic rules while + * the underlying Tag Processor class is seeking to a bookmark. + * + * This doesn't currently have a way to represent non-tags and doesn't process + * semantic rules for text nodes. For access to the raw tokens consider using + * WP_HTML_Tag_Processor instead. + * + * @since 6.5.0 Added for internal support; do not use. + * + * @access private + * + * @return bool + */ + public function next_token() { + $found_a_token = parent::next_token(); + + if ( '#tag' === $this->get_token_type() ) { + $this->step( self::REPROCESS_CURRENT_NODE ); + } + + return $found_a_token; + } + /** * Indicates if the currently-matched tag matches the given breadcrumbs. * @@ -520,7 +533,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) { $this->state->stack_of_open_elements->pop(); } - parent::next_tag( self::VISIT_EVERYTHING ); + while ( parent::next_token() && '#tag' !== $this->get_token_type() ) { + continue; + } } // Finish stepping when there are no more tokens in the document. diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index ee6209c69e0ae..0618500426aaf 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -247,6 +247,95 @@ * } * } * + * ## Tokens and finer-grained processing. + * + * It's possible to scan through every lexical token in the + * HTML document using the `next_token()` function. This + * alternative form takes no argument and provides no built-in + * query syntax. 
+ * + * Example: + * + * $title = '(untitled)'; + * $text = ''; + * while ( $processor->next_token() ) { + * switch ( $processor->get_token_name() ) { + * case '#text': + * $text .= $processor->get_modifiable_text(); + * break; + * + * case 'BR': + * $text .= "\n"; + * break; + * + * case 'TITLE': + * $title = $processor->get_modifiable_text(); + * break; + * } + * } + * return trim( "# {$title}\n\n{$text}" ); + * + * ### Tokens and _modifiable text_. + * + * #### Special "atomic" HTML elements. + * + * Not all HTML elements are able to contain other elements inside of them. + * For instance, the contents inside a TITLE element are plaintext (except + * that character references like &amp; will be decoded). This means that + * if the string `<img>` appears inside a TITLE element, then it's not an + * image tag, but rather it's text describing an image tag. Likewise, the + * contents of a SCRIPT or STYLE element are handled entirely separately in + * a browser than the contents of other elements because they represent a + * different language than HTML. + * + * For these elements the Tag Processor treats the entire sequence as one, + * from the opening tag, including its contents, through its closing tag. + * This means that it's not possible to match the closing tag for a + * SCRIPT element unless it's unexpected; the Tag Processor already matched + * it when it found the opening tag. + * + * The inner contents of these elements are that element's _modifiable text_. + * + * The special elements are: + * - `SCRIPT` whose contents are treated as raw plaintext but supports a legacy + * style of including Javascript inside of HTML comments to avoid accidentally + * closing the SCRIPT from inside a Javascript string. E.g. `console.log( '<!--<script></script>' )`. + * - `TITLE` and `TEXTAREA` whose contents are treated as plaintext and then any + * character references are decoded. E.g. `1 &lt; 2 < 3` becomes `1 < 2 < 3`. 
+ * - `IFRAME`, `NOEMBED`, `NOFRAMES`, `STYLE`, `XMP` whose contents are treated as + * raw plaintext and left as-is. E.g. `1 &lt; 2 < 3` remains `1 &lt; 2 < 3`. + * + * #### Other tokens with modifiable text. + * + * There are also non-elements which are void/self-closing in nature and contain + * modifiable text that is part of that individual syntax token itself. + * + * - `#text` nodes, whose entire token _is_ the modifiable text. + * - HTML comments and tokens that become comments due to some syntax error. The + * text for these tokens is the portion of the comment inside of the syntax. + * E.g. for `<!-- comment -->` the text is `" comment "` (note the spaces are included). + * - `CDATA` sections, whose text is the content inside of the section itself. E.g. for + * `<![CDATA[some content]]>` the text is `"some content"` (with restrictions [1]). + * - "Funky comments," which are a special case of invalid closing tags whose name is + * invalid. The text for these nodes is the text that a browser would transform into + * an HTML comment when parsing. E.g. for `</%post_author>` the text is `%post_author`. + * - `DOCTYPE` declarations like `<!DOCTYPE html>` which have no closing tag. + * - XML Processing instruction nodes like `<?wp __( "Like" ); ?>` (with restrictions [2]). + * - The empty end tag `</>` which is ignored in the browser and DOM. + * + * [1]: There are no CDATA sections in HTML. When encountering `<![CDATA[`, everything + * until the next `>` becomes a bogus HTML comment, meaning there can be no CDATA + * section in an HTML document containing `>`. The Tag Processor will first find + * all valid and bogus HTML comments, and then if the comment _would_ have been a + * CDATA section _were they to exist_, it will indicate this as the type of comment. + * + * [2]: XML allows a broader range of characters in a processing instruction's target name + * and disallows "xml" as a name, since it's special. The Tag Processor only recognizes + * target names with an ASCII-representable subset of characters. 
It also exhibits the + * same constraint as with CDATA sections, in that `>` cannot exist within the token + * since Processing Instructions do not exist within HTML and their syntax transforms + * into a bogus comment in the DOM. + * * ## Design and limitations * * The Tag Processor is designed to linearly scan HTML documents and tokenize @@ -320,7 +409,8 @@ * @since 6.2.1 Fix: Support for various invalid comments; attribute updates are case-insensitive. * @since 6.3.2 Fix: Skip HTML-like content inside rawtext elements such as STYLE. * @since 6.5.0 Pauses processor when input ends in an incomplete syntax token. - * Introduces "special" elements which act like void elements, e.g. STYLE. + * Introduces "special" elements which act like void elements, e.g. TITLE, STYLE. + * Allows scanning through all tokens and processing modifiable text, where applicable. */ class WP_HTML_Tag_Processor { /** @@ -396,23 +486,47 @@ class WP_HTML_Tag_Processor { /** * Specifies mode of operation of the parser at any given time. * - * | State | Meaning | - * | --------------|----------------------------------------------------------------------| - * | *Ready* | The parser is ready to run. | - * | *Complete* | There is nothing left to parse. | - * | *Incomplete* | The HTML ended in the middle of a token; nothing more can be parsed. | - * | *Matched tag* | Found an HTML tag; it's possible to modify its attributes. | + * | State | Meaning | + * | ----------------|----------------------------------------------------------------------| + * | *Ready* | The parser is ready to run. | + * | *Complete* | There is nothing left to parse. | + * | *Incomplete* | The HTML ended in the middle of a token; nothing more can be parsed. | + * | *Matched tag* | Found an HTML tag; it's possible to modify its attributes. | + * | *Text node* | Found a #text node; this is plaintext and modifiable. | + * | *CDATA node* | Found a CDATA section; this is modifiable. 
| + * | *Comment* | Found a comment or bogus comment; this is modifiable. | + * | *Presumptuous* | Found an empty tag closer: `</>`. | + * | *Funky comment* | Found a tag closer with an invalid tag name; this is modifiable. | * * @since 6.5.0 * * @see WP_HTML_Tag_Processor::STATE_READY * @see WP_HTML_Tag_Processor::STATE_COMPLETE - * @see WP_HTML_Tag_Processor::STATE_INCOMPLETE + * @see WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT * @see WP_HTML_Tag_Processor::STATE_MATCHED_TAG + * @see WP_HTML_Tag_Processor::STATE_TEXT_NODE + * @see WP_HTML_Tag_Processor::STATE_CDATA_NODE + * @see WP_HTML_Tag_Processor::STATE_COMMENT + * @see WP_HTML_Tag_Processor::STATE_DOCTYPE + * @see WP_HTML_Tag_Processor::STATE_PRESUMPTUOUS_TAG + * @see WP_HTML_Tag_Processor::STATE_FUNKY_COMMENT * * @var string */ - private $parser_state = self::STATE_READY; + protected $parser_state = self::STATE_READY; + + /** + * What kind of syntax token became an HTML comment. + * + * Since there are many ways in which HTML syntax can create an HTML comment, + * this indicates which of those caused it. This allows the Tag Processor to + * represent more from the original input document than would appear in the DOM. + * + * @since 6.5.0 + * + * @var string|null + */ + protected $comment_type = null; /** * How many bytes from the original HTML document have been read and parsed. @@ -490,6 +604,24 @@ class WP_HTML_Tag_Processor { */ private $tag_name_length; + /** + * Byte offset into input document where current modifiable text starts. + * + * @since 6.5.0 + * + * @var int + */ + private $text_starts_at; + + /** + * Byte length of modifiable text. + * + * @since 6.5.0 + * + * @var int + */ + private $text_length; + /** * Whether the current tag is an opening tag, e.g.
, or a closing tag, e.g.
. * @@ -705,13 +837,13 @@ public function next_tag( $query = null ) { * @return bool Whether a token was parsed. */ public function next_token() { - $this->get_updated_html(); $was_at = $this->bytes_already_parsed; + $this->get_updated_html(); // Don't proceed if there's nothing more to scan. if ( self::STATE_COMPLETE === $this->parser_state || - self::STATE_INCOMPLETE === $this->parser_state + self::STATE_INCOMPLETE_INPUT === $this->parser_state ) { return false; } @@ -729,13 +861,27 @@ public function next_token() { // Find the next tag if it exists. if ( false === $this->parse_next_tag() ) { - if ( self::STATE_INCOMPLETE === $this->parser_state ) { + if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) { $this->bytes_already_parsed = $was_at; } return false; } + /* + * For legacy reasons the rest of this function handles tags and their + * attributes. If the processor has reached the end of the document + * or if it matched any other token then it should return here to avoid + * attempting to process tag-specific syntax. + */ + if ( + self::STATE_INCOMPLETE_INPUT !== $this->parser_state && + self::STATE_COMPLETE !== $this->parser_state && + self::STATE_MATCHED_TAG !== $this->parser_state + ) { + return true; + } + // Parse all of its attributes. while ( $this->parse_next_attribute() ) { continue; @@ -743,11 +889,11 @@ public function next_token() { // Ensure that the tag closes before the end of the document. if ( - self::STATE_INCOMPLETE === $this->parser_state || + self::STATE_INCOMPLETE_INPUT === $this->parser_state || $this->bytes_already_parsed >= strlen( $this->html ) ) { // Does this appropriately clear state (parsed attributes)? 
- $this->parser_state = self::STATE_INCOMPLETE; + $this->parser_state = self::STATE_INCOMPLETE_INPUT; $this->bytes_already_parsed = $was_at; return false; @@ -755,14 +901,14 @@ public function next_token() { $tag_ends_at = strpos( $this->html, '>', $this->bytes_already_parsed ); if ( false === $tag_ends_at ) { - $this->parser_state = self::STATE_INCOMPLETE; + $this->parser_state = self::STATE_INCOMPLETE_INPUT; $this->bytes_already_parsed = $was_at; return false; } $this->parser_state = self::STATE_MATCHED_TAG; $this->token_length = $tag_ends_at - $this->token_starts_at; - $this->bytes_already_parsed = $tag_ends_at; + $this->bytes_already_parsed = $tag_ends_at + 1; /* * For non-DATA sections which might contain text that looks like HTML tags but @@ -771,8 +917,8 @@ public function next_token() { */ $t = $this->html[ $this->tag_name_starts_at ]; if ( - ! $this->is_closing_tag && - ( + $this->is_closing_tag || + ! ( 'i' === $t || 'I' === $t || 'n' === $t || 'N' === $t || 's' === $t || 'S' === $t || @@ -780,38 +926,81 @@ public function next_token() { 'x' === $t || 'X' === $t ) ) { - $tag_name = $this->get_tag(); + return true; + } - if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) { - $this->parser_state = self::STATE_INCOMPLETE; - $this->bytes_already_parsed = $was_at; + $tag_name = $this->get_tag(); - return false; - } elseif ( - ( 'TEXTAREA' === $tag_name || 'TITLE' === $tag_name ) && - ! $this->skip_rcdata( $tag_name ) - ) { - $this->parser_state = self::STATE_INCOMPLETE; - $this->bytes_already_parsed = $was_at; + /* + * Preserve the opening tag pointers, as these will be overwritten + * when finding the closing tag. They will be reset after finding + * the closing to tag to point to the opening of the special atomic + * tag sequence. 
+ */ + $tag_name_starts_at = $this->tag_name_starts_at; + $tag_name_length = $this->tag_name_length; + $tag_ends_at = $this->token_starts_at + $this->token_length; + $attributes = $this->attributes; + $duplicate_attributes = $this->duplicate_attributes; + + // Find the closing tag if necessary. + $found_closer = false; + switch ( $tag_name ) { + case 'SCRIPT': + $found_closer = $this->skip_script_data(); + break; - return false; - } elseif ( - ( - 'IFRAME' === $tag_name || - 'NOEMBED' === $tag_name || - 'NOFRAMES' === $tag_name || - 'STYLE' === $tag_name || - 'XMP' === $tag_name - ) && - ! $this->skip_rawtext( $tag_name ) - ) { - $this->parser_state = self::STATE_INCOMPLETE; - $this->bytes_already_parsed = $was_at; + case 'TEXTAREA': + case 'TITLE': + $found_closer = $this->skip_rcdata( $tag_name ); + break; - return false; - } + /* + * In the browser this list would include the NOSCRIPT element, + * but the Tag Processor is an environment with the scripting + * flag disabled, meaning that it needs to descend into the + * NOSCRIPT element to be able to properly process what will be + * sent to a browser. + * + * Note that this rule makes HTML5 syntax incompatible with XML, + * because the parsing of this token depends on client application. + * The NOSCRIPT element cannot be represented in the XHTML syntax. + */ + case 'IFRAME': + case 'NOEMBED': + case 'NOFRAMES': + case 'STYLE': + case 'XMP': + $found_closer = $this->skip_rawtext( $tag_name ); + break; + + // No other tags should be treated in their entirety here. + default: + return true; } + if ( ! $found_closer ) { + $this->parser_state = self::STATE_INCOMPLETE_INPUT; + $this->bytes_already_parsed = $was_at; + return false; + } + + /* + * The values here look like they reference the opening tag but they reference + * the closing tag instead. This is why the opening tag values were stored + * above in a variable. 
It reads confusingly here, but that's because the + * functions that skip the contents have moved all the internal cursors past + * the inner content of the tag. + */ + $this->token_starts_at = $was_at; + $this->token_length = $this->bytes_already_parsed - $this->token_starts_at; + $this->text_starts_at = $tag_ends_at + 1; + $this->text_length = $this->tag_name_starts_at - $this->text_starts_at; + $this->tag_name_starts_at = $tag_name_starts_at; + $this->tag_name_length = $tag_name_length; + $this->attributes = $attributes; + $this->duplicate_attributes = $duplicate_attributes; + return true; } @@ -830,7 +1019,7 @@ public function next_token() { * @return bool Whether the parse paused at the start of an incomplete token. */ public function paused_at_incomplete_token() { - return self::STATE_INCOMPLETE === $this->parser_state; + return self::STATE_INCOMPLETE_INPUT === $this->parser_state; } /** @@ -1007,7 +1196,10 @@ public function has_class( $wanted_class ) { */ public function set_bookmark( $name ) { // It only makes sense to set a bookmark if the parser has paused on a concrete token. - if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { + if ( + self::STATE_COMPLETE === $this->parser_state || + self::STATE_INCOMPLETE_INPUT === $this->parser_state + ) { return false; } @@ -1082,15 +1274,15 @@ private function skip_rcdata( $tag_name ) { $at = $this->bytes_already_parsed; while ( false !== $at && $at < $doc_length ) { - $at = strpos( $this->html, 'html, 'tag_name_starts_at = $at; // Fail if there is no possible tag closer. if ( false === $at || ( $at + $tag_length ) >= $doc_length ) { return false; } - $closer_potentially_starts_at = $at; - $at += 2; + $at += 2; /* * Find a case-insensitive match to the tag name. 
@@ -1131,13 +1323,23 @@ private function skip_rcdata( $tag_name ) { while ( $this->parse_next_attribute() ) { continue; } + $at = $this->bytes_already_parsed; if ( $at >= strlen( $this->html ) ) { return false; } - if ( '>' === $html[ $at ] || '/' === $html[ $at ] ) { - $this->bytes_already_parsed = $closer_potentially_starts_at; + if ( '>' === $html[ $at ] ) { + $this->bytes_already_parsed = $at + 1; + return true; + } + + if ( $at + 1 >= strlen( $this->html ) ) { + return false; + } + + if ( '/' === $html[ $at ] && '>' === $html[ $at + 1 ] ) { + $this->bytes_already_parsed = $at + 2; return true; } } @@ -1259,6 +1461,7 @@ private function skip_script_data() { if ( $is_closing ) { $this->bytes_already_parsed = $closer_potentially_starts_at; + $this->tag_name_starts_at = $closer_potentially_starts_at; if ( $this->bytes_already_parsed >= $doc_length ) { return false; } @@ -1268,13 +1471,13 @@ private function skip_script_data() { } if ( $this->bytes_already_parsed >= $doc_length ) { - $this->parser_state = self::STATE_INCOMPLETE; + $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } if ( '>' === $html[ $this->bytes_already_parsed ] ) { - $this->bytes_already_parsed = $closer_potentially_starts_at; + ++$this->bytes_already_parsed; return true; } } @@ -1303,17 +1506,34 @@ private function parse_next_tag() { $html = $this->html; $doc_length = strlen( $html ); - $at = $this->bytes_already_parsed; + $was_at = $this->bytes_already_parsed; + $at = $was_at; while ( false !== $at && $at < $doc_length ) { $at = strpos( $html, '<', $at ); + if ( $at > $was_at ) { + $this->parser_state = self::STATE_TEXT_NODE; + $this->token_starts_at = $was_at; + $this->token_length = $at - $was_at; + $this->text_starts_at = $was_at; + $this->text_length = $this->token_length; + $this->bytes_already_parsed = $at; + return true; + } + /* * This does not imply an incomplete parse; it indicates that there * can be nothing left in the document other than a #text node. 
*/ if ( false === $at ) { - return false; + $this->parser_state = self::STATE_TEXT_NODE; + $this->token_starts_at = $was_at; + $this->token_length = strlen( $html ) - $was_at; + $this->text_starts_at = $was_at; + $this->text_length = $this->token_length; + $this->bytes_already_parsed = strlen( $html ); + return true; } $this->token_starts_at = $at; @@ -1342,8 +1562,9 @@ private function parse_next_tag() { $tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 ); if ( $tag_name_prefix_length > 0 ) { ++$at; - $this->tag_name_length = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length ); + $this->parser_state = self::STATE_MATCHED_TAG; $this->tag_name_starts_at = $at; + $this->tag_name_length = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length ); $this->bytes_already_parsed = $at + $this->tag_name_length; return true; } @@ -1353,18 +1574,18 @@ private function parse_next_tag() { * the document. There is nothing left to parse. */ if ( $at + 1 >= $doc_length ) { - $this->parser_state = self::STATE_INCOMPLETE; + $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } /* - * + * ``. Unlike other comment + * and bogus comment syntax, these leave no clear insertion point for text and + * they need to be modified specially in order to contain text. E.g. to store + * `?` as the modifiable text, the `` needs to become ``, which + * involves inserting an additional `-` into the token after the modifiable text. + */ + $this->parser_state = self::STATE_COMMENT; + $this->comment_type = self::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT; + $this->token_length = $closer_at + $span_of_dashes + 1 - $this->token_starts_at; + + // Only provide modifiable text if the token is long enough to contain it. 
+ if ( $span_of_dashes >= 2 ) { + $this->comment_type = self::COMMENT_AS_HTML_COMMENT; + $this->text_starts_at = $this->token_starts_at + 4; + $this->text_length = $span_of_dashes - 2; + } + + $this->bytes_already_parsed = $closer_at + $span_of_dashes + 1; + return true; } /* @@ -1397,51 +1637,39 @@ private function parse_next_tag() { while ( ++$closer_at < $doc_length ) { $closer_at = strpos( $html, '--', $closer_at ); if ( false === $closer_at ) { - $this->parser_state = self::STATE_INCOMPLETE; + $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } if ( $closer_at + 2 < $doc_length && '>' === $html[ $closer_at + 2 ] ) { - $at = $closer_at + 3; - continue 2; + $this->parser_state = self::STATE_COMMENT; + $this->comment_type = self::COMMENT_AS_HTML_COMMENT; + $this->token_length = $closer_at + 3 - $this->token_starts_at; + $this->text_starts_at = $this->token_starts_at + 4; + $this->text_length = $closer_at - $this->text_starts_at; + $this->bytes_already_parsed = $closer_at + 3; + return true; } - if ( $closer_at + 3 < $doc_length && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) { - $at = $closer_at + 4; - continue 2; + if ( + $closer_at + 3 < $doc_length && + '!' === $html[ $closer_at + 2 ] && + '>' === $html[ $closer_at + 3 ] + ) { + $this->parser_state = self::STATE_COMMENT; + $this->comment_type = self::COMMENT_AS_HTML_COMMENT; + $this->token_length = $closer_at + 4 - $this->token_starts_at; + $this->text_starts_at = $this->token_starts_at + 4; + $this->text_length = $closer_at - $this->text_starts_at; + $this->bytes_already_parsed = $closer_at + 4; + return true; } } } /* - * - * The CDATA is case-sensitive. 
- * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state - */ - if ( - $doc_length > $at + 8 && - '[' === $html[ $at + 2 ] && - 'C' === $html[ $at + 3 ] && - 'D' === $html[ $at + 4 ] && - 'A' === $html[ $at + 5 ] && - 'T' === $html[ $at + 6 ] && - 'A' === $html[ $at + 7 ] && - '[' === $html[ $at + 8 ] - ) { - $closer_at = strpos( $html, ']]>', $at + 9 ); - if ( false === $closer_at ) { - $this->parser_state = self::STATE_INCOMPLETE; - - return false; - } - - $at = $closer_at + 3; - continue; - } - - /* - * + * ` * These are ASCII-case-insensitive. * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state */ @@ -1457,13 +1685,17 @@ private function parse_next_tag() { ) { $closer_at = strpos( $html, '>', $at + 9 ); if ( false === $closer_at ) { - $this->parser_state = self::STATE_INCOMPLETE; + $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } - $at = $closer_at + 1; - continue; + $this->parser_state = self::STATE_DOCTYPE; + $this->token_length = $closer_at + 1 - $this->token_starts_at; + $this->text_starts_at = $this->token_starts_at + 9; + $this->text_length = $closer_at - $this->text_starts_at; + $this->bytes_already_parsed = $closer_at + 1; + return true; } /* @@ -1471,14 +1703,53 @@ private function parse_next_tag() { * to the bogus comment state - skip to the nearest >. If no closer is * found then the HTML was truncated inside the markup declaration. 
*/ - $at = strpos( $html, '>', $at + 1 ); - if ( false === $at ) { - $this->parser_state = self::STATE_INCOMPLETE; + $closer_at = strpos( $html, '>', $at + 1 ); + if ( false === $closer_at ) { + $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } - continue; + $this->parser_state = self::STATE_COMMENT; + $this->comment_type = self::COMMENT_AS_INVALID_HTML; + $this->token_length = $closer_at + 1 - $this->token_starts_at; + $this->text_starts_at = $this->token_starts_at + 2; + $this->text_length = $closer_at - $this->text_starts_at; + $this->bytes_already_parsed = $closer_at + 1; + + /* + * Identify nodes that would be CDATA if HTML had CDATA sections. + * + * This section must occur after identifying the bogus comment end + * because in an HTML parser it will span to the nearest `>`, even + * if there's no `]]>` as would be required in an XML document. It + * is therefore not possible to parse a CDATA section containing + * a `>` in the HTML syntax. + * + * Inside foreign elements there is a discrepancy between browsers + * and the specification on this. + * + * @todo Track whether the Tag Processor is inside a foreign element + * and require the proper closing `]]>` in those cases. 
+ */ + if ( + $this->token_length >= 10 && + '[' === $html[ $this->token_starts_at + 2 ] && + 'C' === $html[ $this->token_starts_at + 3 ] && + 'D' === $html[ $this->token_starts_at + 4 ] && + 'A' === $html[ $this->token_starts_at + 5 ] && + 'T' === $html[ $this->token_starts_at + 6 ] && + 'A' === $html[ $this->token_starts_at + 7 ] && + '[' === $html[ $this->token_starts_at + 8 ] && + ']' === $html[ $closer_at - 1 ] + ) { + $this->parser_state = self::STATE_COMMENT; + $this->comment_type = self::COMMENT_AS_CDATA_LOOKALIKE; + $this->text_starts_at += 7; + $this->text_length -= 9; + } + + return true; } /* @@ -1491,30 +1762,80 @@ private function parse_next_tag() { * See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name */ if ( '>' === $html[ $at + 1 ] ) { - ++$at; - continue; + $this->parser_state = self::STATE_PRESUMPTUOUS_TAG; + $this->token_length = $at + 2 - $this->token_starts_at; + $this->bytes_already_parsed = $at + 2; + return true; } /* - * + * ` * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state */ if ( '?' === $html[ $at + 1 ] ) { $closer_at = strpos( $html, '>', $at + 2 ); if ( false === $closer_at ) { - $this->parser_state = self::STATE_INCOMPLETE; + $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } - $at = $closer_at + 1; - continue; + $this->parser_state = self::STATE_COMMENT; + $this->comment_type = self::COMMENT_AS_INVALID_HTML; + $this->token_length = $closer_at + 1 - $this->token_starts_at; + $this->text_starts_at = $this->token_starts_at + 2; + $this->text_length = $closer_at - $this->text_starts_at; + $this->bytes_already_parsed = $closer_at + 1; + + /* + * Identify a Processing Instruction node were HTML to have them. + * + * This section must occur after identifying the bogus comment end + * because in an HTML parser it will span to the nearest `>`, even + * if there's no `?>` as would be required in an XML document. 
It + * is therefore not possible to parse a Processing Instruction node + * containing a `>` in the HTML syntax. + * + * XML allows for more target names, but this code only identifies + * those with ASCII-representable target names. This means that it + * may identify some Processing Instruction nodes as bogus comments, + * but it will not misinterpret the HTML structure. By limiting the + * identification to these target names the Tag Processor can avoid + * the need to start parsing UTF-8 sequences. + * + * > NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | + * [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | + * [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | + * [#x10000-#xEFFFF] + * > NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] + * + * @see https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PITarget + */ + if ( $this->token_length >= 5 && '?' === $html[ $closer_at - 1 ] ) { + $comment_text = substr( $html, $this->token_starts_at + 2, $this->token_length - 4 ); + $pi_target_length = strspn( $comment_text, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ:_' ); + + if ( 0 < $pi_target_length ) { + $pi_target_length += strspn( $comment_text, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:_-.', $pi_target_length ); + + $this->comment_type = self::COMMENT_AS_PI_NODE_LOOKALIKE; + $this->tag_name_starts_at = $this->token_starts_at + 2; + $this->tag_name_length = $pi_target_length; + $this->text_starts_at += $pi_target_length; + $this->text_length -= $pi_target_length + 1; + } + } + + return true; } /* * If a non-alpha starts the tag name in a tag closer it's a comment. * Find the first `>`, which closes the comment. * + * This parser classifies these particular comments as special "funky comments" + * which are made available for further processing. 
+ * * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name */ if ( $this->is_closing_tag ) { @@ -1525,13 +1846,17 @@ private function parse_next_tag() { $closer_at = strpos( $html, '>', $at + 3 ); if ( false === $closer_at ) { - $this->parser_state = self::STATE_INCOMPLETE; + $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } - $at = $closer_at + 1; - continue; + $this->parser_state = self::STATE_FUNKY_COMMENT; + $this->token_length = $closer_at + 1 - $this->token_starts_at; + $this->text_starts_at = $this->token_starts_at + 2; + $this->text_length = $closer_at - $this->text_starts_at; + $this->bytes_already_parsed = $closer_at + 1; + return true; } ++$at; @@ -1551,7 +1876,7 @@ private function parse_next_attribute() { // Skip whitespace and slashes. $this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n/", $this->bytes_already_parsed ); if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { - $this->parser_state = self::STATE_INCOMPLETE; + $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } @@ -1575,14 +1900,14 @@ private function parse_next_attribute() { $attribute_name = substr( $this->html, $attribute_start, $name_length ); $this->bytes_already_parsed += $name_length; if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { - $this->parser_state = self::STATE_INCOMPLETE; + $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } $this->skip_whitespace(); if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { - $this->parser_state = self::STATE_INCOMPLETE; + $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } @@ -1592,7 +1917,7 @@ private function parse_next_attribute() { ++$this->bytes_already_parsed; $this->skip_whitespace(); if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { - $this->parser_state = self::STATE_INCOMPLETE; + $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } @@ -1620,7 +1945,7 @@ private function 
parse_next_attribute() { } if ( $attribute_end >= strlen( $this->html ) ) { - $this->parser_state = self::STATE_INCOMPLETE; + $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } @@ -1692,8 +2017,11 @@ private function after_tag() { $this->token_length = null; $this->tag_name_starts_at = null; $this->tag_name_length = null; + $this->text_starts_at = null; + $this->text_length = null; $this->is_closing_tag = null; $this->attributes = array(); + $this->comment_type = null; $this->duplicate_attributes = null; } @@ -1985,7 +2313,7 @@ public function seek( $bookmark_name ) { // Point this tag processor before the sought tag opener and consume it. $this->bytes_already_parsed = $this->bookmarks[ $bookmark_name ]->start; - return $this->next_tag( array( 'tag_closers' => 'visit' ) ); + return $this->next_token(); } /** @@ -2216,13 +2544,24 @@ public function get_attribute_names_with_prefix( $prefix ) { * @return string|null Name of currently matched tag in input HTML, or `null` if none found. */ public function get_tag() { - if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { + if ( null === $this->tag_name_starts_at ) { return null; } $tag_name = substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length ); - return strtoupper( $tag_name ); + if ( self::STATE_MATCHED_TAG === $this->parser_state ) { + return strtoupper( $tag_name ); + } + + if ( + self::STATE_COMMENT === $this->parser_state && + self::COMMENT_AS_PI_NODE_LOOKALIKE === $this->get_comment_type() + ) { + return $tag_name; + } + + return null; } /** @@ -2281,6 +2620,235 @@ public function is_tag_closer() { ); } + /** + * Indicates the kind of matched token, if any. + * + * This differs from `get_token_name()` in that it always + * returns a static string indicating the type, whereas + * `get_token_name()` may return values derived from the + * token itself, such as a tag name or processing + * instruction tag. + * + * Possible values: + * - `#tag` when matched on a tag. 
+ * - `#text` when matched on a text node. + * - `#cdata-section` when matched on a CDATA node. + * - `#comment` when matched on a comment. + * - `#doctype` when matched on a DOCTYPE declaration. + * - `#presumptuous-tag` when matched on an empty tag closer. + * - `#funky-comment` when matched on a funky comment. + * + * @since 6.5.0 + * + * @return string|null What kind of token is matched, or null. + */ + public function get_token_type() { + switch ( $this->parser_state ) { + case self::STATE_MATCHED_TAG: + return '#tag'; + + case self::STATE_DOCTYPE: + return '#doctype'; + + default: + return $this->get_token_name(); + } + } + + /** + * Returns the node name represented by the token. + * + * This matches the DOM API value `nodeName`. Some values + * are static, such as `#text` for a text node, while others + * are dynamically generated from the token itself. + * + * Dynamic names: + * - Uppercase tag name for tag matches. + * - `html` for DOCTYPE declarations. + * + * Note that if the Tag Processor is not matched on a token + * then this function will return `null`, either because it + * hasn't yet found a token or because it reached the end + * of the document without matching a token. + * + * @since 6.5.0 + * + * @return string|null Name of the matched token. + */ + public function get_token_name() { + switch ( $this->parser_state ) { + case self::STATE_MATCHED_TAG: + return $this->get_tag(); + + case self::STATE_TEXT_NODE: + return '#text'; + + case self::STATE_CDATA_NODE: + return '#cdata-section'; + + case self::STATE_COMMENT: + return '#comment'; + + case self::STATE_DOCTYPE: + return 'html'; + + case self::STATE_PRESUMPTUOUS_TAG: + return '#presumptuous-tag'; + + case self::STATE_FUNKY_COMMENT: + return '#funky-comment'; + } + } + + /** + * Indicates what kind of comment produced the comment node. + * + * Because there are different kinds of HTML syntax which produce + * comments, the Tag Processor tracks and exposes this as a type + * for the comment. 
Nominally only regular HTML comments exist as + * they are commonly known, but a number of unrelated syntax errors + * also produce comments. + * + * @see self::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT + * @see self::COMMENT_AS_CDATA_LOOKALIKE + * @see self::COMMENT_AS_INVALID_HTML + * @see self::COMMENT_AS_HTML_COMMENT + * @see self::COMMENT_AS_PI_NODE_LOOKALIKE + * + * @since 6.5.0 + * + * @return string|null + */ + public function get_comment_type() { + if ( self::STATE_COMMENT !== $this->parser_state ) { + return null; + } + + return $this->comment_type; + } + + /** + * Returns the modifiable text for a matched token, or an empty string. + * + * Modifiable text is text content that may be read and changed without + * changing the HTML structure of the document around it. This includes + * the contents of `#text` nodes in the HTML as well as the inner + * contents of HTML comments, Processing Instructions, and others, even + * though these nodes aren't part of a parsed DOM tree. They also contain + * the contents of SCRIPT and STYLE tags, of TEXTAREA tags, and of any + * other section in an HTML document which cannot contain HTML markup (DATA). + * + * If a token has no modifiable text then an empty string is returned to + * avoid needless crashing or type errors. An empty string does not mean + * that a token has modifiable text, and a token with modifiable text may + * have an empty string (e.g. a comment with no contents). + * + * @since 6.5.0 + * + * @return string + */ + public function get_modifiable_text() { + if ( null === $this->text_starts_at ) { + return ''; + } + + $text = substr( $this->html, $this->text_starts_at, $this->text_length ); + + // Comment data is not decoded. 
+ if ( + self::STATE_CDATA_NODE === $this->parser_state || + self::STATE_COMMENT === $this->parser_state || + self::STATE_DOCTYPE === $this->parser_state || + self::STATE_FUNKY_COMMENT === $this->parser_state + ) { + return $text; + } + + $tag_name = $this->get_tag(); + if ( + // Script data is not decoded. + 'SCRIPT' === $tag_name || + + // RAWTEXT data is not decoded. + 'IFRAME' === $tag_name || + 'NOEMBED' === $tag_name || + 'NOFRAMES' === $tag_name || + 'STYLE' === $tag_name || + 'XMP' === $tag_name + ) { + return $text; + } + + $decoded = html_entity_decode( $text, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE ); + + if ( empty( $decoded ) ) { + return ''; + } + + /* + * TEXTAREA skips a leading newline, but this newline may appear not only as the + * literal character `\n`, but also as a character reference, such as in the + * following markup: ``. + * + * For these cases it's important to first decode the text content before checking + * for a leading newline and removing it. + */ + if ( + self::STATE_MATCHED_TAG === $this->parser_state && + 'TEXTAREA' === $tag_name && + strlen( $decoded ) > 0 && + "\n" === $decoded[0] + ) { + return substr( $decoded, 1 ); + } + + return $decoded; + } + + /** + * Sets the modifiable text for the matched token, if possible. + * + * @param string $text Replace the modifiable text with this string. + * @return bool Whether the modifiable text was updated. + */ + public function set_modifiable_text( $text ) { + if ( null === $this->text_starts_at || ! 
is_string( $text ) ) { + return false; + } + + switch ( $this->get_token_name() ) { + case '#text': + $this->lexical_updates[] = new WP_HTML_Text_Replacement( + $this->text_starts_at, + $this->text_length, + esc_html( $text ) + ); + break; + + case 'TEXTAREA': + $this->lexical_updates[] = new WP_HTML_Text_Replacement( + $this->text_starts_at, + $this->text_length, + preg_replace( '~lexical_updates[] = new WP_HTML_Text_Replacement( + $this->text_starts_at, + $this->text_length, + preg_replace( '~get_updated_html(); + return true; + } + /** * Updates or creates a new attribute on the currently matched tag with the passed value. * @@ -2354,14 +2922,37 @@ public function set_attribute( $name, $value ) { * > To represent a false value, the attribute has to be omitted altogether. * - HTML5 spec, https://html.spec.whatwg.org/#boolean-attributes */ - if ( false === $value ) { + if ( false === $value || null === $value ) { return $this->remove_attribute( $name ); } if ( true === $value ) { $updated_attribute = $name; } else { - $escaped_new_value = esc_attr( $value ); + $tag_name = $this->get_tag(); + $comparable_name = strtolower( $name ); + + /* + * Escape URL attributes. + * + * @see https://html.spec.whatwg.org/#attributes-3 + */ + if ( + ! str_starts_with( $value, 'data:' ) && ( + 'cite' === $comparable_name || + 'formaction' === $comparable_name || + 'href' === $comparable_name || + 'ping' === $comparable_name || + 'src' === $comparable_name || + ( 'FORM' === $tag_name && 'action' === $comparable_name ) || + ( 'OBJECT' === $tag_name && 'data' === $comparable_name ) || + ( 'VIDEO' === $tag_name && 'poster' === $comparable_name ) + ) + ) { + $escaped_new_value = esc_url( $value ); + } else { + $escaped_new_value = esc_attr( $value ); + } $updated_attribute = "{$name}=\"{$escaped_new_value}\""; } @@ -2746,7 +3337,7 @@ private function matches() { } /** - * Parser Ready State + * Parser Ready State. 
* * Indicates that the parser is ready to run and waiting for a state transition. * It may not have started yet, or it may have just finished parsing a token and @@ -2759,7 +3350,7 @@ private function matches() { const STATE_READY = 'STATE_READY'; /** - * Parser Complete State + * Parser Complete State. * * Indicates that the parser has reached the end of the document and there is * nothing left to scan. It finished parsing the last token completely. @@ -2771,7 +3362,7 @@ private function matches() { const STATE_COMPLETE = 'STATE_COMPLETE'; /** - * Parser Incomplete State + * Parser Incomplete Input State. * * Indicates that the parser has reached the end of the document before finishing * a token. It started parsing a token but there is a possibility that the input @@ -2784,10 +3375,10 @@ private function matches() { * * @access private */ - const STATE_INCOMPLETE = 'STATE_INCOMPLETE'; + const STATE_INCOMPLETE_INPUT = 'STATE_INCOMPLETE_INPUT'; /** - * Parser Matched Tag State + * Parser Matched Tag State. * * Indicates that the parser has found an HTML tag and it's possible to get * the tag name and read or modify its attributes (if it's not a closing tag). @@ -2797,4 +3388,153 @@ private function matches() { * @access private */ const STATE_MATCHED_TAG = 'STATE_MATCHED_TAG'; + + /** + * Parser Text Node State. + * + * Indicates that the parser has found a text node and it's possible + * to read and modify that text. + * + * @since 6.5.0 + * + * @access private + */ + const STATE_TEXT_NODE = 'STATE_TEXT_NODE'; + + /** + * Parser CDATA Node State. + * + * Indicates that the parser has found a CDATA node and it's possible + * to read and modify its modifiable text. Note that in HTML there are + * no CDATA nodes outside of foreign content (SVG and MathML). Outside + * of foreign content, they are treated as HTML comments. 
+ * + * @since 6.5.0 + * + * @access private + */ + const STATE_CDATA_NODE = 'STATE_CDATA_NODE'; + + /** + * Indicates that the parser has found an HTML comment and it's + * possible to read and modify its modifiable text. + * + * @since 6.5.0 + * + * @access private + */ + const STATE_COMMENT = 'STATE_COMMENT'; + + /** + * Indicates that the parser has found a DOCTYPE node and it's + * possible to read and modify its modifiable text. + * + * @since 6.5.0 + * + * @access private + */ + const STATE_DOCTYPE = 'STATE_DOCTYPE'; + + /** + * Indicates that the parser has found an empty tag closer `</>`. + * + * Note that in HTML there are no empty tag closers, and they + * are ignored. Nonetheless, the Tag Processor still + * recognizes them as they appear in the HTML stream. + * + * These were historically discussed as a "presumptuous tag + * closer," which would close the nearest open tag, but were + * dismissed in favor of explicitly-closing tags. + * + * @since 6.5.0 + * + * @access private + */ + const STATE_PRESUMPTUOUS_TAG = 'STATE_PRESUMPTUOUS_TAG'; + + /** + * Indicates that the parser has found a "funky comment" + * and it's possible to read and modify its modifiable text. + * + * Example: + * + * </%url> + * </{"wp-bit":"query/post-author"}> + * </2> + * + * Funky comments are tag closers with invalid tag names. Note + * that in HTML these are turned into bogus comments. Nonetheless, + * the Tag Processor recognizes them in a stream of HTML and + * exposes them for inspection and modification. + * + * @since 6.5.0 + * + * @access private + */ + const STATE_FUNKY_COMMENT = 'STATE_WP_FUNKY'; + + /** + * Indicates that a comment was created when encountering an abruptly-closed HTML comment. + * + * Example: + * + * <!--> + * <!---> + * + * @since 6.5.0 + */ + const COMMENT_AS_ABRUPTLY_CLOSED_COMMENT = 'COMMENT_AS_ABRUPTLY_CLOSED_COMMENT'; + + /** + * Indicates that a comment would be parsed as a CDATA node, + * were HTML to allow CDATA nodes outside of foreign content.
+ * + * Example: + * + * + * + * This is an HTML comment, but it looks like a CDATA node. + * + * @since 6.5.0 + */ + const COMMENT_AS_CDATA_LOOKALIKE = 'COMMENT_AS_CDATA_LOOKALIKE'; + + /** + * Indicates that a comment was created when encountering + * normative HTML comment syntax. + * + * Example: + * + * + * + * @since 6.5.0 + */ + const COMMENT_AS_HTML_COMMENT = 'COMMENT_AS_HTML_COMMENT'; + + /** + * Indicates that a comment would be parsed as a Processing + * Instruction node, were they to exist within HTML. + * + * Example: + * + * + * + * This is an HTML comment, but it looks like a Processing Instruction node. + * + * @since 6.5.0 + */ + const COMMENT_AS_PI_NODE_LOOKALIKE = 'COMMENT_AS_PI_NODE_LOOKALIKE'; + + /** + * Indicates that a comment was created when encountering invalid + * HTML input, a so-called "bogus comment." + * + * Example: + * + * + * + * + * @since 6.5.0 + */ + const COMMENT_AS_INVALID_HTML = 'COMMENT_AS_INVALID_HTML'; } diff --git a/src/wp-includes/html-api/class-wp-html-template.php b/src/wp-includes/html-api/class-wp-html-template.php new file mode 100644 index 0000000000000..aeed2aff7c3ab --- /dev/null +++ b/src/wp-includes/html-api/class-wp-html-template.php @@ -0,0 +1,166 @@ +">', + * array( + * 'profile_url' => 'https://profiles.example.com/username', + * 'name' => $user->display_name + * ) + * ); + * // Outputs: Bobby Tables + * + * Do not escape the values supplied to the argument array! This function will escape each + * parameter's value as needed and additional manual escaping may lead to incorrect output. + * + * ## Syntax. + * + * ### Substitution Placeholders. + * + * - `</%named_arg>` finds `named_arg` in the arguments array, escapes its value if possible, + * and replaces the placeholder with the escaped value. These may exist inside double-quoted + * HTML tag attributes or in HTML text content between tags. They cannot be used to output a tag + * name or content inside a comment. + * + * ### Spread Attributes.
+ * + * - `...named_arg` when found within an HTML tag will look up `named_arg` in the arguments array + * and, if it's an array, will set the attribute on the tag for each key/value pair whose value + * is a string, a boolean, or `null`. + * + * ## Notes. + * + * - Attributes may only be supplied for a limited set of types: a string value assigns a double-quoted + * attribute value; `true` sets the attribute as a boolean attribute; `null` removes the attribute. + * If provided any other type of value the attribute will be ignored and its existing value persists. + * + * - If multiple HTML attributes are specified for a given tag they will be applied as if calling + * `set_attribute()` in the order they are specified in the template. This includes any attributes + * assigned through the attribute spread syntax. + * + * - Substitutions in text nodes may only contain string values. If provided any other type of value + * the placeholder will be removed with nothing in its place. + * + * - This function currently escapes all values provided in the arguments array. In the future + * it may provide the ability to nest pre-rendered HTML into the template, but this functionality + * is deferred for a future update. + * + * - This function will not replace content inside of TEXTAREA, TITLE, SCRIPT, or STYLE elements. + * + * @since 6.5.0 + * + * @access private + * + * @param string $template The HTML template. + * @param array $args Array of key/value pairs providing substitute values for the placeholders. + * @return string The rendered HTML. + */ + public static function render( $template, $args = array() ) { + $processor = new self( $template ); + while ( $processor->next_token() ) { + $type = $processor->get_token_type(); + $text = $processor->get_modifiable_text(); + + if ( '#funky-comment' === $type && strlen( $text ) > 0 && '%' === $text[0] ) { + $name = substr( $text, 1 ); + $value = isset( $args[ $name ] ) && is_string( $args[ $name ] ) ?
$args[ $name ] : null; + $processor->set_bookmark( 'here' ); + $processor->lexical_updates[] = new WP_HTML_Text_Replacement( + $processor->bookmarks['here']->start, + $processor->bookmarks['here']->length, + null === $value ? '' : esc_html( $value ) + ); + continue; + } + + if ( '#tag' === $type ) { + foreach ( $processor->get_attribute_names_with_prefix( '' ) ?? array() as $attribute_name ) { + if ( str_starts_with( $attribute_name, '...' ) ) { + $spread_name = substr( $attribute_name, 3 ); + if ( isset( $args[ $spread_name ] ) && is_array( $args[ $spread_name ] ) ) { + foreach ( $args[ $spread_name ] as $key => $value ) { + if ( true === $value || false === $value || null === $value || is_string( $value ) ) { + $processor->set_attribute( $key, $value ); + } + } + } + $processor->remove_attribute( $attribute_name ); + } + + $value = $processor->get_attribute( $attribute_name ); + + if ( ! is_string( $value ) ) { + continue; + } + + $full_match = null; + if ( preg_match( '~^]+)>$~', $value, $full_match ) ) { + $name = $full_match[1]; + + if ( array_key_exists( $name, $args ) ) { + $value = $args[ $name ]; + if ( false === $value || null === $value ) { + $processor->remove_attribute( $attribute_name ); + } elseif ( true === $value ) { + $processor->set_attribute( $attribute_name, true ); + } elseif ( is_string( $value ) ) { + $processor->set_attribute( $attribute_name, esc_attr( $args[ $name ] ) ); + } else { + $processor->remove_attribute( $attribute_name ); + } + } else { + $processor->remove_attribute( $attribute_name ); + } + + continue; + } + + $new_value = preg_replace_callback( + '~]+)>~', + static function ( $matches ) use ( $args ) { + return is_string( $args[ $matches[1] ] ) + ? 
esc_attr( $args[ $matches[1] ] ) + : ''; + }, + $value + ); + + if ( $new_value !== $value ) { + $processor->set_attribute( $attribute_name, $new_value ); + } + } + } + } + + return $processor->get_updated_html(); + } +} diff --git a/src/wp-includes/html-api/class-wp-html.php b/src/wp-includes/html-api/class-wp-html.php new file mode 100644 index 0000000000000..6443acac91508 --- /dev/null +++ b/src/wp-includes/html-api/class-wp-html.php @@ -0,0 +1,82 @@ +">', + * array( + * 'profile_url' => 'https://profiles.example.com/username', + * 'name' => $user->display_name + * ) + * ); + * // Outputs: Bobby Tables + * + * Do not escape the values supplied to the argument array! This function will escape each + * parameter's value as needed and additional manual escaping may lead to incorrect output. + * + * ## Syntax. + * + * ### Substitution Placeholders. + * + * - `</%named_arg>` finds `named_arg` in the arguments array, escapes its value if possible, + * and replaces the placeholder with the escaped value. These may exist inside double-quoted + * HTML tag attributes or in HTML text content between tags. They cannot be used to output a tag + * name or content inside a comment. + * + * ### Spread Attributes. + * + * - `...named_arg` when found within an HTML tag will look up `named_arg` in the arguments array + * and, if it's an array, will set the attribute on the tag for each key/value pair whose value + * is a string, a boolean, or `null`. + * + * ## Notes. + * + * - Attributes may only be supplied for a limited set of types: a string value assigns a double-quoted + * attribute value; `true` sets the attribute as a boolean attribute; `null` removes the attribute. + * If provided any other type of value the attribute will be ignored and its existing value persists. + * + * - If multiple HTML attributes are specified for a given tag they will be applied as if calling + * `set_attribute()` in the order they are specified in the template.
This includes any attributes + * assigned through the attribute spread syntax. + * + * - Substitutions in text nodes may only contain string values. If provided any other type of value + * the placeholder will be removed with nothing in its place. + * + * - This function currently escapes all values provided in the arguments array. In the future + * it may provide the ability to nest pre-rendered HTML into the template, but this functionality + * is deferred for a future update. + * + * @since 6.5.0 + * + * @access private + * + * @param string $template The HTML template. + * @param array $args Array of key/value pairs providing substitute values for the placeholders. + * @return string The rendered HTML. + */ + public static function render( $template, $args ) { + return WP_HTML_Template::render( $template, $args ); + } +} diff --git a/src/wp-includes/l10n.php b/src/wp-includes/l10n.php index 632f432f62545..228c40757c3ff 100644 --- a/src/wp-includes/l10n.php +++ b/src/wp-includes/l10n.php @@ -797,7 +797,7 @@ function load_textdomain( $domain, $mofile, $locale = null ) { $locale = determine_locale(); } - $i18n_controller = WP_Translation_Controller::instance(); + $i18n_controller = WP_Translation_Controller::get_instance(); // Ensures the correct locale is set as the current one, in case it was filtered. $i18n_controller->set_locale( $locale ); @@ -911,7 +911,7 @@ function unload_textdomain( $domain, $reloadable = false ) { // Since multiple locales are supported, reloadable text domains don't actually need to be unloaded. if ( !
$reloadable ) { - WP_Translation_Controller::instance()->unload_textdomain( $domain ); + WP_Translation_Controller::get_instance()->unload_textdomain( $domain ); } if ( isset( $l10n[ $domain ] ) ) { diff --git a/src/wp-includes/l10n/class-wp-translation-controller.php b/src/wp-includes/l10n/class-wp-translation-controller.php index 616dce5793c5c..b44384c013dcd 100644 --- a/src/wp-includes/l10n/class-wp-translation-controller.php +++ b/src/wp-includes/l10n/class-wp-translation-controller.php @@ -42,20 +42,28 @@ final class WP_Translation_Controller { protected $loaded_files = array(); /** - * Returns the WP_Translation_Controller singleton. + * Container for the main instance of the class. + * + * @since 6.5.0 + * @var WP_Translation_Controller|null + */ + private static $instance = null; + + /** + * Utility method to retrieve the main instance of the class. + * + * The instance will be created if it does not exist yet. * * @since 6.5.0 * * @return WP_Translation_Controller */ - public static function instance(): WP_Translation_Controller { - static $instance; - - if ( ! $instance ) { - $instance = new self(); + public static function get_instance(): WP_Translation_Controller { + if ( null === self::$instance ) { + self::$instance = new self(); } - return $instance; + return self::$instance; } /** diff --git a/src/wp-includes/media.php b/src/wp-includes/media.php index 38ec2213b7506..ecf4fd83b5cef 100644 --- a/src/wp-includes/media.php +++ b/src/wp-includes/media.php @@ -381,14 +381,10 @@ function set_post_thumbnail_size( $width = 0, $height = 0, $crop = false ) { * @return string HTML IMG element for given image attachment. */ function get_image_tag( $id, $alt, $title, $align, $size = 'medium' ) { - list( $img_src, $width, $height ) = image_downsize( $id, $size ); - $hwstring = image_hwstring( $width, $height ); - - $title = $title ? 'title="' . esc_attr( $title ) . '" ' : ''; $size_class = is_array( $size ) ? implode( 'x', $size ) : $size; - $class = 'align' . 
esc_attr( $align ) . ' size-' . esc_attr( $size_class ) . ' wp-image-' . $id; + $class = "align{$align} size-{$size_class} wp-image-{$id}"; /** * Filters the value of the attachment's image tag class attribute. @@ -403,7 +399,19 @@ function get_image_tag( $id, $alt, $title, $align, $size = 'medium' ) { */ $class = apply_filters( 'get_image_tag_class', $class, $id, $align, $size ); - $html = '' . esc_attr( $alt ) . ''; + $html = WP_HTML::render( + <<<'HTML' +</%alt> +HTML, + array( + 'alt' => $alt, + 'class' => $class, + 'height' => (string) $height, + 'src' => $img_src, + 'title' => empty( $title ) ? null : $title, + 'width' => (string) $width, + ) + ); /** * Filters the HTML content for the image tag. @@ -3603,37 +3611,24 @@ function wp_video_shortcode( $attr, $content = '' ) { $html_atts = array( 'class' => $atts['class'], 'id' => sprintf( 'video-%d-%d', $post_id, $instance ), - 'width' => absint( $atts['width'] ), - 'height' => absint( $atts['height'] ), - 'poster' => esc_url( $atts['poster'] ), + 'width' => (string) absint( $atts['width'] ), + 'height' => (string) absint( $atts['height'] ), + 'poster' => empty( $atts['poster'] ) ? null : $atts['poster'], 'loop' => wp_validate_boolean( $atts['loop'] ), 'autoplay' => wp_validate_boolean( $atts['autoplay'] ), 'muted' => wp_validate_boolean( $atts['muted'] ), - 'preload' => $atts['preload'], + 'preload' => empty( $atts['preload'] ) ? null : $attr['preload'], ); - // These ones should just be omitted altogether if they are blank. - foreach ( array( 'poster', 'loop', 'autoplay', 'preload', 'muted' ) as $a ) { - if ( empty( $html_atts[ $a ] ) ) { - unset( $html_atts[ $a ] ); - } - } - - $attr_strings = array(); - foreach ( $html_atts as $k => $v ) { - $attr_strings[] = $k . '="' . esc_attr( $v ) . '"'; - } - $html = ''; if ( 'mediaelement' === $library && 1 === $instance ) { $html .= "\n"; } - $html .= sprintf( '