Update comments, remove mistaken = sign.

dmsnell · dmsnell · commit f19a5cb6ceb0 · 2023-12-31T22:25:31.000-05:00
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -249,8 +249,6 @@
  *
  * ## Tokens and finer-grained processing.
  *
- * >>> Stub documentation.
- *
  * It's also possible to scan through every lexical token in
  * the HTML document using the `next_token()` function. This
  * alternative form takes no argument and provides no built-in
@@ -261,12 +259,12 @@
  *      $title        = '(untitled)';
  *      $text_content = '';
  *      while ( $processor->next_token() ) {
- *          switch ( $processor->get_node_name() ) {
+ *          switch ( $processor->get_token_name() ) {
  *              case '#text':
  *                  $text .= $processor->get_node_text();
  *                  break;
  *
- *              case 'HR':
+ *              case 'BR':
  *                  $text .= "\n";
  *                  break;
  *
@@ -305,7 +303,7 @@
  *  - `TITLE` and `TEXTAREA` whose contents are treated as plaintext and then any
  *    character references are decoded. E.g. "1 &amp;lt; 2 < 3" becomes "1 < 2 < 3".
  *  - `IFRAME`, `NOSCRIPT`, `NOEMBED`, `NOFRAME`, `STYLE` whose contents are treated as
- *    raw plaintext and left as-si. E.g. "1 &amp;lt; 2 < 3" remains "1 &amp;lt; 2 < 3".
+ *    raw plaintext and left as-is. E.g. "1 &amp;lt; 2 < 3" remains "1 &amp;lt; 2 < 3".
  *
  * #### Other tokens with modifiable text.
  *
@@ -314,17 +312,17 @@
  *  - `#text` nodes, whose entire token _is_ the modifiable text.
  *  - Comment nodes and nodes that became comments because of some syntax error. The
  *    text for these nodes is the portion of the comment inside of the syntax. E.g. for
- *    "&lt;!-- comment -->" the text is " comment " (note that the spaces are part of it).
+ *    `<!-- comment -->` the text is `" comment "` (note that the spaces are part of it).
  *  - `CDATA` sections, whose text is the content inside of the section itself. E.g. for
- *    "&lt;![CDATA[some content]]>" the text is "some content".
+ *    `<![CDATA[some content]]>` the text is `"some content"`.
  *  - "Funky comments," which are a special case of invalid closing tags whose name is
  *    invalid. The text for these nodes is the text that a browser would transform into
- *    an HTML when parsing. E.g. for "&lt;/%post_author>" the text is "%post_author".
+ *    an HTML comment when parsing. E.g. for `</%post_author>` the text is `%post_author`.
  *
  * And there are non-elements which are atomic in nature but have no modifiable text.
- *  - `DOCTYPE` nodes like "&lt;DOCTYPE html>" which have no closing tag.
- *  - XML Processing instruction nodes like "&lt;<?xml charset="utf8"?>".
- *  - The empty end tag "&lt;</>" which is ignored in the browser and DOM but exposed
+ *  - `DOCTYPE` nodes like `<!DOCTYPE html>` which have no closing tag.
+ *  - XML Processing instruction nodes like `<?xml charset="utf8"?>`.
+ *  - The empty end tag `</>` which is ignored in the browser and DOM but exposed
  *    to the HTML API.
  *
  * ## Design and limitations
@@ -849,9 +847,10 @@ public function next_token() {
 		}
 
 		/*
-		 * for legacy reasons the rest of this function handles tags and their
-		 * attributes. if the processor has reached the end of the document
-		 * or if it matched any other token then it should return here.
+		 * For legacy reasons the rest of this function handles tags and their
+		 * attributes. If the processor has reached the end of the document
+		 * or if it matched any other token then it should return here to avoid
+		 * attempting to process tag-specific syntax.
 		 */
 		if (
 			self::STATE_INCOMPLETE !== $this->parser_state &&
@@ -922,7 +921,12 @@ public function next_token() {
 			return true;
 		}
 
-		// Preserve the opening tag pointers.
+		/*
+		 * Preserve the opening tag pointers, as these will be overwritten
+		 * when finding the closing tag. They will be reset after finding
+		 * the closing tag to point to the opening of the special atomic
+		 * tag sequence.
+		 */
 		$tag_name_starts_at = $this->tag_name_starts_at;
 		$tag_name_length    = $this->tag_name_length;
 		$tag_ends_at        = $this->token_starts_at + $this->token_length;
@@ -956,7 +960,7 @@ public function next_token() {
 
 		/*
 		 * The values here look like they reference the opening tag but they reference
-		 * the closing that instead. This is why the opening tag values were stored
+		 * the closing tag instead. This is why the opening tag values were stored
 		 * above in a variable. It reads confusingly here, but that's because the
 		 * functions that skip the contents have moved all the internal cursors past
 		 * the inner content of the tag.
@@ -1473,7 +1477,7 @@ private function parse_next_tag() {
 		$was_at     = $this->bytes_already_parsed;
 		$at         = $was_at;
 
-		while ( false !== $at && $at <= $doc_length ) {
+		while ( false !== $at && $at < $doc_length ) {
 			$at = strpos( $html, '<', $at );
 
 			if ( $at > $was_at ) {
@@ -1568,7 +1572,7 @@ private function parse_next_tag() {
 					// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
 					$span_of_dashes = strspn( $html, '-', $closer_at );
 					if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
-						// @todo This could go wrong if the closer is shorter than `<!---->` because there's no inside.
+						// @todo This could go wrong if the closer is shorter than `<!---->` because there's no inside content.
 						$this->parser_state         = self::STATE_COMMENT;
 						$this->token_length         = $closer_at + $span_of_dashes + 1 - $this->token_starts_at;
 						$this->text_starts_at       = $this->token_starts_at + 4;