Skip to content

Commit f19a5cb

Browse files
committed
Update comments, remove mistaken = sign.
1 parent cbb2ad4 commit f19a5cb

File tree

1 file changed

+22
-18
lines changed

1 file changed

+22
-18
lines changed

src/wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -249,8 +249,6 @@
249249
*
250250
* ## Tokens and finer-grained processing.
251251
*
252-
* >>> Stub documentation.
253-
*
254252
* It's also possible to scan through every lexical token in
255253
* the HTML document using the `next_token()` function. This
256254
* alternative form takes no argument and provides no built-in
@@ -261,12 +259,12 @@
261259
* $title = '(untitled)';
262260
* $text_content = '';
263261
* while ( $processor->next_token() ) {
264-
* switch ( $processor->get_node_name() ) {
262+
* switch ( $processor->get_token_name() ) {
265263
* case '#text':
266264
* $text_content .= $processor->get_node_text();
267265
* break;
268266
*
269-
* case 'HR':
267+
* case 'BR':
270268
* $text_content .= "\n";
271269
* break;
272270
*
@@ -305,7 +303,7 @@
305303
* - `TITLE` and `TEXTAREA` whose contents are treated as plaintext and then any
306304
* character references are decoded. E.g. "1 &lt; 2 < 3" becomes "1 < 2 < 3".
307305
* - `IFRAME`, `NOSCRIPT`, `NOEMBED`, `NOFRAMES`, `STYLE` whose contents are treated as
308-
* raw plaintext and left as-si. E.g. "1 &amp;lt; 2 < 3" remains "1 &amp;lt; 2 < 3".
306+
* raw plaintext and left as-is. E.g. "1 &amp;lt; 2 < 3" remains "1 &amp;lt; 2 < 3".
309307
*
310308
* #### Other tokens with modifiable text.
311309
*
@@ -314,17 +312,17 @@
314312
* - `#text` nodes, whose entire token _is_ the modifiable text.
315313
* - Comment nodes and nodes that became comments because of some syntax error. The
316314
* text for these nodes is the portion of the comment inside of the syntax. E.g. for
317-
* "&lt;!-- comment -->" the text is " comment " (note that the spaces are part of it).
315+
* `<!-- comment -->` the text is `" comment "` (note that the spaces are part of it).
318316
* - `CDATA` sections, whose text is the content inside of the section itself. E.g. for
319-
* "&lt;![CDATA[some content]]>" the text is "some content".
317+
* `<![CDATA[some content]]>` the text is `"some content"`.
320318
* - "Funky comments," which are a special case of invalid closing tags whose name is
321319
* invalid. The text for these nodes is the text that a browser would transform into
322-
* an HTML when parsing. E.g. for "&lt;/%post_author>" the text is "%post_author".
320+
* an HTML comment when parsing. E.g. for `</%post_author>` the text is `%post_author`.
323321
*
324322
* And there are non-elements which are atomic in nature but have no modifiable text.
325-
* - `DOCTYPE` nodes like "&lt;DOCTYPE html>" which have no closing tag.
326-
* - XML Processing instruction nodes like "&lt;<?xml charset="utf8"?>".
327-
* - The empty end tag "&lt;</>" which is ignored in the browser and DOM but exposed
323+
* - `DOCTYPE` nodes like `<!DOCTYPE html>` which have no closing tag.
324+
* - XML Processing instruction nodes like `<?xml charset="utf8"?>`.
325+
* - The empty end tag `</>` which is ignored in the browser and DOM but exposed
328326
* to the HTML API.
329327
*
330328
* ## Design and limitations
@@ -849,9 +847,10 @@ public function next_token() {
849847
}
850848

851849
/*
852-
* for legacy reasons the rest of this function handles tags and their
853-
* attributes. if the processor has reached the end of the document
854-
* or if it matched any other token then it should return here.
850+
* For legacy reasons the rest of this function handles tags and their
851+
* attributes. If the processor has reached the end of the document
852+
* or if it matched any other token then it should return here to avoid
853+
* attempting to process tag-specific syntax.
855854
*/
856855
if (
857856
self::STATE_INCOMPLETE !== $this->parser_state &&
@@ -922,7 +921,12 @@ public function next_token() {
922921
return true;
923922
}
924923

925-
// Preserve the opening tag pointers.
924+
/*
925+
* Preserve the opening tag pointers, as these will be overwritten
926+
* when finding the closing tag. They will be reset after finding
927+
* the closing tag to point to the opening of the special atomic
928+
* tag sequence.
929+
*/
926930
$tag_name_starts_at = $this->tag_name_starts_at;
927931
$tag_name_length = $this->tag_name_length;
928932
$tag_ends_at = $this->token_starts_at + $this->token_length;
@@ -956,7 +960,7 @@ public function next_token() {
956960

957961
/*
958962
* The values here look like they reference the opening tag but they reference
959-
* the closing that instead. This is why the opening tag values were stored
963+
* the closing tag instead. This is why the opening tag values were stored
960964
* above in a variable. It reads confusingly here, but that's because the
961965
* functions that skip the contents have moved all the internal cursors past
962966
* the inner content of the tag.
@@ -1473,7 +1477,7 @@ private function parse_next_tag() {
14731477
$was_at = $this->bytes_already_parsed;
14741478
$at = $was_at;
14751479

1476-
while ( false !== $at && $at <= $doc_length ) {
1480+
while ( false !== $at && $at < $doc_length ) {
14771481
$at = strpos( $html, '<', $at );
14781482

14791483
if ( $at > $was_at ) {
@@ -1568,7 +1572,7 @@ private function parse_next_tag() {
15681572
// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
15691573
$span_of_dashes = strspn( $html, '-', $closer_at );
15701574
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
1571-
// @todo This could go wrong if the closer is shorter than `<!---->` because there's no inside.
1575+
// @todo This could go wrong if the closer is shorter than `<!---->` because there's no inside content.
15721576
$this->parser_state = self::STATE_COMMENT;
15731577
$this->token_length = $closer_at + $span_of_dashes + 1 - $this->token_starts_at;
15741578
$this->text_starts_at = $this->token_starts_at + 4;

0 commit comments

Comments
 (0)