diff --git a/.eslintrc b/.eslintrc index 218aaddf8..3b74cadc9 100644 --- a/.eslintrc +++ b/.eslintrc @@ -17,7 +17,7 @@ "no-loop-func": 0, "no-multi-str": 2, "no-native-reassign": 2, - "no-new": 0, + "no-new": 2, "no-new-func": 2, "no-new-object": 2, "no-new-wrappers": 2, @@ -67,6 +67,7 @@ "curly": [2, "multi-or-nest"], "dot-notation": 0, "no-else-return": 2, + "linebreak-style": [2, "unix"], "no-multi-spaces": [2, { "exceptions": { "VariableDeclarator": true, diff --git a/.gitignore b/.gitignore index 73e13fddc..65d858d88 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .idea +.vscode node_modules docs/build docs/05_api_reference.md diff --git a/.travis.yml b/.travis.yml index 68bfbf776..d65e9102f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,9 @@ language: node_js sudo: false +before_install: + - cd ../ + - git clone https://github.com/HTMLParseErrorWG/html5lib-tests + - cd - + node_js: - stable diff --git a/LICENSE b/LICENSE index 120d532f4..5a65d25e6 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2013-2016 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin) +Copyright (c) 2013-2017 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/lib/common/doctype.js b/lib/common/doctype.js index ca4a86a97..bed4f82dd 100644 --- a/lib/common/doctype.js +++ b/lib/common/doctype.js @@ -4,6 +4,7 @@ var DOCUMENT_MODE = require('./html').DOCUMENT_MODE; //Const var VALID_DOCTYPE_NAME = 'html', + VALID_SYSTEM_ID = 'about:legacy-compat', QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd', QUIRKS_MODE_PUBLIC_ID_PREFIXES = [ '+//silmaril//dtd html pro v0r11 19970101//en', @@ -110,13 +111,23 @@ function hasPrefix(publicId, prefixes) { //API -exports.getDocumentMode = function (name, publicId, systemId) { - if (name !== VALID_DOCTYPE_NAME) +exports.isConforming = function (token) { + return token.name === VALID_DOCTYPE_NAME && + token.publicId === null && + (token.systemId === null || token.systemId === VALID_SYSTEM_ID); +}; + +exports.getDocumentMode = function (token) { + if (token.name !== VALID_DOCTYPE_NAME) return DOCUMENT_MODE.QUIRKS; + var systemId = token.systemId; + if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID) return DOCUMENT_MODE.QUIRKS; + var publicId = token.publicId; + if (publicId !== null) { publicId = publicId.toLowerCase(); diff --git a/lib/common/error_codes.js b/lib/common/error_codes.js new file mode 100644 index 000000000..026ca1be6 --- /dev/null +++ b/lib/common/error_codes.js @@ -0,0 +1,64 @@ +'use strict'; + +module.exports = { + controlCharacterInInputStream: 'control-character-in-input-stream', + noncharacterInInputStream: 'noncharacter-in-input-stream', + surrogateInInputStream: 'surrogate-in-input-stream', + nonVoidHtmlElementStartTagWithTrailingSolidus: 'non-void-html-element-start-tag-with-trailing-solidus', + endTagWithAttributes: 'end-tag-with-attributes', + endTagWithTrailingSolidus: 'end-tag-with-trailing-solidus', + unexpectedSolidusInTag: 'unexpected-solidus-in-tag', + unexpectedNullCharacter: 'unexpected-null-character', + unexpectedQuestionMarkInsteadOfTagName: 'unexpected-question-mark-instead-of-tag-name', + invalidFirstCharacterOfTagName: 'invalid-first-character-of-tag-name', + unexpectedEqualsSignBeforeAttributeName: 'unexpected-equals-sign-before-attribute-name', + missingEndTagName: 'missing-end-tag-name', + unexpectedCharacterInAttributeName: 'unexpected-character-in-attribute-name', + unknownNamedCharacterReference: 'unknown-named-character-reference', + missingSemicolonAfterCharacterReference: 'missing-semicolon-after-character-reference', + unexpectedCharacterAfterDoctypeSystemIdentifier: 'unexpected-character-after-doctype-system-identifier', + unexpectedCharacterInUnquotedAttributeValue: 'unexpected-character-in-unquoted-attribute-value', + eofBeforeTagName: 'eof-before-tag-name', + eofInTag: 'eof-in-tag', + missingAttributeValue: 'missing-attribute-value', + missingWhitespaceBetweenAttributes: 'missing-whitespace-between-attributes', + missingWhitespaceAfterDoctypePublicKeyword: 'missing-whitespace-after-doctype-public-keyword', + missingWhitespaceBetweenDoctypePublicAndSytemIdentifier: 'missing-whitespace-between-doctype-public-and-system-identifiers', + missingWhitespaceAfterDoctypeSystemKeyword: 'missing-whitespace-after-doctype-system-keyword', + missingQuoteBeforeDoctypePublicIdentifier: 'missing-quote-before-doctype-public-identifier', + missingQuoteBeforeDoctypeSystemIdentifier: 'missing-quote-before-doctype-system-identifier', + missingDoctypePublicIdentifier: 'missing-doctype-public-identifier', + missingDoctypeSystemIdentifier: 'missing-doctype-system-identifier', + abruptDoctypePublicIdentifier: 'abrupt-doctype-public-identifier', + abruptDoctypeSystemIdentifier: 'abrupt-doctype-system-identifier', + cdataInHtmlContent: 'cdata-in-html-content', + incorrectlyOpenedComment: 'incorrectly-opened-comment', + eofInScriptHtmlCommentLikeText: 'eof-in-script-html-comment-like-text', + eofInDoctype: 'eof-in-doctype', + nestedComment: 'nested-comment', + abruptClosingOfEmptyComment: 'abrupt-closing-of-empty-comment', + eofInComment: 'eof-in-comment', + incorrectlyClosedComment: 'incorrectly-closed-comment', + eofInCdata: 'eof-in-cdata', + absenceOfDigitsInNumericCharacterReference: 'absence-of-digits-in-numeric-character-reference', + nullCharacterReference: 'null-character-reference', + surrogateCharacterReference: 'surrogate-character-reference', + characterReferenceOutsideUnicodeRange: 'character-reference-outside-unicode-range', + controlCharacterReference: 'control-character-reference', + noncharacterCharacterReference: 'noncharacter-character-reference', + missingWhitespaceBeforeDoctypeName: 'missing-whitespace-before-doctype-name', + missingDoctypeName: 'missing-doctype-name', + invalidCharacterSequenceAfterDoctypeName: 'invalid-character-sequence-after-doctype-name', + duplicateAttribute: 'duplicate-attribute', + nonConformingDoctype: 'non-conforming-doctype', + missingDoctype: 'missing-doctype', + misplacedDoctype: 'misplaced-doctype', + endTagWithoutMatchingOpenElement: 'end-tag-without-matching-open-element', + closingOfElementWithOpenChildElements: 'closing-of-element-with-open-child-elements', + disallowedContentInNoscriptInHead: 'disallowed-content-in-noscript-in-head', + openElementsLeftAfterEof: 'open-elements-left-after-eof', + abandonedHeadElementChild: 'abandoned-head-element-child', + misplacedStartTagForHeadElement: 'misplaced-start-tag-for-head-element', + nestedNoscriptInHead: 'nested-noscript-in-head', + eofInElementThatCanContainOnlyText: 'eof-in-element-that-can-contain-only-text' +}; diff --git a/lib/common/html.js b/lib/common/html.js index bd1062e7a..400360fcd 100644 --- a/lib/common/html.js +++ b/lib/common/html.js @@ -103,7 +103,6 @@ var $ = exports.TAG_NAMES = { MARQUEE: 'marquee', MATH: 'math', MENU: 'menu', - MENUITEM: 'menuitem', META: 'meta', MGLYPH: 'mglyph', MI: 'mi', diff --git a/lib/common/unicode.js b/lib/common/unicode.js index 8777e97ab..5c8a2708c 100644 --- a/lib/common/unicode.js +++ b/lib/common/unicode.js @@ -1,5 +1,12 @@ 'use strict'; +var UNDEFINED_CODE_POINTS = [ + 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, + 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, + 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, + 0x10FFFF +]; + exports.REPLACEMENT_CHARACTER = '\uFFFD'; exports.CODE_POINTS = { @@ -28,6 +35,7 @@ exports.CODE_POINTS = { LATIN_CAPITAL_F: 0x46, LATIN_CAPITAL_X: 0x58, LATIN_CAPITAL_Z: 0x5A, + RIGHT_SQUARE_BRACKET: 0x5D, GRAVE_ACCENT: 0x60, LATIN_SMALL_A: 0x61, LATIN_SMALL_F: 0x66, @@ -40,8 +48,32 @@ exports.CODE_POINT_SEQUENCES = { DASH_DASH_STRING: [0x2D, 0x2D], //-- DOCTYPE_STRING: [0x44, 0x4F, 0x43, 0x54, 0x59, 0x50, 0x45], //DOCTYPE CDATA_START_STRING: [0x5B, 0x43, 0x44, 0x41, 0x54, 0x41, 0x5B], //[CDATA[ - CDATA_END_STRING: [0x5D, 0x5D, 0x3E], //]]> SCRIPT_STRING: [0x73, 0x63, 0x72, 0x69, 0x70, 0x74], //script PUBLIC_STRING: [0x50, 0x55, 0x42, 0x4C, 0x49, 0x43], //PUBLIC SYSTEM_STRING: [0x53, 0x59, 0x53, 0x54, 0x45, 0x4D] //SYSTEM }; + + +//Surrogates +exports.isSurrogate = function (cp) { + return cp >= 0xD800 && cp <= 0xDFFF; +}; + +exports.isSurrogatePair = function (cp) { + return cp >= 0xDC00 && cp <= 0xDFFF; +}; + +exports.getSurrogatePairCodePoint = function (cp1, cp2) { + return (cp1 - 0xD800) * 0x400 + 0x2400 + cp2; +}; + +//NOTE: excluding NULL and ASCII whitespace +exports.isControlCodePoint = function (cp) { + return cp !== 0x20 && cp !== 0x0A && + cp !== 0x0D && cp !== 0x09 && cp !== 0x0C && + cp >= 0x01 && cp <= 0x1F || cp >= 0x7F && cp <= 0x9F; +}; + +exports.isUndefinedCodePoint = function (cp) { + return cp >= 0xFDD0 && cp <= 0xFDEF || UNDEFINED_CODE_POINTS.indexOf(cp) > -1; +}; diff --git a/lib/extensions/error_reporting/mixin_base.js b/lib/extensions/error_reporting/mixin_base.js new file mode 100644 index 000000000..00069485c --- /dev/null +++ b/lib/extensions/error_reporting/mixin_base.js @@ -0,0 +1,42 @@ +'use strict'; + +var Mixin = require('../../utils/mixin'), + inherits = require('util').inherits; + +var ErrorReportingMixinBase = module.exports = function (host, opts) { + Mixin.call(this, host); + + this.posTracker = null; + this.onParseError = opts.onParseError; +}; + +inherits(ErrorReportingMixinBase, Mixin); + +ErrorReportingMixinBase.prototype._setErrorLocation = function (err) { + err.startLine = err.endLine = this.posTracker.line; + err.startCol = err.endCol = this.posTracker.col; + err.startOffset = err.endOffset = this.posTracker.offset; +}; + +ErrorReportingMixinBase.prototype._reportError = function (code) { + var err = { + code: code, + startLine: -1, + startCol: -1, + startOffset: -1, + endLine: -1, + endCol: -1, + endOffset: -1 + }; + + this._setErrorLocation(err); + this.onParseError(err); +}; + +ErrorReportingMixinBase.prototype._getOverriddenMethods = function (mxn) { + return { + _err: function (code) { + mxn._reportError(code); + } + }; +}; diff --git a/lib/extensions/error_reporting/parser_mixin.js b/lib/extensions/error_reporting/parser_mixin.js new file mode 100644 index 000000000..d82e5c66b --- /dev/null +++ b/lib/extensions/error_reporting/parser_mixin.js @@ -0,0 +1,52 @@ +'use strict'; + +var ErrorReportingMixinBase = require('./mixin_base'), + ErrorReportingTokenizerMixin = require('./tokenizer_mixin'), + LocationInfoTokenizerMixin = require('../location_info/tokenizer_mixin'), + Mixin = require('../../utils/mixin'), + inherits = require('util').inherits; + + +var ErrorReportingParserMixin = module.exports = function (parser, opts) { + ErrorReportingMixinBase.call(this, parser, opts); + + this.opts = opts; + this.ctLoc = null; + this.locBeforeToken = false; +}; + +inherits(ErrorReportingParserMixin, ErrorReportingMixinBase); + +ErrorReportingParserMixin.prototype._setErrorLocation = function (err) { + if (this.ctLoc) { + err.startLine = this.ctLoc.startLine; + err.startCol = this.ctLoc.startCol; + err.startOffset = this.ctLoc.startOffset; + + err.endLine = this.locBeforeToken ? this.ctLoc.startLine : this.ctLoc.endLine; + err.endCol = this.locBeforeToken ? this.ctLoc.startCol : this.ctLoc.endCol; + err.endOffset = this.locBeforeToken ? this.ctLoc.startOffset : this.ctLoc.endOffset; + } +}; + +ErrorReportingParserMixin.prototype._getOverriddenMethods = function (mxn, orig) { + return { + _bootstrap: function (document, fragmentContext) { + orig._bootstrap.call(this, document, fragmentContext); + + Mixin.install(this.tokenizer, ErrorReportingTokenizerMixin, mxn.opts); + Mixin.install(this.tokenizer, LocationInfoTokenizerMixin); + }, + + _processInputToken: function (token) { + mxn.ctLoc = token.location; + + orig._processInputToken.call(this, token); + }, + + _err: function (code, options) { + mxn.locBeforeToken = options && options.beforeToken; + mxn._reportError(code); + } + }; +}; diff --git a/lib/extensions/error_reporting/preprocessor_mixin.js b/lib/extensions/error_reporting/preprocessor_mixin.js new file mode 100644 index 000000000..0a2732ee5 --- /dev/null +++ b/lib/extensions/error_reporting/preprocessor_mixin.js @@ -0,0 +1,24 @@ +'use strict'; + +var ErrorReportingMixinBase = require('./mixin_base'), + PositionTrackingPreprocessorMixin = require('../position_tracking/preprocessor_mixin'), + Mixin = require('../../utils/mixin'), + inherits = require('util').inherits; + + +var ErrorReportingPreprocessorMixin = module.exports = function (preprocessor, opts) { + ErrorReportingMixinBase.call(this, preprocessor, opts); + + this.posTracker = Mixin.install(preprocessor, PositionTrackingPreprocessorMixin); + this.lastErrOffset = -1; +}; + +inherits(ErrorReportingPreprocessorMixin, ErrorReportingMixinBase); + +ErrorReportingPreprocessorMixin.prototype._reportError = function (code) { + //NOTE: avoid reporting error twice on advance/retreat + if (this.lastErrOffset !== this.posTracker.offset) { + this.lastErrOffset = this.posTracker.offset; + ErrorReportingMixinBase.prototype._reportError.call(this, code); + } +}; diff --git a/lib/extensions/error_reporting/tokenizer_mixin.js b/lib/extensions/error_reporting/tokenizer_mixin.js new file mode 100644 index 000000000..0527cee36 --- /dev/null +++ b/lib/extensions/error_reporting/tokenizer_mixin.js @@ -0,0 +1,17 @@ +'use strict'; + +var ErrorReportingMixinBase = require('./mixin_base'), + ErrorReportingPreprocessorMixin = require('./preprocessor_mixin'), + Mixin = require('../../utils/mixin'), + inherits = require('util').inherits; + + +var ErrorReportingTokenizerMixin = module.exports = function (tokenizer, opts) { + ErrorReportingMixinBase.call(this, tokenizer, opts); + + var preprocessorMixin = Mixin.install(tokenizer.preprocessor, ErrorReportingPreprocessorMixin, opts); + + this.posTracker = preprocessorMixin.posTracker; +}; + +inherits(ErrorReportingTokenizerMixin, ErrorReportingMixinBase); diff --git a/lib/extensions/location_info/parser_mixin.js b/lib/extensions/location_info/parser_mixin.js index 7b9ee7542..8477d9107 100644 --- a/lib/extensions/location_info/parser_mixin.js +++ b/lib/extensions/location_info/parser_mixin.js @@ -3,7 +3,6 @@ var Mixin = require('../../utils/mixin'), Tokenizer = require('../../tokenizer'), LocationInfoTokenizerMixin = require('./tokenizer_mixin'), - PositionTrackingPreprocessorMixin = require('../position_tracking/preprocessor_mixin'), LocationInfoOpenElementStackMixin = require('./open_element_stack_mixin'), HTML = require('../../common/html'), inherits = require('util').inherits; @@ -27,7 +26,7 @@ inherits(LocationInfoParserMixin, Mixin); LocationInfoParserMixin.prototype._setStartLocation = function (element) { if (this.lastStartTagToken) { - element.__location = Object.create(this.lastStartTagToken.location); + element.__location = Object.assign({}, this.lastStartTagToken.location); element.__location.startTag = this.lastStartTagToken.location; } else @@ -47,16 +46,18 @@ LocationInfoParserMixin.prototype._setEndLocation = function (element, closingTo var isClosingEndTag = closingToken.type === Tokenizer.END_TAG_TOKEN && tn === closingToken.tagName; if (isClosingEndTag) { - loc.endTag = Object.create(ctLoc); + loc.endTag = Object.assign({}, ctLoc); + loc.endLine = ctLoc.endLine; + loc.endCol = ctLoc.endCol; loc.endOffset = ctLoc.endOffset; } - else + else { + loc.endLine = ctLoc.startLine; + loc.endCol = ctLoc.startCol; loc.endOffset = ctLoc.startOffset; + } } - - else if (closingToken.type === Tokenizer.EOF_TOKEN) - loc.endOffset = this.posTracker.offset; } }; @@ -68,11 +69,12 @@ LocationInfoParserMixin.prototype._getOverriddenMethods = function (mxn, orig) { mxn.lastStartTagToken = null; mxn.lastFosterParentingLocation = null; mxn.currentToken = null; - mxn.posTracker = new PositionTrackingPreprocessorMixin(this.tokenizer.preprocessor); - new LocationInfoTokenizerMixin(this.tokenizer); + var tokenizerMixin = Mixin.install(this.tokenizer, LocationInfoTokenizerMixin); - new LocationInfoOpenElementStackMixin(this.openElements, { + mxn.posTracker = tokenizerMixin.posTracker; + + Mixin.install(this.openElements, LocationInfoOpenElementStackMixin, { onItemPop: function (element) { mxn._setEndLocation(element, mxn.currentToken); } @@ -102,8 +104,8 @@ LocationInfoParserMixin.prototype._getOverriddenMethods = function (mxn, orig) { //NOTE: and are never popped from the stack, so we need to updated //their end location explicitly. var requireExplicitUpdate = token.type === Tokenizer.END_TAG_TOKEN && - (token.tagName === $.HTML || - token.tagName === $.BODY && this.openElements.hasInScope($.BODY)); + (token.tagName === $.HTML || + token.tagName === $.BODY && this.openElements.hasInScope($.BODY)); if (requireExplicitUpdate) { for (var i = this.openElements.stackTop; i >= 0; i--) { @@ -193,17 +195,22 @@ LocationInfoParserMixin.prototype._getOverriddenMethods = function (mxn, orig) { var hasFosterParent = this._shouldFosterParentOnInsertion(), parent = hasFosterParent && mxn.lastFosterParentingLocation.parent || - this.openElements.currentTmplContent || - this.openElements.current, + this.openElements.currentTmplContent || + this.openElements.current, siblings = this.treeAdapter.getChildNodes(parent), textNodeIdx = hasFosterParent && mxn.lastFosterParentingLocation.beforeElement ? - siblings.indexOf(mxn.lastFosterParentingLocation.beforeElement) - 1 : - siblings.length - 1, + siblings.indexOf(mxn.lastFosterParentingLocation.beforeElement) - 1 : + siblings.length - 1, textNode = siblings[textNodeIdx]; //NOTE: if we have location assigned by another token, then just update end position - if (textNode.__location) - textNode.__location.endOffset = token.location.endOffset; + var tnLoc = textNode.__location; + + if (tnLoc) { + tnLoc.endLine = token.location.endLine; + tnLoc.endCol = token.location.endCol; + tnLoc.endOffset = token.location.endOffset; + } else textNode.__location = token.location; diff --git a/lib/extensions/location_info/tokenizer_mixin.js b/lib/extensions/location_info/tokenizer_mixin.js index 859d690b8..3ac30b8fd 100644 --- a/lib/extensions/location_info/tokenizer_mixin.js +++ b/lib/extensions/location_info/tokenizer_mixin.js @@ -9,23 +9,27 @@ var LocationInfoTokenizerMixin = module.exports = function (tokenizer) { Mixin.call(this, tokenizer); this.tokenizer = tokenizer; - this.posTracker = new PositionTrackingPreprocessorMixin(tokenizer.preprocessor); + this.posTracker = Mixin.install(tokenizer.preprocessor, PositionTrackingPreprocessorMixin); this.currentAttrLocation = null; - this.currentTokenLocation = null; + this.ctLoc = null; }; inherits(LocationInfoTokenizerMixin, Mixin); LocationInfoTokenizerMixin.prototype._getCurrentLocation = function () { return { - line: this.posTracker.line, - col: this.posTracker.col, + startLine: this.posTracker.line, + startCol: this.posTracker.col, startOffset: this.posTracker.offset, + endLine: -1, + endCol: -1, endOffset: -1 }; }; LocationInfoTokenizerMixin.prototype._attachCurrentAttrLocationInfo = function () { + this.currentAttrLocation.endLine = this.posTracker.line; + this.currentAttrLocation.endCol = this.posTracker.col; this.currentAttrLocation.endOffset = this.posTracker.offset; var currentToken = this.tokenizer.currentToken, @@ -41,27 +45,32 @@ LocationInfoTokenizerMixin.prototype._getOverriddenMethods = function (mxn, orig var methods = { _createStartTagToken: function () { orig._createStartTagToken.call(this); - this.currentToken.location = mxn.currentTokenLocation; + this.currentToken.location = mxn.ctLoc; }, _createEndTagToken: function () { orig._createEndTagToken.call(this); - this.currentToken.location = mxn.currentTokenLocation; + this.currentToken.location = mxn.ctLoc; }, _createCommentToken: function () { orig._createCommentToken.call(this); - this.currentToken.location = mxn.currentTokenLocation; + this.currentToken.location = mxn.ctLoc; }, _createDoctypeToken: function (initialName) { orig._createDoctypeToken.call(this, initialName); - this.currentToken.location = mxn.currentTokenLocation; + this.currentToken.location = mxn.ctLoc; }, _createCharacterToken: function (type, ch) { orig._createCharacterToken.call(this, type, ch); - this.currentCharacterToken.location = mxn.currentTokenLocation; + this.currentCharacterToken.location = mxn.ctLoc; + }, + + _createEOFToken: function () { + orig._createEOFToken.call(this); + this.currentToken.location = mxn._getCurrentLocation(); }, _createAttr: function (attrNameFirstCh) { @@ -80,23 +89,44 @@ LocationInfoTokenizerMixin.prototype._getOverriddenMethods = function (mxn, orig }, _emitCurrentToken: function () { + var ctLoc = this.currentToken.location; + //NOTE: if we have pending character token make it's end location equal to the //current token's start location. - if (this.currentCharacterToken) - this.currentCharacterToken.location.endOffset = this.currentToken.location.startOffset; + if (this.currentCharacterToken) { + this.currentCharacterToken.location.endLine = ctLoc.startLine; + this.currentCharacterToken.location.endCol = ctLoc.startCol; + this.currentCharacterToken.location.endOffset = ctLoc.startOffset; + } + + if (this.currentToken.type === Tokenizer.EOF_TOKEN) { + ctLoc.endLine = ctLoc.startLine; + ctLoc.endCol = ctLoc.startCol; + ctLoc.endOffset = ctLoc.startOffset; + } + + else { + ctLoc.endLine = mxn.posTracker.line; + ctLoc.endCol = mxn.posTracker.col + 1; + ctLoc.endOffset = mxn.posTracker.offset + 1; + } - this.currentToken.location.endOffset = mxn.posTracker.offset + 1; orig._emitCurrentToken.call(this); }, _emitCurrentCharacterToken: function () { + var ctLoc = this.currentCharacterToken && this.currentCharacterToken.location; + //NOTE: if we have character token and it's location wasn't set in the _emitCurrentToken(), //then set it's location at the current preprocessor position. //We don't need to increment preprocessor position, since character token //emission is always forced by the start of the next character token here. //So, we already have advanced position. - if (this.currentCharacterToken && this.currentCharacterToken.location.endOffset === -1) - this.currentCharacterToken.location.endOffset = mxn.posTracker.offset; + if (ctLoc && ctLoc.endOffset === -1) { + ctLoc.endLine = mxn.posTracker.line; + ctLoc.endCol = mxn.posTracker.col; + ctLoc.endOffset = mxn.posTracker.offset; + } orig._emitCurrentCharacterToken.call(this); } @@ -107,7 +137,7 @@ LocationInfoTokenizerMixin.prototype._getOverriddenMethods = function (mxn, orig var state = Tokenizer.MODE[modeName]; methods[state] = function (cp) { - mxn.currentTokenLocation = mxn._getCurrentLocation(); + mxn.ctLoc = mxn._getCurrentLocation(); orig[state].call(this, cp); }; }); diff --git a/lib/extensions/position_tracking/preprocessor_mixin.js b/lib/extensions/position_tracking/preprocessor_mixin.js index 81995c67d..f4ccddd1a 100644 --- a/lib/extensions/position_tracking/preprocessor_mixin.js +++ b/lib/extensions/position_tracking/preprocessor_mixin.js @@ -1,64 +1,51 @@ 'use strict'; var Mixin = require('../../utils/mixin'), - inherits = require('util').inherits, - UNICODE = require('../../common/unicode'); + inherits = require('util').inherits; -//Aliases -var $ = UNICODE.CODE_POINTS; var PositionTrackingPreprocessorMixin = module.exports = function (preprocessor) { - // NOTE: avoid installing tracker twice - if (!preprocessor.__locTracker) { - preprocessor.__locTracker = this; + Mixin.call(this, preprocessor); - Mixin.call(this, preprocessor); + this.preprocessor = preprocessor; + this.isEol = false; + this.lineStartPos = 0; + this.droppedBufferSize = 0; - this.preprocessor = preprocessor; - this.isEol = false; - this.lineStartPos = 0; - this.droppedBufferSize = 0; - - this.col = -1; - this.line = 1; - } - - return preprocessor.__locTracker; + this.offset = 0; + this.col = 0; + this.line = 1; }; inherits(PositionTrackingPreprocessorMixin, Mixin); -Object.defineProperty(PositionTrackingPreprocessorMixin.prototype, 'offset', { - get: function () { - return this.droppedBufferSize + this.preprocessor.pos; - } -}); - PositionTrackingPreprocessorMixin.prototype._getOverriddenMethods = function (mxn, orig) { return { advance: function () { - var cp = orig.advance.call(this); + var pos = this.pos + 1, + ch = this.html[pos]; //NOTE: LF should be in the last column of the line if (mxn.isEol) { mxn.isEol = false; mxn.line++; - mxn.lineStartPos = mxn.offset; + mxn.lineStartPos = pos; } - if (cp === $.LINE_FEED) + if (ch === '\n' || ch === '\r' && this.html[pos + 1] !== '\n') mxn.isEol = true; - mxn.col = mxn.offset - mxn.lineStartPos + 1; + mxn.col = pos - mxn.lineStartPos + 1; + mxn.offset = mxn.droppedBufferSize + pos; - return cp; + return orig.advance.call(this); }, retreat: function () { orig.retreat.call(this); - mxn.isEol = false; - mxn.col = mxn.offset - mxn.lineStartPos + 1; + mxn.isEol = false; + mxn.col = this.pos - mxn.lineStartPos + 1; }, dropParsedChunk: function () { @@ -66,7 +53,11 @@ PositionTrackingPreprocessorMixin.prototype._getOverriddenMethods = function (mx orig.dropParsedChunk.call(this); - mxn.droppedBufferSize += prevPos - this.pos; + var reduction = prevPos - this.pos; + + mxn.lineStartPos -= reduction; + mxn.droppedBufferSize += reduction; + mxn.offset = mxn.droppedBufferSize + this.pos; } }; }; diff --git a/lib/index.d.ts b/lib/index.d.ts index 0fb173d59..4f5309bc5 100644 --- a/lib/index.d.ts +++ b/lib/index.d.ts @@ -53,6 +53,13 @@ declare namespace MarkupData { //----------------------------------------------------------------------------------- declare namespace Options { export interface ParserOptions { + /** + * [Scripting flag](https://html.spec.whatwg.org/multipage/parsing.html#scripting-flag). + * When enabled the parser treats `