diff --git a/package-lock.json b/package-lock.json index ff8814b9f3..96acf60f84 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3283,9 +3283,9 @@ } }, "should-util": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/should-util/-/should-util-1.0.0.tgz", - "integrity": "sha1-yYzaN0qmsZDfi6h8mInCtNtiAGM=", + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/should-util/-/should-util-1.0.1.tgz", + "integrity": "sha512-oXF8tfxx5cDk8r2kYqlkUJzZpDBqVY/II2WhvU0n9Y3XYvAYRmeaf1PvvIvTgPnv4KJ+ES5M0PyDq5Jp+Ygy2g==", "dev": true }, "sigmund": { diff --git a/package.json b/package.json index 90233e270f..83ce33c3bd 100644 --- a/package.json +++ b/package.json @@ -32,7 +32,8 @@ "test": "mocha test", "test-markup": "mocha test/markup", "test-detect": "mocha test/detect", - "test-browser": "mocha test/browser" + "test-browser": "mocha test/browser", + "test-parser": "mocha test/parser" }, "engines": { "node": "*" diff --git a/src/highlight.js b/src/highlight.js index 53026738c6..cbd0ae9fe5 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -498,20 +498,13 @@ const HLJS = function(hljs) { // considered for a potential match resumeScanAtSamePosition = false; } else { - top.matcher.lastIndex = index; top.matcher.considerAll(); } + top.matcher.lastIndex = index; const match = top.matcher.exec(codeToHighlight); // console.log("match", match[0], match.rule && match.rule.begin) - // if our failure to match was the result of a "resumed scan" then we - // need to advance one position and revert to full scanning before we - // decide there are truly no more matches at all to be had - if (!match && top.matcher.resumingScanAtSamePosition()) { - advanceOne(); - continue; - } if (!match) break; const beforeMatch = codeToHighlight.substring(index, match.index); diff --git a/src/lib/mode_compiler.js b/src/lib/mode_compiler.js index 56c0c20c82..70837c0a0f 100644 --- a/src/lib/mode_compiler.js +++ b/src/lib/mode_compiler.js @@ -143,7 +143,7 @@ export function compileLanguage(language) { } resumingScanAtSamePosition() { - return this.regexIndex != 0; + return this.regexIndex !== 0; } considerAll() { @@ -160,15 +160,58 @@ export function compileLanguage(language) { exec(s) { const m = this.getMatcher(this.regexIndex); m.lastIndex = this.lastIndex; - const result = m.exec(s); + let result = m.exec(s); + + // The following is because we have no easy way to say "resume scanning at the + // existing position but also skip the current rule ONLY". What happens is + // all prior rules are also skipped which can result in matching the wrong + // thing. Example of matching "booger": + + // our matcher is [string, "booger", number] + // + // ....booger.... + + // if "booger" is ignored then we'd really need a regex to scan from the + // SAME position for only: [string, number] but ignoring "booger" (if it + // was the first match), a simple resume would scan ahead who knows how + // far looking only for "number", ignoring potential string matches (or + // future "booger" matches that might be valid.) + + // So what we do: We execute two matchers, one resuming at the same + // position, but the second full matcher starting at the position after: + + // /--- resume first regex match here (for [number]) + // |/---- full match here for [string, "booger", number] + // vv + // ....booger.... + + // Which ever results in a match first is then used. So this 3-4 step + // process essentially allows us to say "match at this position, excluding + // a prior rule that was ignored". + // + // 1. Match "booger" first, ignore. Also proves that [string] does non match. + // 2. Resume matching for [number] + // 3. Match at index + 1 for [string, "booger", number] + // 4. If #2 and #3 result in matches, which came first? + if (this.resumingScanAtSamePosition()) { + if (result && result.index === this.lastIndex) { + // result is position +0 and therefore a valid + // "resume" match so result stays result + } else { // use the second matcher result + const m2 = this.getMatcher(0); + m2.lastIndex = this.lastIndex + 1; + result = m2.exec(s); + } + } + if (result) { this.regexIndex += result.position + 1; - if (this.regexIndex === this.count) { // wrap-around - this.regexIndex = 0; + if (this.regexIndex === this.count) { + // wrap-around to considering all matches again + this.considerAll(); } } - // this.regexIndex = 0; return result; } } diff --git a/test/index.js b/test/index.js index 4fbf3b2edb..57923ed69c 100644 --- a/test/index.js +++ b/test/index.js @@ -1,6 +1,6 @@ 'use strict'; -const hljs = require('../build'); +const hljs = require('../build'); hljs.debugMode(); // tests run in debug mode so errors are raised // Tests specific to the API exposed inside the hljs object. @@ -8,7 +8,7 @@ hljs.debugMode(); // tests run in debug mode so errors are raised require('./api'); // Test weird bugs we've fixed over time -require("./parser") +require("./parser"); // Tests for auto detection of languages via `highlightAuto`. require('./detect'); @@ -26,4 +26,3 @@ require('./markup'); // isn't actually used to test inside a browser but `jsdom` acts as a virtual // browser inside of node.js and runs together with all the other tests. require('./special'); - diff --git a/test/parser/index.js b/test/parser/index.js index 8763b67b7a..cb36144839 100644 --- a/test/parser/index.js +++ b/test/parser/index.js @@ -1,5 +1,7 @@ 'use strict'; +const should = require('should'); + describe('hljs', function() { require('./look-ahead-end-matchers'); require('./resume-scan'); diff --git a/test/parser/resume-scan.js b/test/parser/resume-scan.js index 9ed2bb9120..fb50a47f37 100644 --- a/test/parser/resume-scan.js +++ b/test/parser/resume-scan.js @@ -1,13 +1,26 @@ -const hljs = require('../../build'); +'use strict'; -describe("bugs", function () { +const hljs = require('../../build'); +hljs.debugMode(); // tests run in debug mode so errors are raised +describe("bugs", function() { describe("resume scan when a match is ignored", () => { it("should continue to highlight later matches", () => { - hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar")').value - .should.equal( - 'ImmutablePair.of(Stuff.class, "bar")' - ) - }) - }) -}) + const result = hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar")'); + result.value.should.equal( + 'ImmutablePair.of(Stuff.class, "bar")' + ); + }); + // previously the match rule was resumed but it would scan ahead too far and ignore + // later matches that matched the PRIOR rules... this is because when we "skip" (ignore) a + // rule we really only want to skip searching for THAT rule at that same location, we + // do not want to stop searching for ALL the prior rules at that location... + it("BUT should not skip ahead too far", () => { + const result = hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar");\n23'); + result.value.should.equal( + 'ImmutablePair.of(Stuff.class, "bar");\n' + + '23' + ); + }); + }); +});