diff --git a/package-lock.json b/package-lock.json
index ff8814b9f3..96acf60f84 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -3283,9 +3283,9 @@
}
},
"should-util": {
- "version": "1.0.0",
- "resolved": "https://registry.npmjs.org/should-util/-/should-util-1.0.0.tgz",
- "integrity": "sha1-yYzaN0qmsZDfi6h8mInCtNtiAGM=",
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/should-util/-/should-util-1.0.1.tgz",
+ "integrity": "sha512-oXF8tfxx5cDk8r2kYqlkUJzZpDBqVY/II2WhvU0n9Y3XYvAYRmeaf1PvvIvTgPnv4KJ+ES5M0PyDq5Jp+Ygy2g==",
"dev": true
},
"sigmund": {
diff --git a/package.json b/package.json
index 90233e270f..83ce33c3bd 100644
--- a/package.json
+++ b/package.json
@@ -32,7 +32,8 @@
"test": "mocha test",
"test-markup": "mocha test/markup",
"test-detect": "mocha test/detect",
- "test-browser": "mocha test/browser"
+ "test-browser": "mocha test/browser",
+ "test-parser": "mocha test/parser"
},
"engines": {
"node": "*"
diff --git a/src/highlight.js b/src/highlight.js
index 53026738c6..cbd0ae9fe5 100644
--- a/src/highlight.js
+++ b/src/highlight.js
@@ -498,20 +498,13 @@ const HLJS = function(hljs) {
// considered for a potential match
resumeScanAtSamePosition = false;
} else {
- top.matcher.lastIndex = index;
top.matcher.considerAll();
}
+ top.matcher.lastIndex = index;
const match = top.matcher.exec(codeToHighlight);
// console.log("match", match[0], match.rule && match.rule.begin)
- // if our failure to match was the result of a "resumed scan" then we
- // need to advance one position and revert to full scanning before we
- // decide there are truly no more matches at all to be had
- if (!match && top.matcher.resumingScanAtSamePosition()) {
- advanceOne();
- continue;
- }
if (!match) break;
const beforeMatch = codeToHighlight.substring(index, match.index);
diff --git a/src/lib/mode_compiler.js b/src/lib/mode_compiler.js
index 56c0c20c82..70837c0a0f 100644
--- a/src/lib/mode_compiler.js
+++ b/src/lib/mode_compiler.js
@@ -143,7 +143,7 @@ export function compileLanguage(language) {
}
resumingScanAtSamePosition() {
- return this.regexIndex != 0;
+ return this.regexIndex !== 0;
}
considerAll() {
@@ -160,15 +160,58 @@ export function compileLanguage(language) {
exec(s) {
const m = this.getMatcher(this.regexIndex);
m.lastIndex = this.lastIndex;
- const result = m.exec(s);
+ let result = m.exec(s);
+
+ // The following is because we have no easy way to say "resume scanning at the
+ // existing position but also skip the current rule ONLY". What happens is
+ // all prior rules are also skipped which can result in matching the wrong
+ // thing. Example of matching "booger":
+
+ // our matcher is [string, "booger", number]
+ //
+ // ....booger....
+
+ // if "booger" is ignored then we'd really need a regex to scan from the
+ // SAME position for only: [string, number] but ignoring "booger" (if it
+ // was the first match), a simple resume would scan ahead who knows how
+ // far looking only for "number", ignoring potential string matches (or
+ // future "booger" matches that might be valid.)
+
+ // So what we do: We execute two matchers, one resuming at the same
+ // position, but the second full matcher starting at the position after:
+
+ // /--- resume first regex match here (for [number])
+ // |/---- full match here for [string, "booger", number]
+ // vv
+ // ....booger....
+
+ // Which ever results in a match first is then used. So this 3-4 step
+ // process essentially allows us to say "match at this position, excluding
+ // a prior rule that was ignored".
+ //
+ // 1. Match "booger" first, ignore. Also proves that [string] does non match.
+ // 2. Resume matching for [number]
+ // 3. Match at index + 1 for [string, "booger", number]
+ // 4. If #2 and #3 result in matches, which came first?
+ if (this.resumingScanAtSamePosition()) {
+ if (result && result.index === this.lastIndex) {
+ // result is position +0 and therefore a valid
+ // "resume" match so result stays result
+ } else { // use the second matcher result
+ const m2 = this.getMatcher(0);
+ m2.lastIndex = this.lastIndex + 1;
+ result = m2.exec(s);
+ }
+ }
+
if (result) {
this.regexIndex += result.position + 1;
- if (this.regexIndex === this.count) { // wrap-around
- this.regexIndex = 0;
+ if (this.regexIndex === this.count) {
+ // wrap-around to considering all matches again
+ this.considerAll();
}
}
- // this.regexIndex = 0;
return result;
}
}
diff --git a/test/index.js b/test/index.js
index 4fbf3b2edb..57923ed69c 100644
--- a/test/index.js
+++ b/test/index.js
@@ -1,6 +1,6 @@
'use strict';
-const hljs = require('../build');
+const hljs = require('../build');
hljs.debugMode(); // tests run in debug mode so errors are raised
// Tests specific to the API exposed inside the hljs object.
@@ -8,7 +8,7 @@ hljs.debugMode(); // tests run in debug mode so errors are raised
require('./api');
// Test weird bugs we've fixed over time
-require("./parser")
+require("./parser");
// Tests for auto detection of languages via `highlightAuto`.
require('./detect');
@@ -26,4 +26,3 @@ require('./markup');
// isn't actually used to test inside a browser but `jsdom` acts as a virtual
// browser inside of node.js and runs together with all the other tests.
require('./special');
-
diff --git a/test/parser/index.js b/test/parser/index.js
index 8763b67b7a..cb36144839 100644
--- a/test/parser/index.js
+++ b/test/parser/index.js
@@ -1,5 +1,7 @@
'use strict';
+const should = require('should');
+
describe('hljs', function() {
require('./look-ahead-end-matchers');
require('./resume-scan');
diff --git a/test/parser/resume-scan.js b/test/parser/resume-scan.js
index 9ed2bb9120..fb50a47f37 100644
--- a/test/parser/resume-scan.js
+++ b/test/parser/resume-scan.js
@@ -1,13 +1,26 @@
-const hljs = require('../../build');
+'use strict';
-describe("bugs", function () {
+const hljs = require('../../build');
+hljs.debugMode(); // tests run in debug mode so errors are raised
+describe("bugs", function() {
describe("resume scan when a match is ignored", () => {
it("should continue to highlight later matches", () => {
- hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar")').value
- .should.equal(
- 'ImmutablePair.of(Stuff.class, "bar")'
- )
- })
- })
-})
+ const result = hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar")');
+ result.value.should.equal(
+ 'ImmutablePair.of(Stuff.class, "bar")'
+ );
+ });
+ // previously the match rule was resumed but it would scan ahead too far and ignore
+ // later matches that matched the PRIOR rules... this is because when we "skip" (ignore) a
+ // rule we really only want to skip searching for THAT rule at that same location, we
+ // do not want to stop searching for ALL the prior rules at that location...
+ it("BUT should not skip ahead too far", () => {
+ const result = hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar");\n23');
+ result.value.should.equal(
+ 'ImmutablePair.of(Stuff.class, "bar");\n' +
+ '23'
+ );
+ });
+ });
+});