Skip to content

Commit 72d7fde

Browse files
committed
Optimizer: Unescape ^ - { } when safe
1 parent f108923 commit 72d7fde

File tree

2 files changed

+153
-16
lines changed

2 files changed

+153
-16
lines changed

src/optimizer/transforms/__tests__/char-escape-unescape-transform-test.js

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,72 @@ describe('\e -> e', () => {
1818
});
1919

2020
it('preserve escape', () => {
21-
const re = transform(/\*\^\$\(\)\[\{\}\|/, [
21+
const re = transform(/\*\^\$\(\)\[\|/, [
2222
charUnescape,
2323
]);
24-
expect(re.toString()).toBe(/\*\^\$\(\)\[\{\}\|/.toString());
24+
expect(re.toString()).toBe(/\*\^\$\(\)\[\|/.toString());
25+
});
26+
27+
it('unescapes curly braces', () => {
28+
const re = transform(/\{\}/, [
29+
charUnescape,
30+
]);
31+
expect(re.toString()).toBe(/{}/.toString());
32+
});
33+
34+
it('does not unescape \{ when looking like a quantifier', () => {
35+
let re = transform(/a\{3}/, [
36+
charUnescape,
37+
]);
38+
expect(re.toString()).toBe(/a\{3}/.toString());
39+
40+
re = transform(/a\{3,}/, [
41+
charUnescape,
42+
]);
43+
expect(re.toString()).toBe(/a\{3,}/.toString());
44+
45+
re = transform(/a\{10,12}/, [
46+
charUnescape,
47+
]);
48+
expect(re.toString()).toBe(/a\{10,12}/.toString());
49+
});
50+
51+
it('does not unescape \} when looking like a quantifier', () => {
52+
let re = transform(/a{3\}/, [
53+
charUnescape,
54+
]);
55+
expect(re.toString()).toBe(/a{3\}/.toString());
56+
57+
re = transform(/a{3,\}/, [
58+
charUnescape,
59+
]);
60+
expect(re.toString()).toBe(/a{3,\}/.toString());
61+
62+
re = transform(/a{10,12\}/, [
63+
charUnescape,
64+
]);
65+
expect(re.toString()).toBe(/a{10,12\}/.toString());
2566
});
2667

2768
it('char class', () => {
2869
const re = transform(/[\e\*\(\]\ \^\$\-]\(\n/, [
2970
charUnescape,
3071
]);
31-
expect(re.toString()).toBe(/[e*(\] \^$\-]\(\n/.toString());
72+
expect(re.toString()).toBe(/[e*(\] ^$-]\(\n/.toString());
73+
});
74+
75+
it('does not unescape \^ in char class when in first position', () => {
76+
const re = transform(/[\^a]/, [
77+
charUnescape,
78+
]);
79+
expect(re.toString()).toBe(/[\^a]/.toString());
80+
});
81+
82+
it('does not unescape \- in char class when not in first or last position', () => {
83+
const re = transform(/[a\-z]/, [
84+
charUnescape,
85+
]);
86+
expect(re.toString()).toBe(/[a\-z]/.toString());
3287
});
3388

3489
});

src/optimizer/transforms/char-escape-unescape-transform.js

Lines changed: 95 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,30 +27,112 @@ module.exports = {
2727
};
2828

2929
/**
 * Decides whether the escaped char at `path` may lose its backslash.
 *
 * A char whose parent is a `CharacterClass` is checked with
 * `preservesInCharClass`; any other char is checked with
 * `preservesEscape`. The escape can be dropped only when the
 * applicable check does not ask to preserve it.
 *
 * @param {Object} path - traversal path; `node.value`, `index` and
 *   `parent` are read from it
 * @returns {boolean} true when the escape can be removed
 */
function shouldUnescape(path) {
  const {node, index, parent} = path;
  const insideCharClass = parent.type === 'CharacterClass';

  // In char class (, etc are allowed, so a dedicated rule set applies there.
  const mustKeepEscape = insideCharClass
    ? preservesInCharClass(node.value, index, parent)
    : preservesEscape(node.value, index, parent);

  return !mustKeepEscape;
}
3939

4040
/**
 * Tells whether an escaped char sitting directly in a character class
 * has to keep its backslash: \], \\, and (depending on position) \^, \-
 *
 * @param {string} value - the char without its backslash
 * @param {number} index - position of the char in `parent.expressions`
 * @param {Object} parent - the enclosing class node; `negative` and
 *   `expressions` are read from it
 * @returns {boolean} true when the escape must stay
 */
function preservesInCharClass(value, index, parent) {
  switch (value) {
    case '^':
      // Avoid [\^a] turning into [^a]
      return index === 0 && !parent.negative;

    case '-':
      // Avoid [a\-z] turning into [a-z]: only the first and the last
      // positions are safe for a bare dash.
      return !(index === 0 || index === parent.expressions.length - 1);

    default:
      return /[\]\\]/.test(value);
  }
}
54+
55+
/**
 * Tells whether an escaped char outside of a character class has to
 * keep its backslash.
 *
 * Curly braces are delegated to the dedicated helpers, which look at
 * the neighbouring nodes; every other char is preserved when it is one
 * of: * [ ] ( ) + ? ^ $ . / \ |
 *
 * @param {string} value - the char without its backslash
 * @param {number} index - position of the char within `parent`
 * @param {Object} parent - the parent node of the char
 * @returns {boolean} truthy when the escape must stay
 */
function preservesEscape(value, index, parent) {
  switch (value) {
    case '{':
      return preservesOpeningCurlyBraceEscape(index, parent);

    case '}':
      return preservesClosingCurlyBraceEscape(index, parent);

    default:
      return /[*[\]()+?^$./\\|]/.test(value);
  }
}
66+
67+
/**
 * Counts the consecutive unescaped simple digit chars found among the
 * children of `parent`, starting at `startIndex` and walking forward,
 * or backward when `rtl` is truthy.
 *
 * @param {number} startIndex - index in `parent.expressions` of the
 *   first node to examine
 * @param {Object} parent - node whose `expressions` array is scanned
 * @param {boolean} [rtl] - walk towards index 0 instead of the end
 * @returns {number} number of digit chars found in a row; 0 when
 *   `parent` has no `expressions` array or `startIndex` is not an integer
 */
function consumeNumbers(startIndex, parent, rtl) {
  const siblings = parent && parent.expressions;

  // Guard: without a sibling array or a usable index there is nothing
  // to scan. Report "no digits" instead of throwing or returning NaN.
  if (!Array.isArray(siblings) || !Number.isInteger(startIndex)) {
    return 0;
  }

  const isUnescapedDigit = node =>
    Boolean(node) &&
    node.type === 'Char' &&
    node.kind === 'simple' &&
    !node.escaped &&
    /\d/.test(node.value);

  const step = rtl ? -1 : 1;
  let i = startIndex;

  while (i >= 0 && i < siblings.length && isUnescapedDigit(siblings[i])) {
    i += step;
  }

  return Math.abs(startIndex - i);
}
84+
85+
/**
 * Checks that `node` is an unescaped simple `Char` node holding exactly
 * `value`.
 *
 * Always returns a boolean: a missing node (`undefined`, `null`,
 * `false`) yields `false` rather than being returned as-is.
 *
 * @param {Object|undefined|null|false} node - candidate node
 * @param {string} value - expected char value
 * @returns {boolean}
 */
function isSimpleChar(node, value) {
  if (!node || node.type !== 'Char' || node.kind !== 'simple') {
    return false;
  }
  return !node.escaped && node.value === value;
}
5092

51-
// Note: \{ and \} are always preserved to avoid `a\{2\}` turning
52-
// into `a{2}`. TODO: more sophisticated analisys.
93+
/**
 * Tells whether `\{` at position `index` of `parent.expressions` has to
 * stay escaped because, once unescaped, it would start something that
 * reads as a quantifier: `{3}`, `{3,}` or `{3,5}`.
 *
 * NOTE(review): only an unescaped `}` is recognised as the closing
 * brace, so for `\{3\}` this returns false. Whether that is safe depends
 * on how the caller processes the matching `\}` afterwards; confirm
 * against the transform's traversal.
 *
 * @param {number} index - position of the `\{` char in `parent.expressions`
 * @param {Object} parent - parent node of the char
 * @returns {boolean} true when the escape must stay; false as well when
 *   the char has no index or `parent` has no `expressions` array, since
 *   there are then no siblings to form a quantifier with
 */
function preservesOpeningCurlyBraceEscape(index, parent) {
  const siblings = parent && parent.expressions;

  // Guard: no sibling list to inspect (would otherwise throw on
  // `parent.expressions.length`).
  if (index == null || !Array.isArray(siblings)) {
    return false;
  }

  const nbMinDigits = consumeNumbers(index + 1, parent);

  // A quantifier needs at least one digit right after the brace.
  if (!nbMinDigits) {
    return false;
  }

  const afterMinIndex = index + 1 + nbMinDigits;
  const afterMin = siblings[afterMinIndex];

  // Avoid \{3} turning into {3}
  if (isSimpleChar(afterMin, '}')) {
    return true;
  }

  if (!isSimpleChar(afterMin, ',')) {
    return false;
  }

  // Avoid \{3,} turning into {3,} (and \{3,5} turning into {3,5}):
  // the digits after the comma are optional.
  const nbMaxDigits = consumeNumbers(afterMinIndex + 1, parent);
  const afterMax = siblings[afterMinIndex + 1 + nbMaxDigits];

  return Boolean(isSimpleChar(afterMax, '}'));
}
117+
118+
/**
 * Tells whether `\}` at position `index` of `parent.expressions` has to
 * stay escaped because, once unescaped, it would close something that
 * reads as a quantifier: `{3}`, `{3,}` or `{3,5}`.
 *
 * Only an unescaped `{` is recognised as the opening brace.
 *
 * @param {number} index - position of the `\}` char in `parent.expressions`
 * @param {Object} parent - parent node of the char
 * @returns {boolean} true when the escape must stay; false as well when
 *   the char has no index or `parent` has no `expressions` array, since
 *   there are then no siblings to form a quantifier with
 */
function preservesClosingCurlyBraceEscape(index, parent) {
  const siblings = parent && parent.expressions;

  // Guard: no sibling list to inspect (would otherwise throw on
  // `parent.expressions`).
  if (index == null || !Array.isArray(siblings)) {
    return false;
  }

  // Walking backwards, the only bound that matters is the array start.
  const nodeAt = i => (i >= 0 ? siblings[i] : undefined);

  const nbDigitsBefore = consumeNumbers(index - 1, parent, true);
  const beforeDigitsIndex = index - 1 - nbDigitsBefore;
  const beforeDigits = nodeAt(beforeDigitsIndex);

  // Avoid {3\} turning into {3}
  if (nbDigitsBefore && isSimpleChar(beforeDigits, '{')) {
    return true;
  }

  if (!isSimpleChar(beforeDigits, ',')) {
    return false;
  }

  // Avoid {3,\} turning into {3,} (and {3,5\} turning into {3,5}):
  // the digits before the comma are mandatory.
  const nbMinDigits = consumeNumbers(beforeDigitsIndex - 1, parent, true);
  const beforeMin = nodeAt(beforeDigitsIndex - 1 - nbMinDigits);

  return Boolean(nbMinDigits && isSimpleChar(beforeMin, '{'));
}

0 commit comments

Comments
 (0)