Skip to content

Commit 201b056

Browse files
Improved canReorder by factoring out prefixes and suffixes
Fixes #2
1 parent 325a8fa commit 201b056

File tree

2 files changed

+120
-46
lines changed

2 files changed

+120
-46
lines changed

src/reorder.ts

Lines changed: 100 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import {
66
getMatchingDirection,
77
hasSomeDescendant,
88
isEmptyBackreference,
9-
isPotentiallyZeroLength,
109
MatchingDirection,
1110
OptionalMatchingDirection,
1211
} from "./basic";
@@ -233,31 +232,121 @@ function canReorderBasedOnLength(slice: readonly Alternative[]): boolean {
233232
* Returns whether alternatives can be reordered based on the characters
234233
* consumed.
235234
*
236-
* If the given alternatives are preceded and followed by characters not
237-
* consumed by the alternatives, then the order order of the alternatives
238-
* doesn't matter.
235+
* If the given alternatives are followed (in their current matching direction)
236+
by characters not consumed by the alternatives, then the order of the
237+
* alternatives doesn't matter.
238+
*
239+
* Furthermore, we can factor out common prefixes and suffixes. E.g. checking
240+
* whether `(?:foobar|footwear)` can be reordered is the same as checking
241+
* whether `foo(?:b|twe)ar` can be reordered. Using this idea, we can narrow
242+
* down the consumed characters and find additional characters that also have
243+
* to be disjoint with the consumed characters.
239244
*/
240245
function canReorderBasedOnConsumedChars(
241246
slice: readonly Alternative[],
242247
direction: MatchingDirection,
243248
flags: ReadonlyFlags
244249
): boolean {
245-
// we assume that at least one character is consumed in each alternative
246-
if (slice.some(isPotentiallyZeroLength)) {
247-
return false;
250+
const factoredOut = factorOutCommon(
251+
slice.map(a => a.elements),
252+
flags
253+
);
254+
255+
const elements: Element[] = [];
256+
for (const alternative of factoredOut.rest) {
257+
elements.push(...alternative);
258+
}
259+
260+
const consumedChars = Chars.empty(flags).union(...elements.map(e => getConsumedChars(e, flags)));
261+
262+
// we first check all suffix characters because we get them for free when factoring out.
263+
const suffix = direction === "ltr" ? factoredOut.right : factoredOut.left;
264+
if (suffix.some(cs => cs.isDisjointWith(consumedChars))) {
265+
return true;
248266
}
249267

268+
// now we check the character after the parent of the given alternatives
250269
const parent = slice[0].parent;
251270
if (parent.type === "Pattern" || parent.type === "Assertion") {
252271
return false;
253272
}
254273

255-
const consumedChars = Chars.empty(flags).union(...slice.map(a => getConsumedChars(a, flags)));
256-
257-
// If we know the current direction, then it is enough to prove that the char after the alternatives is
258-
// different from the chars that could possibly be consumed by the alternatives.
259274
return getFirstCharAfter(parent, direction, flags).char.isDisjointWith(consumedChars);
260275
}
276+
interface FactoredOut {
277+
left: CharSet[];
278+
right: CharSet[];
279+
rest: readonly (readonly Element[])[];
280+
}
281+
function factorOutCommon(alternatives: readonly (readonly Element[])[], flags: ReadonlyFlags): FactoredOut {
282+
const prefix = factorOutCommonPrefix(alternatives, "ltr", flags);
283+
const suffix = factorOutCommonPrefix(prefix.rest, "rtl", flags);
284+
return { left: prefix.prefix, right: suffix.prefix, rest: suffix.rest };
285+
}
286+
interface FactoredOutPrefix {
287+
prefix: CharSet[];
288+
rest: readonly (readonly Element[])[];
289+
}
290+
function factorOutCommonPrefix(
291+
alternatives: readonly (readonly Element[])[],
292+
direction: MatchingDirection,
293+
flags: ReadonlyFlags
294+
): FactoredOutPrefix {
295+
const prefix = getLongestPureCharPrefix(alternatives, direction, flags);
296+
if (prefix.length === 0) {
297+
return { prefix, rest: alternatives };
298+
} else {
299+
// remove prefix
300+
return {
301+
prefix,
302+
rest: alternatives.map(elements => {
303+
const start = direction === "ltr" ? prefix.length : 0;
304+
const end = direction === "ltr" ? elements.length : elements.length - prefix.length;
305+
return elements.slice(start, end);
306+
}),
307+
};
308+
}
309+
}
310+
function getLongestPureCharPrefix(
311+
alternatives: readonly (readonly Element[])[],
312+
direction: MatchingDirection,
313+
flags: ReadonlyFlags
314+
): CharSet[] {
315+
const prefix: CharSet[] = [];
316+
317+
for (let i = 0; ; i++) {
318+
let char: CharSet | null = null;
319+
320+
for (const elements of alternatives) {
321+
const current = direction === "ltr" ? i : elements.length - 1 - i;
322+
323+
if (i >= 0 && i < elements.length) {
324+
const element = elements[current];
325+
switch (element.type) {
326+
case "Character":
327+
case "CharacterClass":
328+
case "CharacterSet":
329+
if (char === null) {
330+
char = toCharSet(element, flags);
331+
} else {
332+
if (!char.equals(toCharSet(element, flags))) {
333+
return prefix;
334+
}
335+
}
336+
break;
337+
338+
default:
339+
return prefix;
340+
}
341+
} else {
342+
return prefix;
343+
}
344+
}
345+
346+
if (char === null) throw new Error();
347+
prefix.push(char);
348+
}
349+
}
261350

262351
/**
263352
* Returns the smallest slice of alternatives that contains all given

tests/__snapshots__/reorder.ts.snap

Lines changed: 20 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,10 @@ Object {
9696
"js|jso?n?": Object {
9797
"dir:ltr ignoreCG:false": true,
9898
"dir:ltr ignoreCG:true ": true,
99-
"dir:rtl ignoreCG:false": false,
100-
"dir:rtl ignoreCG:true ": false,
101-
"dir:unknown ignoreCG:false": false,
102-
"dir:unknown ignoreCG:true ": false,
99+
"dir:rtl ignoreCG:false": true,
100+
"dir:rtl ignoreCG:true ": true,
101+
"dir:unknown ignoreCG:false": true,
102+
"dir:unknown ignoreCG:true ": true,
103103
},
104104
},
105105
}
@@ -166,10 +166,10 @@ Object {
166166
"yml|ya?ml": Object {
167167
"dir:ltr ignoreCG:false": true,
168168
"dir:ltr ignoreCG:true ": true,
169-
"dir:rtl ignoreCG:false": false,
170-
"dir:rtl ignoreCG:true ": false,
171-
"dir:unknown ignoreCG:false": false,
172-
"dir:unknown ignoreCG:true ": false,
169+
"dir:rtl ignoreCG:false": true,
170+
"dir:rtl ignoreCG:true ": true,
171+
"dir:unknown ignoreCG:false": true,
172+
"dir:unknown ignoreCG:true ": true,
173173
},
174174
},
175175
}
@@ -179,12 +179,12 @@ exports[`canReorder /(?:yml|ya?ml)/ 1`] = `
179179
Object {
180180
"(?:yml|ya?ml)": Object {
181181
"yml|ya?ml": Object {
182-
"dir:ltr ignoreCG:false": false,
183-
"dir:ltr ignoreCG:true ": false,
184-
"dir:rtl ignoreCG:false": false,
185-
"dir:rtl ignoreCG:true ": false,
186-
"dir:unknown ignoreCG:false": false,
187-
"dir:unknown ignoreCG:true ": false,
182+
"dir:ltr ignoreCG:false": true,
183+
"dir:ltr ignoreCG:true ": true,
184+
"dir:rtl ignoreCG:false": true,
185+
"dir:rtl ignoreCG:true ": true,
186+
"dir:unknown ignoreCG:false": true,
187+
"dir:unknown ignoreCG:true ": true,
188188
},
189189
},
190190
}
@@ -279,8 +279,8 @@ exports[`canReorder /\\d*\\.\\d+_|\\d+\\.\\d*_/ 1`] = `
279279
Object {
280280
"\\\\d*\\\\.\\\\d+_|\\\\d+\\\\.\\\\d*_": Object {
281281
"\\\\d*\\\\.\\\\d+_|\\\\d+\\\\.\\\\d*_": Object {
282-
"dir:ltr ignoreCG:false": false,
283-
"dir:ltr ignoreCG:true ": false,
282+
"dir:ltr ignoreCG:false": true,
283+
"dir:ltr ignoreCG:true ": true,
284284
"dir:rtl ignoreCG:false": false,
285285
"dir:rtl ignoreCG:true ": false,
286286
"dir:unknown ignoreCG:false": false,
@@ -350,21 +350,6 @@ Object {
350350
}
351351
`;
352352

353-
exports[`canReorder /\\w|abc|123|_|[A-Z]|\\$| / 2`] = `
354-
Object {
355-
"\\\\w|abc|123|_|[A-Z]|\\\\$| ": Object {
356-
"\\\\w|abc|123|_|[A-Z]|\\\\$| ": Object {
357-
"dir:ltr ignoreCG:false": false,
358-
"dir:ltr ignoreCG:true ": false,
359-
"dir:rtl ignoreCG:false": false,
360-
"dir:rtl ignoreCG:true ": false,
361-
"dir:unknown ignoreCG:false": false,
362-
"dir:unknown ignoreCG:true ": false,
363-
},
364-
},
365-
}
366-
`;
367-
368353
exports[`canReorder /0|(1)|2|(3)/ 1`] = `
369354
Object {
370355
"0|(1)|2|(3)": Object {
@@ -429,8 +414,8 @@ exports[`canReorder /A+_|A*_/ 1`] = `
429414
Object {
430415
"A+_|A*_": Object {
431416
"A+_|A*_": Object {
432-
"dir:ltr ignoreCG:false": false,
433-
"dir:ltr ignoreCG:true ": false,
417+
"dir:ltr ignoreCG:false": true,
418+
"dir:ltr ignoreCG:true ": true,
434419
"dir:rtl ignoreCG:false": false,
435420
"dir:rtl ignoreCG:true ": false,
436421
"dir:unknown ignoreCG:false": false,
@@ -446,8 +431,8 @@ Object {
446431
"FOO|foo(?:bar)?": Object {
447432
"dir:ltr ignoreCG:false": false,
448433
"dir:ltr ignoreCG:true ": false,
449-
"dir:rtl ignoreCG:false": false,
450-
"dir:rtl ignoreCG:true ": false,
434+
"dir:rtl ignoreCG:false": true,
435+
"dir:rtl ignoreCG:true ": true,
451436
"dir:unknown ignoreCG:false": false,
452437
"dir:unknown ignoreCG:true ": false,
453438
},

0 commit comments

Comments
 (0)