Skip to content
This repository was archived by the owner on Apr 8, 2020. It is now read-only.

Commit 5050a47

Browse files
committed
WIP
1 parent da4a864 commit 5050a47

File tree

3 files changed

+78
-161
lines changed

3 files changed

+78
-161
lines changed

lib/configLexer.js

Lines changed: 43 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,39 @@
11
import { Token, Lexer } from 'chevrotain';
22

3-
// \s without \n
4-
const lineSpaceRegex = '[ \f\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]';
5-
6-
const lexingSpec = {
3+
const lexicalGrammar = {
74
defaultMode: 'initMode',
85
modes: {}
96
};
107

118
class WhiteSpaceT extends Token {
12-
static PATTERN = /\s+/;
9+
static PATTERN = /[^\S\r\n]+/;
1310
static GROUP = Lexer.SKIPPED;
11+
}
12+
13+
class EndOfLineT extends Token {
14+
static PATTERN = /(?:\r\n|\n)+/;
1415
static LINE_BREAKS = true;
16+
static GROUP = Lexer.SKIPPED;
17+
}
18+
19+
class CommentT extends Token {
20+
static PATTERN = /[#;].*/;
21+
static GROUP = Lexer.SKIPPED;
1522
}
1623

17-
class InitLSquareT extends Token {
24+
class HeaderEnterT extends Token {
1825
static PATTERN = /\[/;
1926
static PUSH_MODE = 'headerMode';
2027
}
2128

22-
lexingSpec.modes.initMode = [
29+
lexicalGrammar.modes.initMode = [
2330
WhiteSpaceT,
24-
InitLSquareT
31+
EndOfLineT,
32+
CommentT,
33+
HeaderEnterT
2534
];
2635

27-
class RSquareT extends Token {
36+
class BodyEnterT extends Token {
2837
static PATTERN = /]/;
2938
static PUSH_MODE = 'bodyMode';
3039
}
@@ -44,80 +53,73 @@ class HeaderIncludeT extends Token {
4453
}
4554

4655
class HeaderSubNameT extends Token {
47-
static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\])*"/;
56+
static PATTERN = /"(?:[^\\"\r\n)]|\\[bnt"\\])*"/;
4857
}
4958

50-
lexingSpec.modes.headerMode = [
59+
lexicalGrammar.modes.headerMode = [
5160
WhiteSpaceT,
52-
RSquareT,
61+
BodyEnterT,
5362
HeaderIncludeIfT,
5463
HeaderIncludeT,
5564
HeaderNameT,
5665
HeaderSubNameT
5766
];
5867

59-
class LSquareT extends Token {
68+
class BodyExitT extends Token {
6069
static PATTERN = /\[/;
6170
static POP_MODE = true;
6271
}
6372

64-
// must ignore leading spaces
65-
class BodyEqualT extends Token {
66-
static PATTERN = new RegExp('=' + lineSpaceRegex + '*');
67-
static PUSH_MODE = 'valueMode';
68-
}
69-
7073
class BodyKeyT extends Token {
7174
static PATTERN = /[a-zA-Z][a-zA-Z0-9-]*/;
7275
}
7376

74-
lexingSpec.modes.bodyMode = [
77+
class ValueEnterT extends Token {
78+
static PATTERN = /=[^\S\r\n]*/;
79+
static PUSH_MODE = 'valueMode';
80+
}
81+
82+
lexicalGrammar.modes.bodyMode = [
7583
WhiteSpaceT,
76-
LSquareT,
84+
EndOfLineT,
85+
CommentT,
86+
BodyExitT,
7787
BodyKeyT,
78-
BodyEqualT
88+
ValueEnterT
7989
];
8090

8191
class ValueLineContinuationT extends Token {
82-
static PATTERN = /\\\n/;
92+
static PATTERN = /(?:\\\r\n|\\\n)/;
8393
static LINE_BREAKS = true;
8494
}
8595

86-
class NewlineT extends Token {
87-
static PATTERN = /\n/;
96+
class ValueExitT extends Token {
97+
static PATTERN = /(?:\r\n|\n)+/;
8898
static POP_MODE = true;
8999
static LINE_BREAKS = true;
90100
}
91101

92102
class ValueSpaceT extends Token {
93-
static PATTERN = new RegExp(lineSpaceRegex + '+');
94-
}
95-
96-
class ValueTrailingSpaceT extends Token {
97-
static PATTERN = new RegExp(lineSpaceRegex + '+(?=\n)');
98-
static GROUP = Lexer.SKIPPED;
103+
static PATTERN = /[^\S\r\n]+(?=\b|"|\\)/;
99104
}
100105

101106
class ValueStringT extends Token {
102107
static PATTERN = /(?:[^\\"\s]|\\[bnt"\\])+/;
103108
}
104109

105110
class ValueQuotedStringT extends Token {
106-
static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\\n])*"/;
111+
static PATTERN = /"(?:[^\\"\r\n]|\\(?:\r\n|[bnt"\\\n]))*"/;
107112
static LINE_BREAKS = true;
108113
}
109114

110-
lexingSpec.modes.valueMode = [
111-
ValueLineContinuationT,
112-
NewlineT,
113-
ValueTrailingSpaceT,
115+
lexicalGrammar.modes.valueMode = [
116+
CommentT,
114117
ValueSpaceT,
118+
WhiteSpaceT,
119+
ValueLineContinuationT,
120+
ValueExitT,
115121
ValueStringT,
116122
ValueQuotedStringT
117123
];
118124

119-
const lexer = new Lexer(lexingSpec, {
120-
debug: true
121-
});
122-
123-
export default lexer;
125+
export default lexicalGrammar;

test.js

Lines changed: 11 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -1,129 +1,20 @@
11
import fs from 'fs';
2-
import { Token, Lexer, Parser } from 'chevrotain';
2+
import { Lexer } from 'chevrotain';
3+
import lexingGrammar from './lib/configLexer.js';
34

4-
// \s without \n
5-
const lineSpaceRegex = '[ \f\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]';
6-
7-
const lexingSpec = {
8-
defaultMode: 'initMode',
9-
modes: {}
10-
};
11-
12-
class WhiteSpaceT extends Token {
13-
static PATTERN = /\s+/;
14-
static GROUP = Lexer.SKIPPED;
15-
static LINE_BREAKS = true;
16-
}
17-
18-
class InitLSquareT extends Token {
19-
static PATTERN = /\[/;
20-
static PUSH_MODE = 'headerMode';
21-
}
22-
23-
lexingSpec.modes.initMode = [
24-
WhiteSpaceT,
25-
InitLSquareT
26-
];
27-
28-
class RSquareT extends Token {
29-
static PATTERN = /]/;
30-
static PUSH_MODE = 'bodyMode';
31-
}
32-
33-
class HeaderNameT extends Token {
34-
static PATTERN = /[0-9a-zA-Z.-]+/;
35-
}
36-
37-
class HeaderIncludeIfT extends Token {
38-
static PATTERN = /includeIf/;
39-
static LONGER_ALT = HeaderNameT;
40-
}
41-
42-
class HeaderIncludeT extends Token {
43-
static PATTERN = /include/;
44-
static LONGER_ALT = HeaderNameT;
45-
}
46-
47-
class HeaderSubNameT extends Token {
48-
static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\])*"/;
49-
}
50-
51-
lexingSpec.modes.headerMode = [
52-
WhiteSpaceT,
53-
RSquareT,
54-
HeaderIncludeIfT,
55-
HeaderIncludeT,
56-
HeaderNameT,
57-
HeaderSubNameT
58-
];
59-
60-
class LSquareT extends Token {
61-
static PATTERN = /\[/;
62-
static POP_MODE = true;
63-
}
64-
65-
// must ignore leading spaces
66-
class BodyEqualT extends Token {
67-
static PATTERN = new RegExp('=' + lineSpaceRegex + '*');
68-
static PUSH_MODE = 'valueMode';
69-
}
70-
71-
class BodyKeyT extends Token {
72-
static PATTERN = /[a-zA-Z][a-zA-Z0-9-]*/;
73-
}
74-
75-
lexingSpec.modes.bodyMode = [
76-
WhiteSpaceT,
77-
LSquareT,
78-
BodyKeyT,
79-
BodyEqualT
80-
];
81-
82-
class ValueLineContinuation extends Token {
83-
static PATTERN = /\\\n/;
84-
static LINE_BREAKS = true;
85-
}
86-
87-
class NewlineT extends Token {
88-
static PATTERN = /\n/;
89-
static POP_MODE = true;
90-
static LINE_BREAKS = true;
91-
}
92-
93-
class ValueSpaceT extends Token {
94-
static PATTERN = new RegExp(lineSpaceRegex + '+');
95-
}
96-
97-
class ValueTrailingSpaceT extends Token {
98-
static PATTERN = new RegExp(lineSpaceRegex + '+(?=\n)');
99-
static GROUP = Lexer.SKIPPED;
100-
}
101-
102-
class ValueStringT extends Token {
103-
static PATTERN = /(?:[^\\"\s]|\\[bnt"\\])+/;
104-
}
105-
106-
class ValueQuotedStringT extends Token {
107-
static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\\n])*"/;
108-
static LINE_BREAKS = true;
109-
}
110-
111-
lexingSpec.modes.valueMode = [
112-
ValueLineContinuation,
113-
NewlineT,
114-
ValueTrailingSpaceT,
115-
ValueSpaceT,
116-
ValueStringT,
117-
ValueQuotedStringT
118-
];
119-
120-
const lexer = new Lexer(lexingSpec, {
5+
const lexer = new Lexer(lexingGrammar, {
1216
debug: true
1227
});
1238

124-
const result = lexer.tokenize(fs.readFileSync('./test.config', 'utf8'));
9+
const text = fs.readFileSync('./test.config', 'utf8');
10+
const results = lexer.tokenize(text);
11+
12+
results.tokens = results.tokens.map((token) => {
13+
return [token.image, token.tokenClassName];
14+
});
12515

126-
console.log(result);
16+
console.log(text);
17+
console.log(results);
12718

12819
// class CommentT extends Token {
12920
// static NAME = "CommentT";

test/configLexing.js

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import test from 'ava';
2+
3+
// the tests require a common language, and we are actually testing its token vector
4+
// so instead of testing against the class name, we can check against the token image itself
5+
6+
test('section headers', t => {
7+
8+
});
9+
10+
test('subsection headers', t => {
11+
12+
});
13+
14+
test('trailing whitespace', t => {
15+
16+
});
17+
18+
test('comments', t => {
19+
20+
});
21+
22+
test('newline and carriage returns', t => {
23+
24+
});

0 commit comments

Comments
 (0)