Skip to content
This repository was archived by the owner on Apr 8, 2020. It is now read-only.

Commit 1c614a6

Browse files
committed
WIP
1 parent effbd5e commit 1c614a6

File tree

2 files changed

+264
-36
lines changed

2 files changed

+264
-36
lines changed

lib/configParser.js

Lines changed: 109 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import { Token, Lexer, Parser } from 'chevrotain';
22

3-
class SpaceT extends Token {
4-
static PATTERN = / /;
5-
}
3+
const lexingSpec = {
4+
defaultMode: 'initMode',
5+
modes: {}
6+
};
67

78
class WhiteSpaceT extends Token {
89
static PATTERN = /\s+/;
@@ -12,65 +13,145 @@ class WhiteSpaceT extends Token {
1213

1314
class LSquareT extends Token {
1415
static PATTERN = /\[/;
16+
// static POP_MODE = true;
17+
static PUSH_MODE = 'sectionHeaderMode';
1518
}
1619

1720
class RSquareT extends Token {
1821
static PATTERN = /]/;
22+
// static POP_MODE = true;
23+
static PUSH_MODE = 'sectionBodyMode';
1924
}
2025

21-
class EqualT extends Token {
22-
static PATTERN = /=/;
23-
}
24-
25-
class CommentT extends Token {
26-
static PATTERN = /[#;].*$/;
26+
class SectionBodyLSquareT extends Token {
27+
static PATTERN = /\[/;
28+
static POP_MODE = true;
2729
}
2830

29-
// alphanumeric and - and .
3031
class SectionNameT extends Token {
3132
static PATTERN = /[0-9a-zA-Z.-]+/;
3233
}
3334

34-
// quoted string with no newlines
35+
class SectionIncludeIfT extends Token {
36+
static PATTERN = /includeIf/;
37+
static LONGER_ALT = SectionNameT;
38+
}
39+
40+
class SectionIncludeT extends Token {
41+
static PATTERN = /include/;
42+
static LONGER_ALT = SectionIncludeIfT;
43+
}
44+
3545
class SubSectionNameT extends Token {
3646
static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\])*"/;
3747
}
3848

39-
// alphanumeric with -, but must start with alphabetical
4049
class VariableT extends Token {
41-
static PATTERN = /[a-z][0-9a-zA-Z-]+/;
50+
static PATTERN = /[a-zA-Z][0-9a-zA-Z-]+/;
4251
}
4352

44-
// string with no space separation with \b, \n \t \\ \"
45-
class ValueStringT extends Token {
46-
static PATTERN = /(?:[^\\"\n]|\\[bnt"\\\n])*/;
53+
class EqualT extends Token {
54+
static PATTERN = /=/;
55+
// static POP_MODE = true;
56+
static PUSH_MODE = 'valueMode';
4757
}
4858

49-
// value string with quotes
50-
class ValueStringQuotedT extends Token {
59+
class QuotedStringT extends Token {
5160
static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\\n])*"/;
5261
}
5362

54-
const allTokens = [
55-
SpaceT,
63+
class NewlineT extends Token {
64+
static PATTERN = /\n/;
65+
static POP_MODE = true;
66+
// static PUSH_MODE = 'sectionBodyMode';
67+
}
68+
69+
lexingSpec.modes.initMode = [
70+
WhiteSpaceT,
71+
LSquareT // special initial LSquareT
72+
];
73+
74+
// one way to do this: on encountering RSquareT,
75+
// push into sectionBodyMode
76+
lexingSpec.modes.sectionHeaderMode = [
5677
WhiteSpaceT,
57-
LSquareT,
5878
RSquareT,
59-
EqualT,
60-
CommentT,
79+
SectionIncludeT,
80+
SectionIncludeIfT,
6181
SectionNameT,
62-
SubSectionNameT,
63-
VariableT,
64-
ValueStringT,
65-
ValueStringQuotedT
82+
SubSectionNameT
83+
];
84+
85+
// then on encountering LSquareT
86+
// pop out of sectionBodyMode (and assume we are in sectionHeaderMode)
87+
// this requires a different kind of LSquareT from the original
88+
lexingSpec.modes.sectionBodyMode = [
89+
WhiteSpaceT,
90+
EqualT,
91+
SectionBodyLSquareT,
92+
VariableT
93+
];
94+
95+
lexingSpec.modes.valueMode = [
96+
NewlineT,
97+
WhiteSpaceT,
98+
QuotedStringT
6699
];
67100

68-
const lexer = new Lexer(allTokens);
101+
// a newline that is not escaped by a backslash pops out of the value mode
102+
103+
const lexer = new Lexer(lexingSpec, {
104+
debug: true
105+
});
106+
107+
const result = lexer.tokenize(fs.readFileSync('./test.config', 'utf8'));
108+
109+
console.log(result);
110+
111+
112+
// i have an idea how to make this work
113+
// you have the starting state
114+
// and this has WhiteSpaceT and [ which transitions (pushes)
115+
// to section header mode
116+
// after ], it pushes into section body mode
117+
// if in section body mode it encounters [
118+
// then it pops out (thus into section header mode)
119+
// if it encounters a variable and then =
120+
// then it pushes into value mode
121+
// once value encounters \n, then it pops out into section body mode
122+
// section header <-> (transit by []) section body <-> (transit by \n) value
123+
// should make this a diagram
69124

70125
class ConfigParser extends Parser {
71126
constructor (input) {
72127
super(input, allTokens);
128+
// main entry rule (non-terminal)
129+
this.RULE('config', () => {
130+
131+
132+
// parse section names first
133+
// so we have a section and potential subsection
134+
// within it, we have various key = value entries
135+
// but also we need semantic actions for inclusions, which we need to expand in order
136+
// so we can continue parsing?
137+
138+
139+
});
140+
141+
this.RULE('section', () => {
142+
143+
// does the existence of this allow the possibility of subsection as well?
144+
145+
});
146+
147+
148+
149+
Parser.performSelfAnalysis(this);
73150

151+
// a semantic action of encountering an inclusion is recursion
152+
// expand the inclusion and rerun the same parser on it
153+
// this should be possible by just pointing it at a non-terminal
154+
// however i'm not sure how to translate BNF grammars to chevrotain atm
74155
}
75156
}
76157

test.js

Lines changed: 155 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,159 @@
1-
import VirtualGit from './lib/VirtualGit.js';
2-
import vfs from 'virtualfs';
1+
import fs from 'fs';
2+
import { Token, Lexer, Parser } from 'chevrotain';
33

4-
const vgit = new VirtualGit(vfs);
4+
const lexingSpec = {
5+
defaultMode: 'topMode',
6+
modes: {}
7+
};
58

6-
// here we deal with these capabilities
7-
// but we can also use just a function
8-
// no bothering with version either
9+
class WhiteSpaceT extends Token {
10+
static PATTERN = /\s+/;
11+
static GROUP = Lexer.SKIPPED;
12+
static LINE_BREAKS = true;
13+
}
914

10-
const repo = vgit.init('/testrepo', {
11-
mode: 0o777
15+
class LSquareT extends Token {
16+
static PATTERN = /\[/;
17+
// static POP_MODE = true;
18+
static PUSH_MODE = 'sectionHeaderMode';
19+
}
20+
21+
class RSquareT extends Token {
22+
static PATTERN = /]/;
23+
// static POP_MODE = true;
24+
static PUSH_MODE = 'sectionBodyMode';
25+
}
26+
27+
class SectionBodyLSquareT extends Token {
28+
static PATTERN = /\[/;
29+
static POP_MODE = true;
30+
}
31+
32+
class SectionNameT extends Token {
33+
static PATTERN = /[0-9a-zA-Z.-]+/;
34+
}
35+
36+
class SectionIncludeIfT extends Token {
37+
static PATTERN = /includeIf/;
38+
static LONGER_ALT = SectionNameT;
39+
}
40+
41+
class SectionIncludeT extends Token {
42+
static PATTERN = /include/;
43+
static LONGER_ALT = SectionIncludeIfT;
44+
}
45+
46+
class SubSectionNameT extends Token {
47+
static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\])*"/;
48+
}
49+
50+
class VariableT extends Token {
51+
static PATTERN = /[a-zA-Z][0-9a-zA-Z-]+/;
52+
}
53+
54+
class EqualT extends Token {
55+
static PATTERN = /=/;
56+
// static POP_MODE = true;
57+
static PUSH_MODE = 'valueMode';
58+
}
59+
60+
class QuotedStringT extends Token {
61+
static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\\n])*"/;
62+
}
63+
64+
class NewlineT extends Token {
65+
static PATTERN = /\n/;
66+
static POP_MODE = true;
67+
// static PUSH_MODE = 'sectionBodyMode';
68+
}
69+
70+
lexingSpec.modes.topMode = [
71+
WhiteSpaceT,
72+
LSquareT
73+
];
74+
75+
// one way to do this: on encountering RSquareT,
76+
// push into sectionBodyMode
77+
lexingSpec.modes.sectionHeaderMode = [
78+
WhiteSpaceT,
79+
RSquareT,
80+
SectionIncludeT,
81+
SectionIncludeIfT,
82+
SectionNameT,
83+
SubSectionNameT
84+
];
85+
86+
// then on encountering LSquareT
87+
// pop out of sectionBodyMode (and assume we are in sectionHeaderMode)
88+
// this requires a different kind of LSquareT from the original
89+
lexingSpec.modes.sectionBodyMode = [
90+
WhiteSpaceT,
91+
EqualT,
92+
SectionBodyLSquareT,
93+
VariableT
94+
];
95+
96+
lexingSpec.modes.valueMode = [
97+
NewlineT,
98+
WhiteSpaceT,
99+
QuotedStringT
100+
];
101+
102+
// a newline that is not escaped by a backslash pops out of the value mode
103+
104+
const lexer = new Lexer(lexingSpec, {
105+
debug: true
12106
});
107+
108+
const result = lexer.tokenize(fs.readFileSync('./test.config', 'utf8'));
109+
110+
console.log(result);
111+
112+
// i have an idea how to make this work
113+
// you have the starting state
114+
// and this has WhiteSpaceT and [ which transitions (pushes)
115+
// to section header mode
116+
// after ], it pushes into section body mode
117+
// if in section body mode it encounters [
118+
// then it pops out (thus into section header mode)
119+
// if it encounters a variable and then =
120+
// then it pushes into value mode
121+
// once value encounters \n, then it pops out into section body mode
122+
// section header <-> (transit by []) section body <-> (transit by \n) value
123+
124+
125+
// class EqualT extends Token {
126+
// static NAME = "EqualT";
127+
// static PATTERN = /=/;
128+
// }
129+
130+
// class CommentT extends Token {
131+
// static NAME = "CommentT";
132+
// static PATTERN = /[#;].+/;
133+
// static GROUP = "singleLineComments";
134+
// }
135+
136+
// class QuotedStringT extends Token {
137+
// static NAME = "QuotedStringT";
138+
// static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\\n])*"/;
139+
// }
140+
141+
// // alphanumeric string
142+
// class IdentifierT extends Token {
143+
// static NAME = "IdentifierT";
144+
// static PATTERN = /[0-9a-zA-Z.-]+/;
145+
// }
146+
147+
// class StringT extends Token {
148+
// static NAME = "StringT";
149+
// static PATTERN = /(?:[^\\"\n]|\\[bnt"\\])+/;
150+
// }
151+
152+
// // in the order of most specific to least specific
153+
// const lexer = new Lexer({
154+
155+
156+
// });
157+
158+
// the chevrotain lexer is stateless, only a single one per grammar should be created
159+
// how do i create contextual lexers, do we use lexing groups in some way?

0 commit comments

Comments
 (0)