import fs from 'fs';

import { Token, Lexer, Parser } from 'chevrotain';
22
3- class SpaceT extends Token {
4- static PATTERN = / / ;
5- }
// Multi-mode lexer definition handed to chevrotain's Lexer.
// `modes` starts out empty; each mode's token list is attached further down,
// once the token classes exist. Lexing begins in `initMode`.
const lexingSpec = {
  modes: {},
  defaultMode: 'initMode'
};
67
78class WhiteSpaceT extends Token {
89 static PATTERN = / \s + / ;
@@ -12,65 +13,145 @@ class WhiteSpaceT extends Token {
1213
// Opening square bracket `[`.
// Matching it pushes the lexer into `sectionHeaderMode`.
// (A POP_MODE variant was tried and is intentionally left disabled.)
class LSquareT extends Token {
  static PUSH_MODE = 'sectionHeaderMode';
  static PATTERN = /\[/;
}
1619
// Closing square bracket `]`.
// Matching it pushes the lexer into `sectionBodyMode`.
// (A POP_MODE variant was tried and is intentionally left disabled.)
class RSquareT extends Token {
  static PUSH_MODE = 'sectionBodyMode';
  static PATTERN = /]/;
}
2025
21- class EqualT extends Token {
22- static PATTERN = / = / ;
23- }
24-
25- class CommentT extends Token {
26- static PATTERN = / [ # ; ] .* $ / ;
// `[` variant used by `sectionBodyMode`: same pattern as LSquareT, but it
// pops the current lexer mode instead of pushing a new one.
class SectionBodyLSquareT extends Token {
  static POP_MODE = true;
  static PATTERN = /\[/;
}
2830
29- // alphanumeric and - and .
// Section name: one or more characters drawn from digits, ASCII letters,
// `.` and `-`.
class SectionNameT extends Token {
  static PATTERN = /[0-9a-zA-Z.-]+/;
}
3334
34- // quoted string with no newlines
// The literal section keyword `includeIf`.
// SectionNameT is declared as the longer alternative, so text that continues
// past the keyword with further name characters can lex as a plain section name.
class SectionIncludeIfT extends Token {
  static LONGER_ALT = SectionNameT;
  static PATTERN = /includeIf/;
}
39+
// The literal section keyword `include`.
//
// The pattern only matches when the keyword is NOT followed by another
// section-name character. Chevrotain picks the first matching token in a
// mode, and the only longer alternative declared here is SectionIncludeIfT,
// so without the lookahead a name such as `includes` would be split into
// `include` + `s`. With it, `includeIf` and other longer names fall through
// to the tokens listed after this one in the mode.
class SectionIncludeT extends Token {
  static PATTERN = /include(?![0-9a-zA-Z.-])/;
  // Retained for compatibility with the existing token wiring; the lookahead
  // above already prevents this token from shadowing `includeIf`.
  static LONGER_ALT = SectionIncludeIfT;
}
44+
// Subsection name: a double-quoted string on a single line. Inside the quotes
// a backslash may only introduce one of the escapes \b, \n, \t, \" or \\.
class SubSectionNameT extends Token {
  static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\])*"/;
}
3848
39- // alphanumeric with -, but must start with alphabetical
// Variable (key) name: starts with an ASCII letter, followed by any number of
// letters, digits or `-`.
// The tail uses `*` rather than `+` so that one-letter names are accepted;
// with `+` a name needed at least two characters.
class VariableT extends Token {
  static PATTERN = /[a-zA-Z][0-9a-zA-Z-]*/;
}
4352
44- // string with no space separation with \b, \n \t \\ \"
45- class ValueStringT extends Token {
46- static PATTERN = / (?: [ ^ \\ " \n ] | \\ [ b n t " \\ \n ] ) * / ;
// The `=` separating a variable from its value.
// Matching it pushes the lexer into `valueMode`.
// (A POP_MODE variant was tried and is intentionally left disabled.)
class EqualT extends Token {
  static PUSH_MODE = 'valueMode';
  static PATTERN = /=/;
}
4858
49- // value string with quotes
50- class ValueStringQuotedT extends Token {
// Double-quoted value string. Raw newlines are not allowed inside the quotes;
// a backslash may introduce \b, \n, \t, \", \\ or a backslash-newline pair.
class QuotedStringT extends Token {
  static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\\n])*"/;
}
5362
54- const allTokens = [
55- SpaceT ,
// A single line feed. Matching it pops the current lexer mode.
// (Pushing `sectionBodyMode` instead was considered and left disabled.)
class NewlineT extends Token {
  static POP_MODE = true;
  static PATTERN = /\n/;
}
68+
// Initial mode: whitespace plus the first `[`
// (LSquareT, the special initial opening bracket).
lexingSpec.modes['initMode'] = [WhiteSpaceT, LSquareT];
73+
// Section header mode: everything that may appear between `[` and `]`.
// Encountering RSquareT pushes the lexer into sectionBodyMode.
// Token order matters: the lexer tries tokens in the order listed.
Object.assign(lexingSpec.modes, {
  sectionHeaderMode: [
    WhiteSpaceT,
    RSquareT,
    SectionIncludeT,
    SectionIncludeIfT,
    SectionNameT,
    SubSectionNameT
  ]
});
84+
// Section body mode: the `name = value` entries that follow a header.
// Encountering `[` here pops out of sectionBodyMode (on the assumption that
// the mode underneath is sectionHeaderMode), which is why this mode uses
// SectionBodyLSquareT instead of the original LSquareT.
Object.assign(lexingSpec.modes, {
  sectionBodyMode: [
    WhiteSpaceT,
    EqualT,
    SectionBodyLSquareT,
    VariableT
  ]
});
94+
// Whitespace token for valueMode that never consumes a line feed.
// WhiteSpaceT matches `\s+`, which includes `\n`; if it were used here,
// trailing blanks before a line break would swallow the newline, NewlineT
// would never fire and the lexer would stay stuck in valueMode.
class ValueWhiteSpaceT extends Token {
  static PATTERN = /[^\S\n]+/;
}

// Mirror WhiteSpaceT's token group (e.g. skipped) when it declares one, so
// whitespace is treated the same way in every mode.
if (Object.prototype.hasOwnProperty.call(WhiteSpaceT, 'GROUP')) {
  ValueWhiteSpaceT.GROUP = WhiteSpaceT.GROUP;
}

// Value mode: entered after `=`, left again when NewlineT pops the mode.
lexingSpec.modes.valueMode = [
  NewlineT,
  ValueWhiteSpaceT,
  QuotedStringT
];
67100
68- const lexer = new Lexer ( allTokens ) ;
// A newline that is not escaped by a backslash pops the lexer out of valueMode.
102+
// Build the multi-mode lexer from the mode table above.
const lexer = new Lexer(lexingSpec, {
  debug: true
});

// Read the sample config and tokenize it. `fs` is Node's file-system module
// and must be imported at the top of the file; it was previously used here
// without being in scope.
const configText = fs.readFileSync('./test.config', 'utf8');
const result = lexer.tokenize(configText);

console.log(result);
110+
111+
// Plan for the lexer mode transitions:
// - The starting state (initMode) accepts WhiteSpaceT and `[`; the `[`
//   transitions (pushes) into section header mode.
// - After `]`, the lexer pushes into section body mode.
// - If it encounters `[` while in section body mode, it pops out
//   (and is therefore back in section header mode).
// - If it encounters a variable followed by `=`, it pushes into value mode.
// - Once value mode encounters `\n`, it pops back out into section body mode.
// Summary:
//   section header <-> (transit by `[` / `]`) section body <-> (transit by `=` / `\n`) value
// TODO: turn this into a diagram.
69124
// Flattens a multi-mode lexer definition into one array holding every token
// class exactly once, in first-seen order.
function collectModeTokens(spec) {
  const unique = new Set();
  Object.keys(spec.modes).forEach((modeName) => {
    spec.modes[modeName].forEach((tokenClass) => unique.add(tokenClass));
  });
  return Array.from(unique);
}

// Parser skeleton for the config grammar. The rules are still empty
// placeholders; only the token vocabulary and rule names are wired up.
class ConfigParser extends Parser {
  // input: the token array to parse.
  constructor(input) {
    // The token vocabulary is derived from the lexer's mode table. The flat
    // `allTokens` array that used to be passed here no longer exists.
    super(input, collectModeTokens(lexingSpec));

    // Main entry rule (non-terminal).
    this.RULE('config', () => {
      // Parse section names first, so that we have a section and a potential
      // subsection. Within it there are various `key = value` entries.
      // Inclusions also need semantic actions, and they have to be expanded
      // in order — so that parsing can continue?
    });

    this.RULE('section', () => {
      // Open question: does the existence of this rule allow for the
      // possibility of a subsection as well?
    });

    Parser.performSelfAnalysis(this);

    // A semantic action on encountering an inclusion is recursion: expand the
    // inclusion and rerun the same parser on it. This should be possible by
    // just pointing it at a non-terminal; how to translate BNF grammars to
    // chevrotain is still to be worked out.
  }
}
76157
0 commit comments