WIP

CMCDragonkai · CMCDragonkai · commit 1c614a6aff3d · 2017-11-14T02:38:58.000+11:00
diff --git a/lib/configParser.js b/lib/configParser.js
@@ -1,8 +1,9 @@
 import { Token, Lexer, Parser } from 'chevrotain';
 
-class SpaceT extends Token {
-  static PATTERN = / /;
-}
+const lexingSpec = {
+  defaultMode: 'initMode',
+  modes: {}
+};
 
 class WhiteSpaceT extends Token {
   static PATTERN = /\s+/;
@@ -12,65 +13,145 @@ class WhiteSpaceT extends Token {
 
 class LSquareT extends Token {
   static PATTERN = /\[/;
+  // static POP_MODE = true;
+  static PUSH_MODE = 'sectionHeaderMode';
 }
 
 class RSquareT extends Token {
   static PATTERN = /]/;
+  // static POP_MODE = true;
+  static PUSH_MODE = 'sectionBodyMode';
 }
 
-class EqualT extends Token {
-  static PATTERN = /=/;
-}
-
-class CommentT extends Token {
-  static PATTERN = /[#;].*$/;
+class SectionBodyLSquareT extends Token {
+  static PATTERN = /\[/;
+  static POP_MODE = true;
 }
 
-// alphanumeric and - and .
 class SectionNameT extends Token {
   static PATTERN = /[0-9a-zA-Z.-]+/;
 }
 
-// quoted string with no newlines
+class SectionIncludeIfT extends Token {
+  static PATTERN = /includeIf/;
+  static LONGER_ALT = SectionNameT;
+}
+
+class SectionIncludeT extends Token {
+  static PATTERN = /include/;
+  static LONGER_ALT = SectionIncludeIfT;
+}
+
 class SubSectionNameT extends Token {
   static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\])*"/;
 }
 
-// alphanumeric with -, but must start with alphabetical
 class VariableT extends Token {
-  static PATTERN = /[a-z][0-9a-zA-Z-]+/;
+  static PATTERN = /[a-zA-Z][0-9a-zA-Z-]*/; // first char must be alphabetic; `*` (not `+`) so one-letter names lex too
 }
 
-// string with no space separation with \b, \n \t \\ \"
-class ValueStringT extends Token {
-  static PATTERN = /(?:[^\\"\n]|\\[bnt"\\\n])*/;
+class EqualT extends Token {
+  static PATTERN = /=/;
+  // static POP_MODE = true;
+  static PUSH_MODE = 'valueMode';
 }
 
-// value string with quotes
-class ValueStringQuotedT extends Token {
+class QuotedStringT extends Token {
   static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\\n])*"/;
 }
 
-const allTokens = [
-  SpaceT,
+class NewlineT extends Token {
+  static PATTERN = /\n/;
+  static POP_MODE = true;
+  // static PUSH_MODE = 'sectionBodyMode';
+}
+
+lexingSpec.modes.initMode = [
+  WhiteSpaceT,
+  LSquareT // special initial LSquareT
+];
+
+// one way to do this: on encountering RSquareT,
+// push into sectionBodyMode
+lexingSpec.modes.sectionHeaderMode = [
   WhiteSpaceT,
-  LSquareT,
   RSquareT,
-  EqualT,
-  CommentT,
+  SectionIncludeT,
+  SectionIncludeIfT,
   SectionNameT,
-  SubSectionNameT,
-  VariableT,
-  ValueStringT,
-  ValueStringQuotedT
+  SubSectionNameT
+];
+
+// then on encountering LSquareT
+// pop out of sectionBodyMode (and assume we are in sectionHeaderMode)
+// this requires a different kind of LSquareT from the original
+lexingSpec.modes.sectionBodyMode = [
+  WhiteSpaceT,
+  EqualT,
+  SectionBodyLSquareT,
+  VariableT
+];
+
+lexingSpec.modes.valueMode = [
+  NewlineT,
+  WhiteSpaceT,
+  QuotedStringT
 ];
 
-const lexer = new Lexer(allTokens);
+// a newline that is not escaped by a backslash pops out of value mode
+
+const lexer = new Lexer(lexingSpec, { // built from the multi-mode lexingSpec assembled above
+  debug: true
+});
+
+const result = lexer.tokenize(fs.readFileSync('./test.config', 'utf8')); // NOTE(review): no `fs` import is visible for this module — confirm
+
+console.log(result); // dumps the raw tokenization result while this is WIP
+
+
+// I have an idea for how to make this work:
+// there is a starting mode (initMode)
+// which has only WhiteSpaceT and `[`; the `[` transitions (pushes)
+// to section header mode.
+// After `]`, the lexer pushes into section body mode.
+// If it encounters `[` while in section body mode,
+// then it pops out (and is thus back in section header mode).
+// If it encounters a variable and then `=`,
+// then it pushes into value mode.
+// Once value mode encounters \n, it pops back out into section body mode.
+// section header <-> (transit by `]` / `[`) section body <-> (transit by `=` / \n) value
+// TODO: turn this into a diagram
 
 class ConfigParser extends Parser {
   constructor (input) {
     super(input, allTokens);
+    // main entry rule (non-terminal). NOTE(review): `allTokens` passed to super() above is no longer defined in this module — confirm
+    this.RULE('config', () => {
+
+
+      // Parse section names first,
+      // so that we have a section and a potential subsection.
+      // Within it, we have various key = value entries,
+      // but we also need semantic actions for inclusions, which we need to expand in order
+      // so that we can continue parsing?
+
+
+    });
+
+    this.RULE('section', () => {
+
+      // does the existence of this allow for the possibility of a subsection as well?
+
+    });
+
+
+
+    Parser.performSelfAnalysis(this);
 
+    // The semantic action for encountering an inclusion is recursion:
+    // expand the inclusion and rerun the same parser on it.
+    // This should be possible by just pointing it at a non-terminal;
+    // however, I'm not yet sure how to translate BNF grammars to chevrotain.
   }
 }
 
diff --git a/test.js b/test.js
@@ -1,12 +1,159 @@
-import VirtualGit from './lib/VirtualGit.js';
-import vfs from 'virtualfs';
+import fs from 'fs';
+import { Token, Lexer, Parser } from 'chevrotain';
 
-const vgit = new VirtualGit(vfs);
+const lexingSpec = {
+  defaultMode: 'topMode',
+  modes: {}
+};
 
-// here we deal with these capabilities
-// but we can also use just a function
-// no bothering with version either
+class WhiteSpaceT extends Token {
+  static PATTERN = /\s+/;
+  static GROUP = Lexer.SKIPPED;
+  static LINE_BREAKS = true;
+}
 
-const repo = vgit.init('/testrepo', {
-  mode: 0o777
+class LSquareT extends Token {
+  static PATTERN = /\[/;
+  // static POP_MODE = true;
+  static PUSH_MODE = 'sectionHeaderMode';
+}
+
+class RSquareT extends Token {
+  static PATTERN = /]/;
+  // static POP_MODE = true;
+  static PUSH_MODE = 'sectionBodyMode';
+}
+
+class SectionBodyLSquareT extends Token {
+  static PATTERN = /\[/;
+  static POP_MODE = true;
+}
+
+class SectionNameT extends Token {
+  static PATTERN = /[0-9a-zA-Z.-]+/;
+}
+
+class SectionIncludeIfT extends Token {
+  static PATTERN = /includeIf/;
+  static LONGER_ALT = SectionNameT;
+}
+
+class SectionIncludeT extends Token {
+  static PATTERN = /include/;
+  static LONGER_ALT = SectionIncludeIfT;
+}
+
+class SubSectionNameT extends Token {
+  static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\])*"/;
+}
+
+class VariableT extends Token {
+  static PATTERN = /[a-zA-Z][0-9a-zA-Z-]*/; // first char must be alphabetic; `*` (not `+`) so one-letter names lex too
+}
+
+class EqualT extends Token {
+  static PATTERN = /=/;
+  // static POP_MODE = true;
+  static PUSH_MODE = 'valueMode';
+}
+
+class QuotedStringT extends Token {
+  static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\\n])*"/;
+}
+
+class NewlineT extends Token {
+  static PATTERN = /\n/;
+  static POP_MODE = true;
+  // static PUSH_MODE = 'sectionBodyMode';
+}
+
+lexingSpec.modes.topMode = [
+  WhiteSpaceT,
+  LSquareT
+];
+
+// one way to do this: on encountering RSquareT,
+// push into sectionBodyMode
+lexingSpec.modes.sectionHeaderMode = [
+  WhiteSpaceT,
+  RSquareT,
+  SectionIncludeT,
+  SectionIncludeIfT,
+  SectionNameT,
+  SubSectionNameT
+];
+
+// then on encountering LSquareT
+// pop out of sectionBodyMode (and assume we are in sectionHeaderMode)
+// this requires a different kind of LSquareT from the original
+lexingSpec.modes.sectionBodyMode = [
+  WhiteSpaceT,
+  EqualT,
+  SectionBodyLSquareT,
+  VariableT
+];
+
+lexingSpec.modes.valueMode = [
+  NewlineT,
+  WhiteSpaceT,
+  QuotedStringT
+];
+
+// a newline that is not escaped by a backslash pops out of value mode
+
+const lexer = new Lexer(lexingSpec, {
+  debug: true
 });
+
+const result = lexer.tokenize(fs.readFileSync('./test.config', 'utf8'));
+
+console.log(result);
+
+// I have an idea for how to make this work:
+// there is a starting mode (topMode)
+// which has only WhiteSpaceT and `[`; the `[` transitions (pushes)
+// to section header mode.
+// After `]`, the lexer pushes into section body mode.
+// If it encounters `[` while in section body mode,
+// then it pops out (and is thus back in section header mode).
+// If it encounters a variable and then `=`,
+// then it pushes into value mode.
+// Once value mode encounters \n, it pops back out into section body mode.
+// section header <-> (transit by `]` / `[`) section body <-> (transit by `=` / \n) value
+
+
+// class EqualT extends Token {
+//   static NAME = "EqualT";
+//   static PATTERN = /=/;
+// }
+
+// class CommentT extends Token {
+//   static NAME = "CommentT";
+//   static PATTERN = /[#;].+/;
+//   static GROUP = "singleLineComments";
+// }
+
+// class QuotedStringT extends Token {
+//   static NAME = "QuotedStringT";
+//   static PATTERN = /"(?:[^\\"\n]|\\[bnt"\\\n])*"/;
+// }
+
+// // alphanumeric string
+// class IdentifierT extends Token {
+//   static NAME = "IdentifierT";
+//   static PATTERN = /[0-9a-zA-Z.-]+/;
+// }
+
+// class StringT extends Token {
+//   static NAME = "StringT";
+//   static PATTERN = /(?:[^\\"\n]|\\[bnt"\\])+/;
+// }
+
+// // in the order of most specific to least specific
+// const lexer = new Lexer({
+
+
+// });
+
+// The chevrotain lexer is stateless, so only a single one per grammar should be created.
+// How do I create contextual lexers? Do we use lexing groups in some way?