Skip to content

Commit 5df37ea

Browse files
committed
CommandLineParser improvements
Tokenizing used to snip the line and knit the args, this commit denits the deknitting. Adds a benchmark showing linear behavior and a unit test to show correctness. Departing from the previous notion of correctness, internal quotes are respected. `"abc"xyz` is `abcxyz`.
1 parent 7cb938b commit 5df37ea

File tree

4 files changed

+165
-64
lines changed

4 files changed

+165
-64
lines changed

build.sbt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ lazy val partest = configureAsSubproject(project)
568568
)
569569

570570
lazy val bench = project.in(file("test") / "benchmarks")
571-
.dependsOn(library)
571+
.dependsOn(library, compiler)
572572
.settings(instanceSettings)
573573
.settings(disableDocs)
574574
.settings(disablePublishing)
Lines changed: 84 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,82 +1,103 @@
11
/* NEST (New Scala Test)
2-
* Copyright 2007-2013 LAMP/EPFL
2+
* Copyright 2007-2018 LAMP/EPFL
33
* @author Paul Phillips
44
*/
5-
6-
package scala.tools
7-
package cmd
5+
package scala.tools.cmd
86

97
import scala.annotation.tailrec
108

11-
/** A simple (overly so) command line parser.
12-
* !!! This needs a thorough test suite to make sure quoting is
13-
* done correctly and portably.
9+
/** A simple enough command line parser.
1410
*/
1511
object CommandLineParser {
16-
// splits a string into a quoted prefix and the rest of the string,
17-
// taking escaping into account (using \)
18-
// `"abc"def` will match as `DoubleQuoted(abc, def)`
19-
private class QuotedExtractor(quote: Char) {
20-
def unapply(in: String): Option[(String, String)] = {
21-
val del = quote.toString
22-
if (in startsWith del) {
23-
var escaped = false
24-
val (quoted, next) = (in substring 1) span {
25-
case `quote` if !escaped => false
26-
case '\\' if !escaped => escaped = true; true
27-
case _ => escaped = false; true
28-
}
29-
// the only way to get out of the above loop is with an empty next or !escaped
30-
// require(next.isEmpty || !escaped)
31-
if (next startsWith del) Some((quoted, next substring 1))
32-
else None
33-
} else None
12+
private final val DQ = '"'
13+
private final val SQ = '\''
14+
15+
/** Split the line into tokens separated by whitespace or quotes.
16+
*
17+
* @return either an error message or reverse list of tokens
18+
*/
19+
private def tokens(in: String) = {
20+
import Character.isWhitespace
21+
import java.lang.{StringBuilder => Builder}
22+
import collection.mutable.ArrayBuffer
23+
24+
var accum: List[String] = Nil
25+
var pos = 0
26+
var start = 0
27+
val qpos = new ArrayBuffer[Int](16) // positions of paired quotes
28+
29+
def cur: Int = if (done) -1 else in.charAt(pos)
30+
def bump() = pos += 1
31+
def done = pos >= in.length
32+
33+
def skipToQuote(q: Int) = {
34+
var escaped = false
35+
def terminal = in.charAt(pos) match {
36+
case _ if escaped => escaped = false ; false
37+
case '\\' => escaped = true ; false
38+
case `q` => true
39+
case _ => false
40+
}
41+
while (!done && !terminal) pos += 1
42+
!done
3443
}
35-
}
36-
private object DoubleQuoted extends QuotedExtractor('"')
37-
private object SingleQuoted extends QuotedExtractor('\'')
38-
object Word {
39-
private val regex = """(\S+)""".r
40-
def unapply(s: String): Option[(String, String)] = {
41-
regex.findPrefixOf(s) match {
42-
case Some(prefix) => Some(prefix, s.substring(prefix.length))
43-
case None => None
44+
def skipToDelim(): Boolean =
45+
cur match {
46+
case q @ (DQ | SQ) => { qpos.append(pos); bump(); skipToQuote(q) } && { qpos.append(pos); bump(); skipToDelim() }
47+
case -1 => true
48+
case c if isWhitespace(c) => true
49+
case _ => bump(); skipToDelim()
50+
}
51+
def skipWhitespace() = while (isWhitespace(cur)) pos += 1
52+
def copyText() = {
53+
val buf = new Builder
54+
var p = start
55+
var i = 0
56+
while (p < pos) {
57+
if (i >= qpos.size) {
58+
buf.append(in, p, pos)
59+
p = pos
60+
} else if (p == qpos(i)) {
61+
buf.append(in, qpos(i)+1, qpos(i+1))
62+
p = qpos(i+1)+1
63+
i += 2
64+
} else {
65+
buf.append(in, p, qpos(i))
66+
p = qpos(i)
67+
}
4468
}
69+
buf.toString
4570
}
46-
}
47-
48-
// parse `in` for an argument, return it and the remainder of the input (or an error message)
49-
// (argument may be in single/double quotes, taking escaping into account, quotes are stripped)
50-
private def argument(in: String): Either[String, (String, String)] = in match {
51-
case DoubleQuoted(arg, rest) => Right((arg, rest))
52-
case SingleQuoted(arg, rest) => Right((arg, rest))
53-
case Word(arg, rest) => Right((arg, rest))
54-
case _ => Left(s"Illegal argument: $in")
55-
}
71+
def text() = {
72+
val res =
73+
if (qpos.isEmpty) in.substring(start, pos)
74+
else if (qpos(0) == start && qpos(1) == pos) in.substring(start+1, pos-1)
75+
else copyText()
76+
qpos.clear()
77+
res
78+
}
79+
def badquote = Left("Unmatched quote")
5680

57-
// parse a list of whitespace-separated arguments (ignoring whitespace in quoted arguments)
58-
@tailrec private def commandLine(in: String, accum: List[String] = Nil): Either[String, (List[String], String)] = {
59-
val trimmed = in.trim
60-
if (trimmed.isEmpty) Right((accum.reverse, ""))
61-
else argument(trimmed) match {
62-
case Right((arg, next)) =>
63-
val leadingWhitespaceLen = next.prefixLength(Character.isWhitespace)
64-
val rest = next.substring(leadingWhitespaceLen)
65-
if (leadingWhitespaceLen == 0 && rest.nonEmpty)
66-
Left("Arguments should be separated by whitespace.") // TODO: can this happen?
67-
else
68-
commandLine(rest, arg :: accum)
69-
case Left(msg) => Left(msg)
81+
@tailrec def loop(): Either[String, List[String]] = {
82+
skipWhitespace()
83+
start = pos
84+
if (done) Right(accum)
85+
else if (!skipToDelim()) badquote
86+
else {
87+
accum = text() :: accum
88+
loop()
89+
}
7090
}
91+
loop()
7192
}
7293

7394
class ParseException(msg: String) extends RuntimeException(msg)
7495

75-
def tokenize(line: String): List[String] = tokenize(line, x => throw new ParseException(x))
76-
def tokenize(line: String, errorFn: String => Unit): List[String] = {
77-
commandLine(line) match {
78-
case Right((args, _)) => args
79-
case Left(msg) => errorFn(msg) ; Nil
96+
def tokenize(line: String, errorFn: String => Unit): List[String] =
97+
tokens(line) match {
98+
case Right(args) => args.reverse
99+
case Left(msg) => errorFn(msg) ; Nil
80100
}
81-
}
101+
102+
def tokenize(line: String): List[String] = tokenize(line, x => throw new ParseException(x))
82103
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
2+
package scala.tools.cmd
3+
4+
import java.util.concurrent.TimeUnit
5+
6+
import org.openjdk.jmh.annotations._
7+
import org.openjdk.jmh.infra.Blackhole
8+
9+
@BenchmarkMode(Array(Mode.AverageTime))
10+
@Fork(2)
11+
@Threads(1)
12+
@Warmup(iterations = 10)
13+
@Measurement(iterations = 10)
14+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
15+
@State(Scope.Benchmark)
16+
class CommandLineParserBenchmark {
17+
18+
import CommandLineParser.tokenize
19+
20+
@Param(Array("1000", "10000", "100000"))
21+
var argCount: Int = _
22+
var line: String = _
23+
var quotyline: String = _
24+
var embedded: String = _
25+
26+
@Setup(Level.Trial) def init(): Unit = {
27+
line = List.tabulate(argCount)(n => s"arg$n").mkString(" ")
28+
val q = "\""
29+
quotyline = List.tabulate(argCount)(n => s"${q}arg${n}${q}").mkString(" ")
30+
embedded = List.tabulate(argCount)(n => s"${n}${q}arg${q}${n}").mkString(" ")
31+
}
32+
@Benchmark def parsingBenchmark = tokenize(line)
33+
@Benchmark def quoteUnquoteParsingBenchmark = tokenize(quotyline)
34+
@Benchmark def embeddedQuoteParsingBenchmark = tokenize(embedded)
35+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package scala.tools.cmd
2+
3+
import org.junit.Assert._
4+
import org.junit.Test
5+
import org.junit.runner.RunWith
6+
import org.junit.runners.JUnit4
7+
import scala.tools.testing.AssertUtil.assertThrows
8+
9+
@RunWith(classOf[JUnit4])
10+
class CommandLineParserTest {
11+
import CommandLineParser.{tokenize, ParseException}
12+
13+
@Test
14+
def parserTokenizes(): Unit = {
15+
assertEquals(Nil, tokenize(""))
16+
assertEquals(List("x"), tokenize("x"))
17+
assertEquals(List("x"), tokenize(" x "))
18+
assertEquals(List("x","y"), tokenize("x y"))
19+
assertEquals(List("x","y","z"), tokenize("x y z"))
20+
}
21+
@Test
22+
def parserTrims(): Unit = {
23+
assertEquals(Nil, tokenize(" "))
24+
assertEquals(List("x"), tokenize(" x "))
25+
assertEquals(List("x"), tokenize("\nx\n"))
26+
assertEquals(List("x","y","z"), tokenize(" x y z "))
27+
}
28+
@Test
29+
def parserQuotes(): Unit = {
30+
assertEquals(List("x"), tokenize("'x'"))
31+
assertEquals(List("x"), tokenize(""""x""""))
32+
assertEquals(List("x","y","z"), tokenize("x 'y' z"))
33+
assertEquals(List("x"," y ","z"), tokenize("x ' y ' z"))
34+
assertEquals(List("x","y","z"), tokenize("""x "y" z"""))
35+
assertEquals(List("x"," y ","z"), tokenize("""x " y " z"""))
36+
// interior quotes
37+
assertEquals(List("x y z"), tokenize("x' y 'z")) // was assertEquals(List("x'","y","'z"), tokenize("x' y 'z"))
38+
assertEquals(List("x\ny\nz"), tokenize("x'\ny\n'z"))
39+
assertEquals(List("x'y'z"), tokenize("""x"'y'"z"""))
40+
assertEquals(List("abcxyz"), tokenize(""""abc"xyz"""))
41+
// missing quotes
42+
assertThrows[ParseException](tokenize(""""x""")) // was assertEquals(List("\"x"), tokenize(""""x"""))
43+
assertThrows[ParseException](tokenize("""x'"""))
44+
}
45+
}

0 commit comments

Comments
 (0)