Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update
  • Loading branch information
cloud-fan committed Oct 23, 2019
commit 33ceedcedeafbe11070630ef3f47db37b5599ed0
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ public static CalendarInterval fromDayTimeString(String s, String from, String t
return result;
}

public static CalendarInterval fromUnitString(String[] units, String[] values)
public static CalendarInterval fromUnitStrings(String[] units, String[] values)
throws IllegalArgumentException {
assert units.length == values.length;
int months = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
}.toArray
val values = ctx.intervalValue().asScala.map(getIntervalValue).toArray
try {
CalendarInterval.fromUnitString(units, values)
CalendarInterval.fromUnitStrings(units, values)
} catch {
case i: IllegalArgumentException =>
val e = new ParseException(i.getMessage, ctx)
Expand Down Expand Up @@ -1953,7 +1953,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
val unitText = unit.getText.toLowerCase(Locale.ROOT)
val interval = (unitText, Option(to).map(_.getText.toLowerCase(Locale.ROOT))) match {
case (u, None) =>
CalendarInterval.fromUnitString(Array(normalizeInternalUnit(u)), Array(s))
CalendarInterval.fromUnitStrings(Array(normalizeInternalUnit(u)), Array(s))
case ("year", Some("month")) =>
CalendarInterval.fromYearMonthString(s)
case ("day", Some("hour")) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ object IntervalUtils {
Decimal(result, 18, 6)
}

/**
* Converts a string to [[CalendarInterval]] case-insensitively.
*
* @throws IllegalArgumentException if the input string is not in valid interval format.
*/
def fromString(str: String): CalendarInterval = {
if (str == null) throw new IllegalArgumentException("Interval string cannot be null")
try {
Expand All @@ -104,6 +109,9 @@ object IntervalUtils {
}
}

/**
* A safe version of `fromString`. It returns null for invalid input string.
*/
def safeFromString(str: String): CalendarInterval = {
try {
fromString(str)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,7 @@ class ExpressionParserSuite extends AnalysisTest {
"microsecond")

def intervalLiteral(u: String, s: String): Literal = {
Literal(CalendarInterval.fromUnitString(Array(u), Array(s)))
Literal(CalendarInterval.fromUnitStrings(Array(u), Array(s)))
}

test("intervals") {
Expand Down
38 changes: 20 additions & 18 deletions sql/core/benchmarks/IntervalBenchmark-results.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,24 @@ Java HotSpot(TM) 64-Bit Server VM 1.8.0_161-b12 on Mac OS X 10.14
Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz
cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
1 units w/ interval 5026 5090 73 0.2 5026.2 1.0X
1 units w/o interval 4724 4744 23 0.2 4723.9 1.1X
2 units w/ interval 6002 6066 55 0.2 6002.0 0.8X
2 units w/o interval 5657 5689 47 0.2 5657.3 0.9X
3 units w/ interval 6803 6810 7 0.1 6802.7 0.7X
3 units w/o interval 6606 6611 4 0.2 6606.4 0.8X
4 units w/ interval 7739 7760 21 0.1 7738.8 0.6X
4 units w/o interval 7539 7546 7 0.1 7538.7 0.7X
5 units w/ interval 8698 8700 3 0.1 8697.9 0.6X
5 units w/o interval 8513 8520 12 0.1 8512.9 0.6X
6 units w/ interval 9696 9778 119 0.1 9696.1 0.5X
6 units w/o interval 9625 9693 115 0.1 9625.0 0.5X
7 units w/ interval 10762 10769 8 0.1 10762.0 0.5X
7 units w/o interval 10796 10886 95 0.1 10796.4 0.5X
8 units w/ interval 11791 11802 16 0.1 11790.5 0.4X
8 units w/o interval 11589 11630 42 0.1 11589.3 0.4X
9 units w/ interval 12953 13007 63 0.1 12952.8 0.4X
9 units w/o interval 12739 12760 32 0.1 12738.8 0.4X
prepare string w/ interval 403 419 18 2.5 403.1 1.0X
prepare string w/o interval 341 353 21 2.9 341.1 1.2X
1 units w/ interval 5154 5159 8 0.2 5153.5 0.1X
1 units w/o interval 4818 4833 20 0.2 4817.6 0.1X
2 units w/ interval 6191 6223 41 0.2 6190.6 0.1X
2 units w/o interval 6236 6264 25 0.2 6235.7 0.1X
3 units w/ interval 7397 7567 170 0.1 7397.0 0.1X
3 units w/o interval 7280 7367 76 0.1 7279.6 0.1X
4 units w/ interval 8197 8228 27 0.1 8197.3 0.0X
4 units w/o interval 7977 7989 17 0.1 7977.3 0.1X
5 units w/ interval 9089 9192 101 0.1 9088.8 0.0X
5 units w/o interval 8853 8858 5 0.1 8852.8 0.0X
6 units w/ interval 9696 9720 23 0.1 9695.6 0.0X
6 units w/o interval 9509 9518 9 0.1 9509.4 0.0X
7 units w/ interval 10738 10791 55 0.1 10737.8 0.0X
7 units w/o interval 10556 10719 146 0.1 10555.6 0.0X
8 units w/ interval 11787 11992 339 0.1 11786.7 0.0X
8 units w/o interval 11666 11720 56 0.1 11665.8 0.0X
9 units w/ interval 12878 12908 42 0.1 12877.7 0.0X
9 units w/o interval 12696 12738 36 0.1 12696.1 0.0X
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, is this the 6x regression, 2212 -> 12738?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe Parser approach is a right direction for code maintainability. Just cc @gatorsmile since this seems inevitable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My comment is hidden because the code changes: #26190 (comment)

Let me copy it again.

It's about 6 times slower, but perf doesn't matter too much here. It's better to keep a single parser, and make the behavior consistent whenever we parse an interval string. For example

  1. select interval +1 day works but select interval '+1 day' does not.
  2. select interval 1 day 1 year works (fields can be any order) but select interval '1 day 1 year' does not.

In general, the handwritten parser is more efficient but is less powerful. I think it's fragile to maintain the handwritten parser and make sure it's consistent with the antlr one.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it.