-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-39469][SQL] Infer date type for CSV schema inference #36871
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
5048253
5058d92
f16e5e1
c2b5fdc
9514c2c
be7aabd
db3b442
7d98686
966bdb6
638064b
50a91a6
d71558d
601dfc8
5aa4ab6
2282c59
2484b77
762e0d8
2c93af5
41fa8eb
e1170d0
1e8f938
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -141,8 +141,18 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { | |
| case _: DecimalType => tryParseDecimal(field) | ||
| case DoubleType => tryParseDouble(field) | ||
| case DateType => tryParseDateTime(field) | ||
|
||
| case TimestampNTZType => tryParseTimestampNTZ(field) | ||
| case TimestampType => tryParseTimestamp(field) | ||
| case TimestampNTZType => | ||
| if (options.inferDate) { | ||
| tryParseDateTime(field) | ||
| } else { | ||
| tryParseTimestampNTZ(field) | ||
| } | ||
| case TimestampType => | ||
| if (options.inferDate) { | ||
| tryParseDateTime(field) | ||
| } else { | ||
| tryParseTimestamp(field) | ||
| } | ||
| case BooleanType => tryParseBoolean(field) | ||
| case StringType => StringType | ||
| case other: DataType => | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,12 +19,12 @@ package org.apache.spark.sql.catalyst.csv | |
|
|
||
| import java.math.BigDecimal | ||
| import java.text.{DecimalFormat, DecimalFormatSymbols} | ||
| import java.time.{ZoneId, ZoneOffset} | ||
| import java.time.{ZoneOffset} | ||
| import java.util.{Locale, TimeZone} | ||
|
|
||
| import org.apache.commons.lang3.time.FastDateFormat | ||
| import org.apache.spark.SparkFunSuite | ||
|
|
||
| import org.apache.spark.SparkFunSuite | ||
| import org.apache.spark.sql.catalyst.InternalRow | ||
| import org.apache.spark.sql.catalyst.plans.SQLHelper | ||
| import org.apache.spark.sql.catalyst.util.DateTimeConstants._ | ||
|
|
@@ -367,11 +367,15 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper { | |
| "timestampNTZFormat" -> "dd-MM-yyyy HH:mm", "dateFormat" -> "dd_MM_yyyy"), | ||
|
||
| false, "UTC") | ||
| val dateString = "08_09_2001" | ||
| val expected = date(2001, 9, 8, 0, 0, 0, 0, ZoneOffset.UTC) | ||
| val expected = dataType match { | ||
| case TimestampType | TimestampNTZType => date(2001, 9, 8, 0, 0, 0, 0, ZoneOffset.UTC) | ||
| case DateType => days(2001, 9, 8) | ||
| } | ||
| val parser = new UnivocityParser(new StructType(), timestampsOptions) | ||
| assert(parser.makeConverter("d", dataType).apply(dateString) == expected) | ||
| } | ||
| checkDate(TimestampType) | ||
| checkDate(TimestampNTZType) | ||
Jonathancui123 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| checkDate(DateType) | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.