-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-39469][SQL] Infer date type for CSV schema inference #36871
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
5048253
5058d92
f16e5e1
c2b5fdc
9514c2c
be7aabd
db3b442
7d98686
966bdb6
638064b
50a91a6
d71558d
601dfc8
5aa4ab6
2282c59
2484b77
762e0d8
2c93af5
41fa8eb
e1170d0
1e8f938
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -125,7 +125,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { | |
| case _: DecimalType => tryParseDecimal(field) | ||
| case DoubleType => tryParseDouble(field) | ||
| // Temporary NOTE: DateTimeType is private to [sql] package | ||
| - case DateType | TimestampNTZType | TimestampType => tryParseDate(field) | ||
| + case DateType | TimestampNTZType | TimestampType => tryParseDateTime(field) | ||
| case BooleanType => tryParseBoolean(field) | ||
| case StringType => StringType | ||
| case other: DataType => | ||
|
|
@@ -177,11 +177,11 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { | |
| if ((allCatch opt field.toDouble).isDefined || isInfOrNan(field)) { | ||
| DoubleType | ||
| } else { | ||
| - tryParseDate(field) | ||
| + tryParseDateTime(field) | ||
| } | ||
| } | ||
|
|
||
| - private def tryParseDate(field: String): DataType = { | ||
| + private def tryParseDateTime(field: String): DataType = { | ||
| if ((allCatch opt dateFormatter.parse(field)).isDefined) { | ||
|
||
| DateType | ||
| } else { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -358,4 +358,19 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper { | |
| Map("timestampFormat" -> "invalid", "dateFormat" -> "invalid"), false, "UTC") | ||
| check(new UnivocityParser(StructType(Seq.empty), optionsWithPattern)) | ||
| } | ||
|
|
||
| test("dates should be parsed correctly in a timestamp column") { | ||
Jonathancui123 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| def checkDate(dataType: DataType): Unit = { | ||
| val timestampsOptions = | ||
| new CSVOptions(Map("timestampFormat" -> "dd/MM/yyyy HH:mm", | ||
| "timestampNTZFormat" -> "dd-MM-yyyy HH:mm", "dateFormat" -> "dd_MM_yyyy"), | ||
|
||
| false, "UTC") | ||
| val dateString = "08_09_2001" | ||
| val date = days(2001, 9, 8) | ||
| val parser = new UnivocityParser(new StructType(), timestampsOptions) | ||
| assert(parser.makeConverter("d", dataType).apply(dateString) == date) | ||
| } | ||
| checkDate(TimestampType) | ||
| checkDate(TimestampNTZType) | ||
Jonathancui123 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.