-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-31238][SQL] Rebase dates to/from Julian calendar in write/read for ORC datasource #28016
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
a2a081a
121b4c6
ecef05d
47d8588
29d7966
3b1b791
c8a897a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,7 +15,7 @@ | |
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.hive | ||
| package org.apache.spark.sql.execution.datasources | ||
|
|
||
| import java.io.{DataInput, DataOutput, IOException} | ||
| import java.sql.Date | ||
|
|
@@ -35,11 +35,12 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils.{rebaseGregorianToJulian | |
| * @param julianDays The number of days since the epoch 1970-01-01 in | ||
| * Julian calendar. | ||
| */ | ||
| private[hive] class DaysWritable( | ||
| class DaysWritable( | ||
| var gregorianDays: Int, | ||
| var julianDays: Int) | ||
| extends DateWritable { | ||
|
|
||
| def this() = this(0, 0) | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I assume that |
||
| def this(gregorianDays: Int) = | ||
| this(gregorianDays, rebaseGregorianToJulianDays(gregorianDays)) | ||
| def this(dateWritable: DateWritable) = { | ||
|
|
@@ -55,6 +56,11 @@ private[hive] class DaysWritable( | |
| override def getDays: Int = julianDays | ||
| override def get(): Date = new Date(DateWritable.daysToMillis(julianDays)) | ||
|
|
||
| override def set(d: Int): Unit = { | ||
| gregorianDays = d | ||
| julianDays = rebaseGregorianToJulianDays(d) | ||
| } | ||
|
|
||
| @throws[IOException] | ||
| override def write(out: DataOutput): Unit = { | ||
| WritableUtils.writeVInt(out, julianDays) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |
|
|
||
| import org.apache.spark.sql.catalyst.util.DateTimeUtils; | ||
| import org.apache.spark.sql.types.DataType; | ||
| import org.apache.spark.sql.types.DateType; | ||
| import org.apache.spark.sql.types.Decimal; | ||
| import org.apache.spark.sql.types.TimestampType; | ||
| import org.apache.spark.sql.vectorized.ColumnarArray; | ||
|
|
@@ -42,6 +43,7 @@ public class OrcColumnVector extends org.apache.spark.sql.vectorized.ColumnVecto | |
| private DecimalColumnVector decimalData; | ||
| private TimestampColumnVector timestampData; | ||
dongjoon-hyun marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| private final boolean isTimestamp; | ||
| private final boolean isDate; | ||
|
|
||
| private int batchSize; | ||
|
|
||
|
|
@@ -54,6 +56,12 @@ public class OrcColumnVector extends org.apache.spark.sql.vectorized.ColumnVecto | |
| isTimestamp = false; | ||
| } | ||
|
|
||
| if (type instanceof DateType) { | ||
| isDate = true; | ||
| } else { | ||
| isDate = false; | ||
| } | ||
|
|
||
| baseData = vector; | ||
| if (vector instanceof LongColumnVector) { | ||
| longData = (LongColumnVector) vector; | ||
|
|
@@ -130,7 +138,13 @@ public short getShort(int rowId) { | |
|
|
||
| @Override | ||
| public int getInt(int rowId) { | ||
| return (int) longData.vector[getRowIndex(rowId)]; | ||
| int index = getRowIndex(rowId); | ||
| int value = (int) longData.vector[index]; | ||
|
||
| if (isDate) { | ||
| return DateTimeUtils.rebaseJulianToGregorianDays(value); | ||
| } else { | ||
| return value; | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.{Operator => OrcOperator} | |
| import org.apache.hadoop.hive.serde2.io.{DateWritable, HiveDecimalWritable} | ||
|
|
||
| import org.apache.spark.sql.catalyst.expressions.SpecializedGetters | ||
| import org.apache.spark.sql.execution.datasources.DaysWritable | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. After moving |
||
| import org.apache.spark.sql.types.Decimal | ||
|
|
||
| /** | ||
|
|
@@ -47,13 +48,13 @@ private[sql] object OrcShimUtils { | |
|
|
||
| def getDateWritable(reuseObj: Boolean): (SpecializedGetters, Int) => DateWritable = { | ||
| if (reuseObj) { | ||
| val result = new DateWritable() | ||
| val result = new DaysWritable() | ||
| (getter, ordinal) => | ||
| result.set(getter.getInt(ordinal)) | ||
| result | ||
| } else { | ||
| (getter: SpecializedGetters, ordinal: Int) => | ||
| new DateWritable(getter.getInt(ordinal)) | ||
| new DaysWritable(getter.getInt(ordinal)) | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,6 +32,7 @@ import org.apache.spark.sql.AnalysisException | |
| import org.apache.spark.sql.catalyst.InternalRow | ||
| import org.apache.spark.sql.catalyst.expressions._ | ||
| import org.apache.spark.sql.catalyst.util._ | ||
| import org.apache.spark.sql.execution.datasources.DaysWritable | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. unnecessary change
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If I remove it, the build fails with: I moved |
||
| import org.apache.spark.sql.types | ||
| import org.apache.spark.sql.types._ | ||
| import org.apache.spark.unsafe.types.UTF8String | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.