Skip to content

Commit 6f5fa4a

Browse files
viiryanemccarthy
authored andcommitted
[SPARK-7199] [SQL] Add date and timestamp support to UnsafeRow
JIRA: https://issues.apache.org/jira/browse/SPARK-7199 Author: Liang-Chi Hsieh <[email protected]> Closes apache#5984 from viirya/add_date_timestamp and squashes the following commits: 7f21ce9 [Liang-Chi Hsieh] For comment. 0b89698 [Liang-Chi Hsieh] Add timestamp to settableFieldTypes. c30d490 [Liang-Chi Hsieh] Use default IntUnsafeColumnWriter and LongUnsafeColumnWriter. 672ef17 [Liang-Chi Hsieh] Remove getter/setter for Date and Timestamp and use Int and Long for them. 9f3e577 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into add_date_timestamp 281e844 [Liang-Chi Hsieh] Fix scala style. fb532b5 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into add_date_timestamp 80af342 [Liang-Chi Hsieh] Fix compiling error. f4f5de6 [Liang-Chi Hsieh] Fix scala style. a463e83 [Liang-Chi Hsieh] Use Long to store timestamp for rows. 635388a [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into add_date_timestamp 46946c6 [Liang-Chi Hsieh] Adapt for moved DateUtils. b16994e [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into add_date_timestamp 752251f [Liang-Chi Hsieh] Support setDate. Fix failed test. fcf8db9 [Liang-Chi Hsieh] Add functions for Date and Timestamp to SpecificRow. e42a809 [Liang-Chi Hsieh] Fix style. 4c07b57 [Liang-Chi Hsieh] Add date and timestamp support to UnsafeRow.
1 parent beb7f62 commit 6f5fa4a

File tree

4 files changed

+39
-4
lines changed

4 files changed

+39
-4
lines changed

sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,9 @@ public static int calculateBitSetWidthInBytes(int numFields) {
103103
IntegerType,
104104
LongType,
105105
FloatType,
106-
DoubleType
106+
DoubleType,
107+
DateType,
108+
TimestampType
107109
})));
108110

109111
// We support get() on a superset of the types for which we support set():
@@ -331,8 +333,6 @@ public String getString(int i) {
331333
return getUTF8String(i).toString();
332334
}
333335

334-
335-
336336
@Override
337337
public InternalRow copy() {
338338
throw new UnsupportedOperationException();

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverter.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
package org.apache.spark.sql.catalyst.expressions
1919

20+
import org.apache.spark.sql.catalyst.util.DateUtils
21+
import org.apache.spark.sql.catalyst.InternalRow
2022
import org.apache.spark.sql.types._
2123
import org.apache.spark.unsafe.PlatformDependent
2224
import org.apache.spark.unsafe.array.ByteArrayMethods
@@ -120,6 +122,8 @@ private object UnsafeColumnWriter {
120122
case FloatType => FloatUnsafeColumnWriter
121123
case DoubleType => DoubleUnsafeColumnWriter
122124
case StringType => StringUnsafeColumnWriter
125+
case DateType => IntUnsafeColumnWriter
126+
case TimestampType => LongUnsafeColumnWriter
123127
case t =>
124128
throw new UnsupportedOperationException(s"Do not know how to write columns of type $t")
125129
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,9 +197,10 @@ class GenericMutableRow(v: Array[Any]) extends GenericRow(v) with MutableRow {
197197
override def setFloat(ordinal: Int, value: Float): Unit = { values(ordinal) = value }
198198
override def setInt(ordinal: Int, value: Int): Unit = { values(ordinal) = value }
199199
override def setLong(ordinal: Int, value: Long): Unit = { values(ordinal) = value }
200-
override def setString(ordinal: Int, value: String) {
200+
override def setString(ordinal: Int, value: String): Unit = {
201201
values(ordinal) = UTF8String.fromString(value)
202202
}
203+
203204
override def setNullAt(i: Int): Unit = { values(i) = null }
204205

205206
override def setShort(ordinal: Int, value: Short): Unit = { values(ordinal) = value }

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@
1717

1818
package org.apache.spark.sql.catalyst.expressions
1919

20+
import java.sql.{Date, Timestamp}
2021
import java.util.Arrays
2122

2223
import org.scalatest.Matchers
2324

2425
import org.apache.spark.SparkFunSuite
2526
import org.apache.spark.sql.types._
27+
import org.apache.spark.sql.catalyst.util.DateUtils
2628
import org.apache.spark.unsafe.PlatformDependent
2729
import org.apache.spark.unsafe.array.ByteArrayMethods
2830

@@ -74,6 +76,34 @@ class UnsafeRowConverterSuite extends SparkFunSuite with Matchers {
7476
unsafeRow.getString(2) should be ("World")
7577
}
7678

79+
test("basic conversion with primitive, string, date and timestamp types") {
80+
val fieldTypes: Array[DataType] = Array(LongType, StringType, DateType, TimestampType)
81+
val converter = new UnsafeRowConverter(fieldTypes)
82+
83+
val row = new SpecificMutableRow(fieldTypes)
84+
row.setLong(0, 0)
85+
row.setString(1, "Hello")
86+
row.update(2, DateUtils.fromJavaDate(Date.valueOf("1970-01-01")))
87+
row.update(3, DateUtils.fromJavaTimestamp(Timestamp.valueOf("2015-05-08 08:10:25")))
88+
89+
val sizeRequired: Int = converter.getSizeRequirement(row)
90+
sizeRequired should be (8 + (8 * 4) +
91+
ByteArrayMethods.roundNumberOfBytesToNearestWord("Hello".getBytes.length + 8))
92+
val buffer: Array[Long] = new Array[Long](sizeRequired / 8)
93+
val numBytesWritten = converter.writeRow(row, buffer, PlatformDependent.LONG_ARRAY_OFFSET)
94+
numBytesWritten should be (sizeRequired)
95+
96+
val unsafeRow = new UnsafeRow()
97+
unsafeRow.pointTo(buffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length, null)
98+
unsafeRow.getLong(0) should be (0)
99+
unsafeRow.getString(1) should be ("Hello")
100+
// Date is represented as Int in unsafeRow
101+
DateUtils.toJavaDate(unsafeRow.getInt(2)) should be (Date.valueOf("1970-01-01"))
102+
// Timestamp is represented as Long in unsafeRow
103+
DateUtils.toJavaTimestamp(unsafeRow.getLong(3)) should be
104+
(Timestamp.valueOf("2015-05-08 08:10:25"))
105+
}
106+
77107
test("null handling") {
78108
val fieldTypes: Array[DataType] = Array(
79109
NullType,

0 commit comments

Comments
 (0)