create UnsafeArrayData from a primitive array in CatalystTypeConverters
kiszk committed Nov 2, 2016
commit 5d5ccd6785ddcac1ef4be82aef9e8dcdbcbfab65
@@ -158,6 +158,20 @@ object CatalystTypeConverters {

    override def toCatalystImpl(scalaValue: Any): ArrayData = {
      scalaValue match {
        case a: Array[Boolean] =>
          UnsafeArrayData.fromPrimitiveArray(a)
        case a: Array[Byte] =>
          UnsafeArrayData.fromPrimitiveArray(a)
        case a: Array[Short] =>
          UnsafeArrayData.fromPrimitiveArray(a)
        case a: Array[Int] =>
          UnsafeArrayData.fromPrimitiveArray(a)
        case a: Array[Long] =>
          UnsafeArrayData.fromPrimitiveArray(a)
        case a: Array[Float] =>
          UnsafeArrayData.fromPrimitiveArray(a)
        case a: Array[Double] =>
          UnsafeArrayData.fromPrimitiveArray(a)

Contributor

Do we need to do this? CatalystTypeConverter will be removed and replaced by encoders eventually. Does your benchmark cover this branch?

Member Author

My benchmark does not cover this branch.
This test causes a failure without this branch. Should we drop this test and this branch?

Contributor

Yea, let's remove them; CatalystTypeConverter will not be in the critical path and we don't need to optimize it.
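For context, the encoder path the reviewer refers to bypasses CatalystTypeConverters entirely: Datasets serialize Scala objects straight into Catalyst's internal format. A minimal sketch of that path, assuming the Spark 2.x ExpressionEncoder API:

```scala
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder

// An encoder serializes the Scala object directly to an InternalRow,
// so a primitive array never goes through CatalystTypeConverters here.
val enc = ExpressionEncoder[Array[Int]]()
val internalRow = enc.toRow(Array(1, 100, 10000)) // row holding the array column
```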

        case a: Array[_] =>
          new GenericArrayData(a.map(elementConverter.toCatalyst))
        case s: Seq[_] =>
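The point of the hunk above: for the seven primitive element types, the converter can hand the whole array to UnsafeArrayData.fromPrimitiveArray instead of boxing each element into a GenericArrayData. A rough sketch of the two paths, using only names that appear in this diff:

```scala
import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData
import org.apache.spark.sql.catalyst.util.GenericArrayData

val a = Array(1, 100, 10000)

// Existing path (still used for non-primitive arrays): every element is boxed
// before landing in a GenericArrayData.
val generic = new GenericArrayData(a.map(x => x: Any))

// New path for primitive arrays: the backing array is copied in bulk into
// Unsafe format, with no per-element boxing.
val unsafe = UnsafeArrayData.fromPrimitiveArray(a)
```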
@@ -19,6 +19,8 @@ package org.apache.spark.sql.catalyst

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData
import org.apache.spark.sql.catalyst.util.GenericArrayData
import org.apache.spark.sql.types._

class CatalystTypeConvertersSuite extends SparkFunSuite {
@@ -61,4 +63,39 @@ class CatalystTypeConvertersSuite extends SparkFunSuite {
test("option handling in createToCatalystConverter") {
assert(CatalystTypeConverters.createToCatalystConverter(IntegerType)(Some(123)) === 123)
}

test("primitive array handing") {

Contributor

Nit: handing => handling

    val intArray = Array(1, 100, 10000)
    val intUnsafeArray = UnsafeArrayData.fromPrimitiveArray(intArray)
    val intArrayType = ArrayType(IntegerType, false)
    assert(CatalystTypeConverters.createToScalaConverter(intArrayType)(intUnsafeArray) === intArray)
    assert(CatalystTypeConverters.createToCatalystConverter(intArrayType)(intArray)
      == intUnsafeArray)

    val doubleArray = Array(1.1, 111.1, 11111.1)
    val doubleUnsafeArray = UnsafeArrayData.fromPrimitiveArray(doubleArray)
    val doubleArrayType = ArrayType(DoubleType, false)
    assert(CatalystTypeConverters.createToScalaConverter(doubleArrayType)(doubleUnsafeArray)
      === doubleArray)
    assert(CatalystTypeConverters.createToCatalystConverter(doubleArrayType)(doubleArray)
      == doubleUnsafeArray)
  }

test("An array with null handing") {

Contributor

Nit: handing => handling

Member Author
@kiszk kiszk Oct 14, 2016

Thank you for a good catch. Addressed both of them.

    val intArray = Array(1, null, 100, null, 10000)
    val intGenericArray = new GenericArrayData(intArray)
    val intArrayType = ArrayType(IntegerType, true)
    assert(CatalystTypeConverters.createToScalaConverter(intArrayType)(intGenericArray)
      === intArray)
    assert(CatalystTypeConverters.createToCatalystConverter(intArrayType)(intArray)
      == intGenericArray)

    val doubleArray = Array(1.1, null, 111.1, null, 11111.1)
    val doubleGenericArray = new GenericArrayData(doubleArray)
    val doubleArrayType = ArrayType(DoubleType, true)
    assert(CatalystTypeConverters.createToScalaConverter(doubleArrayType)(doubleGenericArray)
      === doubleArray)
    assert(CatalystTypeConverters.createToCatalystConverter(doubleArrayType)(doubleArray)
      == doubleGenericArray)
  }
}