update

apache · cloud-fan · Apr 23, 2016 · Apr 24, 2016 · Apr 28, 2016 · Apr 28, 2016
commit f4d2cbbefabdd7e42317835cc168ea92c26e040c
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -27,8 +27,7 @@ import com.github.fommil.netlib.BLAS.{getInstance => blas}
 import org.apache.spark.annotation.Since
 import org.apache.spark.ml.{linalg => newlinalg}
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
-import org.apache.spark.sql.catalyst.util.{DoubleArrayData, IntArrayData}
+import org.apache.spark.sql.catalyst.expressions.{GenericMutableRow, UnsafeArrayData}
 import org.apache.spark.sql.types._
 
 /**
@@ -194,9 +193,9 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] {
         row.setByte(0, 0)
         row.setInt(1, sm.numRows)
         row.setInt(2, sm.numCols)
-        row.update(3, new IntArrayData(sm.colPtrs))
-        row.update(4, new IntArrayData(sm.rowIndices))
-        row.update(5, new DoubleArrayData(sm.values))
+        row.update(3, UnsafeArrayData.fromPrimitiveArray(sm.colPtrs))
+        row.update(4, UnsafeArrayData.fromPrimitiveArray(sm.rowIndices))
+        row.update(5, UnsafeArrayData.fromPrimitiveArray(sm.values))
         row.setBoolean(6, sm.isTransposed)
 
       case dm: DenseMatrix =>
@@ -205,7 +204,7 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] {
         row.setInt(2, dm.numCols)
         row.setNullAt(3)
         row.setNullAt(4)
-        row.update(5, new DoubleArrayData(dm.values))
+        row.update(5, UnsafeArrayData.fromPrimitiveArray(dm.values))
         row.setBoolean(6, dm.isTransposed)
     }
     row
@@ -219,12 +218,21 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] {
         val tpe = row.getByte(0)
         val numRows = row.getInt(1)
         val numCols = row.getInt(2)
-        val values = row.getArray(5).toDoubleArray()
+        val values = row.getArray(5) match {
+          case u: UnsafeArrayData => u.toPrimitiveDoubleArray
+          case a => a.toDoubleArray()
+        }
         val isTransposed = row.getBoolean(6)
         tpe match {
           case 0 =>
-            val colPtrs = row.getArray(3).toIntArray()
-            val rowIndices = row.getArray(4).toIntArray()
+            val colPtrs = row.getArray(3) match {
+              case u: UnsafeArrayData => u.toPrimitiveIntArray
+              case a => a.toIntArray()
+            }
+            val rowIndices = row.getArray(4) match {
+              case u: UnsafeArrayData => u.toPrimitiveIntArray
+              case a => a.toIntArray()
+            }
             new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values, isTransposed)
           case 1 =>
             new DenseMatrix(numRows, numCols, values, isTransposed)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -33,8 +33,7 @@ import org.apache.spark.annotation.{AlphaComponent, Since}
 import org.apache.spark.ml.{linalg => newlinalg}
 import org.apache.spark.mllib.util.NumericParser
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
-import org.apache.spark.sql.catalyst.util.{DoubleArrayData, IntArrayData}
+import org.apache.spark.sql.catalyst.expressions.{GenericMutableRow, UnsafeArrayData}
 import org.apache.spark.sql.types._
 
 /**
@@ -216,15 +215,15 @@ class VectorUDT extends UserDefinedType[Vector] {
         val row = new GenericMutableRow(4)
         row.setByte(0, 0)
         row.setInt(1, size)
-        row.update(2, new IntArrayData(indices))
-        row.update(3, new DoubleArrayData(values))
+        row.update(2, UnsafeArrayData.fromPrimitiveArray(indices))
+        row.update(3, UnsafeArrayData.fromPrimitiveArray(values))
         row
       case DenseVector(values) =>
         val row = new GenericMutableRow(4)
         row.setByte(0, 1)
         row.setNullAt(1)
         row.setNullAt(2)
-        row.update(3, new DoubleArrayData(values))
+        row.update(3, UnsafeArrayData.fromPrimitiveArray(values))
         row
     }
   }
@@ -238,11 +237,20 @@ class VectorUDT extends UserDefinedType[Vector] {
         tpe match {
           case 0 =>
             val size = row.getInt(1)
-            val indices = row.getArray(2).toIntArray()
-            val values = row.getArray(3).toDoubleArray()
+            val indices = row.getArray(2) match {
+              case u: UnsafeArrayData => u.toPrimitiveIntArray
+              case a => a.toIntArray()
+            }
+            val values = row.getArray(3) match {
+              case u: UnsafeArrayData => u.toPrimitiveDoubleArray
+              case a => a.toDoubleArray()
+            }
             new SparseVector(size, indices, values)
           case 1 =>
-            val values = row.getArray(3).toDoubleArray()
+            val values = row.getArray(3) match {
+              case u: UnsafeArrayData => u.toPrimitiveDoubleArray
+              case a => a.toDoubleArray()
+            }
             new DenseVector(values)
         }
     }

diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java
@@ -81,7 +81,7 @@ private void assertIndexIsValid(int ordinal) {
   }
 
   public Object[] array() {
-    throw new UnsupportedOperationException("Only supported on GenericArrayData.");
+    throw new UnsupportedOperationException("Not supported on UnsafeArrayData.");
   }
 
   /**
@@ -336,4 +336,62 @@ public UnsafeArrayData copy() {
     arrayCopy.pointTo(arrayDataCopy, Platform.BYTE_ARRAY_OFFSET, sizeInBytes);
     return arrayCopy;
   }
+
+  /**
+   * Copies the elements of this array into a newly allocated {@code int[]}.
+   *
+   * The copy is one bulk memory transfer: it skips the 4-byte element count and the
+   * 4-bytes-per-element offset region, then reads {@code numElements} consecutive 4-byte
+   * values. It therefore only yields meaningful data when the value region is densely packed
+   * with ints, which is the layout written by {@code fromPrimitiveArray(int[])}.
+   * NOTE(review): arrays holding null or non-int elements presumably violate that assumption;
+   * confirm against the callers.
+   *
+   * @return a fresh array with {@code numElements} entries
+   */
+  public int[] toPrimitiveIntArray() {
+    final int offsetRegionSize = 4 * numElements;
+    final int valueRegionSize = 4 * numElements;
+    // Values start after the element count (4 bytes) and the per-element offsets.
+    final long valueRegionStart = baseOffset + 4 + offsetRegionSize;
+    final int[] ints = new int[numElements];
+    Platform.copyMemory(
+      baseObject, valueRegionStart, ints, Platform.INT_ARRAY_OFFSET, valueRegionSize);
+    return ints;
+  }
+
+  /**
+   * Copies the elements of this array into a newly allocated {@code double[]}.
+   *
+   * The copy is one bulk memory transfer: it skips the 4-byte element count and the
+   * 4-bytes-per-element offset region, then reads {@code numElements} consecutive 8-byte
+   * values. It therefore only yields meaningful data when the value region is densely packed
+   * with doubles, which is the layout written by {@code fromPrimitiveArray(double[])}.
+   * NOTE(review): arrays holding null or non-double elements presumably violate that
+   * assumption; confirm against the callers.
+   *
+   * @return a fresh array with {@code numElements} entries
+   */
+  public double[] toPrimitiveDoubleArray() {
+    final int offsetRegionSize = 4 * numElements;
+    final int valueRegionSize = 8 * numElements;
+    // Values start after the element count (4 bytes) and the per-element offsets.
+    final long valueRegionStart = baseOffset + 4 + offsetRegionSize;
+    final double[] doubles = new double[numElements];
+    Platform.copyMemory(
+      baseObject, valueRegionStart, doubles, Platform.DOUBLE_ARRAY_OFFSET, valueRegionSize);
+    return doubles;
+  }
+
+  /**
+   * Encodes {@code arr} into a freshly allocated byte array and wraps it as an
+   * {@code UnsafeArrayData}.
+   *
+   * Layout written: a 4-byte element count, one 4-byte offset per element (counted from the
+   * start of the encoded array), then the 4-byte values packed back to back.
+   *
+   * @param arr the ints to encode; must not be null
+   * @return an array pointing at the newly encoded bytes
+   * @throws UnsupportedOperationException if the encoded form does not fit in a byte array
+   */
+  public static UnsafeArrayData fromPrimitiveArray(int[] arr) {
+    // Size the buffer with 64-bit arithmetic: in int math 4 + 8 * length wraps around for
+    // large inputs, which can under-allocate the buffer and let the unchecked writes below
+    // run past its end.
+    final long offsetRegionSize = 4L * arr.length;
+    final long valueRegionSize = 4L * arr.length;
+    final long totalSizeInLong = 4L + offsetRegionSize + valueRegionSize;
+    if (totalSizeInLong > Integer.MAX_VALUE) {
+      throw new UnsupportedOperationException(
+        "Cannot convert this array to unsafe format as it's too big.");
+    }
+    final int totalSize = (int) totalSizeInLong;
+    byte[] data = new byte[totalSize];
+
+    Platform.putInt(data, Platform.BYTE_ARRAY_OFFSET, arr.length);
+
+    // Values begin right after the element count and the offset region.
+    final int elementOffsetStart = 4 + (int) offsetRegionSize;
+    for (int i = 0; i < arr.length; i++) {
+      Platform.putInt(data, Platform.BYTE_ARRAY_OFFSET + 4 + i * 4, elementOffsetStart + i * 4);
+    }
+
+    Platform.copyMemory(arr, Platform.INT_ARRAY_OFFSET, data,
+      Platform.BYTE_ARRAY_OFFSET + elementOffsetStart, valueRegionSize);
+
+    UnsafeArrayData result = new UnsafeArrayData();
+    result.pointTo(data, Platform.BYTE_ARRAY_OFFSET, totalSize);
+    return result;
+  }
+
+  /**
+   * Encodes {@code arr} into a freshly allocated byte array and wraps it as an
+   * {@code UnsafeArrayData}.
+   *
+   * Layout written: a 4-byte element count, one 4-byte offset per element (counted from the
+   * start of the encoded array), then the 8-byte values packed back to back.
+   *
+   * @param arr the doubles to encode; must not be null
+   * @return an array pointing at the newly encoded bytes
+   * @throws UnsupportedOperationException if the encoded form does not fit in a byte array
+   */
+  public static UnsafeArrayData fromPrimitiveArray(double[] arr) {
+    // Size the buffer with 64-bit arithmetic: in int math 4 + 12 * length wraps around for
+    // large inputs, which can under-allocate the buffer and let the unchecked writes below
+    // run past its end.
+    final long offsetRegionSize = 4L * arr.length;
+    final long valueRegionSize = 8L * arr.length;
+    final long totalSizeInLong = 4L + offsetRegionSize + valueRegionSize;
+    if (totalSizeInLong > Integer.MAX_VALUE) {
+      throw new UnsupportedOperationException(
+        "Cannot convert this array to unsafe format as it's too big.");
+    }
+    final int totalSize = (int) totalSizeInLong;
+    byte[] data = new byte[totalSize];
+
+    Platform.putInt(data, Platform.BYTE_ARRAY_OFFSET, arr.length);
+
+    // Values begin right after the element count and the offset region.
+    final int elementOffsetStart = 4 + (int) offsetRegionSize;
+    for (int i = 0; i < arr.length; i++) {
+      Platform.putInt(data, Platform.BYTE_ARRAY_OFFSET + 4 + i * 4, elementOffsetStart + i * 8);
+    }
+
+    Platform.copyMemory(arr, Platform.DOUBLE_ARRAY_OFFSET, data,
+      Platform.BYTE_ARRAY_OFFSET + elementOffsetStart, valueRegionSize);
+
+    UnsafeArrayData result = new UnsafeArrayData();
+    result.pointTo(data, Platform.BYTE_ARRAY_OFFSET, totalSize);
+    return result;
+  }
+
+  // TODO: add more specialized methods.
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala
@@ -137,61 +137,3 @@ class GenericArrayData(val array: Array[Any]) extends ArrayData {
     result
   }
 }
-
-abstract class SpecializedArrayData extends ArrayData {
-  // Primitive arrays can't haven null elements.
-  override def isNullAt(ordinal: Int): Boolean = false
-
-  private def fail() = {
-    throw new UnsupportedOperationException(
-      "Specialized array data should implement its corresponding get method")
-  }
-
-  override def get(ordinal: Int, elementType: DataType): AnyRef = fail()
-  override def getBoolean(ordinal: Int): Boolean = fail()
-  override def getByte(ordinal: Int): Byte = fail()
-  override def getShort(ordinal: Int): Short = fail()
-  override def getInt(ordinal: Int): Int = fail()
-  override def getLong(ordinal: Int): Long = fail()
-  override def getFloat(ordinal: Int): Float = fail()
-  override def getDouble(ordinal: Int): Double = fail()
-  override def getDecimal(ordinal: Int, precision: Int, scale: Int): Decimal = fail()
-  override def getUTF8String(ordinal: Int): UTF8String = fail()
-  override def getBinary(ordinal: Int): Array[Byte] = fail()
-  override def getInterval(ordinal: Int): CalendarInterval = fail()
-  override def getStruct(ordinal: Int, numFields: Int): InternalRow = fail()
-  override def getArray(ordinal: Int): ArrayData = fail()
-  override def getMap(ordinal: Int): MapData = fail()
-}
-
-class IntArrayData(val values: Array[Int]) extends SpecializedArrayData {
-
-  override def array(): Array[Any] = values.map(_.asInstanceOf[Any])
-
-  override def numElements(): Int = values.length
-
-  override def get(ordinal: Int, elementType: DataType): AnyRef =
-    values(ordinal).asInstanceOf[AnyRef]
-
-  override def getInt(ordinal: Int): Int = values(ordinal)
-
-  override def toIntArray(): Array[Int] = values
-
-  override def copy(): IntArrayData = new IntArrayData(values.clone())
-}
-
-class DoubleArrayData(val values: Array[Double]) extends SpecializedArrayData {
-
-  override def array(): Array[Any] = values.map(_.asInstanceOf[Any])
-
-  override def numElements(): Int = values.length
-
-  override def get(ordinal: Int, elementType: DataType): AnyRef =
-    values(ordinal).asInstanceOf[AnyRef]
-
-  override def getDouble(ordinal: Int): Double = values(ordinal)
-
-  override def toDoubleArray(): Array[Double] = values
-
-  override def copy(): DoubleArrayData = new DoubleArrayData(values.clone())
-}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData
+
+/**
+ * Exercises the conversions between primitive JVM arrays and [[UnsafeArrayData]]: encoding
+ * int/double arrays, reading elements back one by one, and decoding to primitive arrays.
+ */
+class UnsafeArraySuite extends SparkFunSuite {
+
+  // Shared fixtures; no test mutates them.
+  private val ints = Array(1, 10, 100)
+  private val doubles = Array(1.1, 2.2, 3.3)
+
+  test("from primitive int array") {
+    val encoded = UnsafeArrayData.fromPrimitiveArray(ints)
+    assert(encoded.numElements == 3)
+    // 4-byte element count, then a 4-byte offset and a 4-byte value per element.
+    assert(encoded.getSizeInBytes == 4 + 4 * 3 + 4 * 3)
+    ints.indices.foreach { i =>
+      assert(encoded.getInt(i) == ints(i))
+    }
+  }
+
+  test("from primitive double array") {
+    val encoded = UnsafeArrayData.fromPrimitiveArray(doubles)
+    assert(encoded.numElements == 3)
+    // 4-byte element count, then a 4-byte offset and an 8-byte value per element.
+    assert(encoded.getSizeInBytes == 4 + 4 * 3 + 8 * 3)
+    doubles.indices.foreach { i =>
+      assert(encoded.getDouble(i) == doubles(i))
+    }
+  }
+
+  test("to primitive int array") {
+    val decoded = UnsafeArrayData.fromPrimitiveArray(ints).toPrimitiveIntArray
+    assert(ints.toSeq == decoded.toSeq)
+  }
+
+  test("to primitive double array") {
+    val decoded = UnsafeArrayData.fromPrimitiveArray(doubles).toPrimitiveDoubleArray
+    assert(doubles.toSeq == decoded.toSeq)
+  }
+}