Fixes a bug when writing small decimals coming from rows that are not UnsafeRow
apache · liancheng · Oct 5, 2015 · Oct 6, 2015 · Oct 6, 2015 · Oct 6, 2015
commit f03ef93e06c1241c69b49dd89d0b155b7ef87019
diff --git a/.../main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystWriteSupport.scala b/.../main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystWriteSupport.scala
@@ -197,12 +197,16 @@ private[parquet] class CatalystWriteSupport extends WriteSupport[InternalRow] wi
     val numBytes = minBytesForPrecision(precision)
 
     val int32Writer =
-      (row: SpecializedGetters, ordinal: Int) =>
-        recordConsumer.addInteger(row.getLong(ordinal).toInt)
+      (row: SpecializedGetters, ordinal: Int) => {
+        val unscaledLong = row.getDecimal(ordinal, precision, scale).toUnscaledLong
+        recordConsumer.addInteger(unscaledLong.toInt)
+      }
 
     val int64Writer =
-      (row: SpecializedGetters, ordinal: Int) =>
-        recordConsumer.addLong(row.getLong(ordinal))
+      (row: SpecializedGetters, ordinal: Int) => {
+        val unscaledLong = row.getDecimal(ordinal, precision, scale).toUnscaledLong
+        recordConsumer.addLong(unscaledLong)
+      }
 
     val binaryWriterUsingUnscaledLong =
       (row: SpecializedGetters, ordinal: Int) => {

diff --git a/...re/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/...re/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -100,13 +100,14 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
   }
 
   testStandardAndLegacyModes("fixed-length decimals") {
-    def makeDecimalRDD(decimal: DecimalType): DataFrame =
-      sparkContext
-        .parallelize(0 to 1000)
-        .map(i => Tuple1((i - 500) / 100.0))
-        .toDF()
-        // Parquet doesn't allow column names with spaces, have to add an alias here
-        .select($"_1" cast decimal as "dec")
+    def makeDecimalRDD(decimal: DecimalType): DataFrame = {
+      sqlContext
+        .range(1000)
+        // Parquet doesn't allow column names with spaces, have to add an alias here.
+        // Minus 500 here so that negative decimals are also tested.
+        .select((('id - 500) / 100.0) cast decimal as 'dec)
+        .coalesce(1)
+    }
 
     val combinations = Seq((5, 2), (1, 0), (1, 1), (18, 10), (18, 17), (19, 0), (38, 37))
     for ((precision, scale) <- combinations) {