Skip to content
Closed
Prev Previous commit
Next Next commit
Fixes a bug when writing small decimals coming from rows that are not UnsafeRow
  • Loading branch information
liancheng committed Oct 7, 2015
commit f03ef93e06c1241c69b49dd89d0b155b7ef87019
Original file line number Diff line number Diff line change
Expand Up @@ -197,12 +197,16 @@ private[parquet] class CatalystWriteSupport extends WriteSupport[InternalRow] wi
val numBytes = minBytesForPrecision(precision)

val int32Writer =
(row: SpecializedGetters, ordinal: Int) =>
recordConsumer.addInteger(row.getLong(ordinal).toInt)
(row: SpecializedGetters, ordinal: Int) => {
val unscaledLong = row.getDecimal(ordinal, precision, scale).toUnscaledLong
recordConsumer.addInteger(unscaledLong.toInt)
}

val int64Writer =
(row: SpecializedGetters, ordinal: Int) =>
recordConsumer.addLong(row.getLong(ordinal))
(row: SpecializedGetters, ordinal: Int) => {
val unscaledLong = row.getDecimal(ordinal, precision, scale).toUnscaledLong
recordConsumer.addLong(unscaledLong)
}

val binaryWriterUsingUnscaledLong =
(row: SpecializedGetters, ordinal: Int) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,14 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
}

testStandardAndLegacyModes("fixed-length decimals") {
def makeDecimalRDD(decimal: DecimalType): DataFrame =
sparkContext
.parallelize(0 to 1000)
.map(i => Tuple1((i - 500) / 100.0))
.toDF()
// Parquet doesn't allow column names with spaces, have to add an alias here
.select($"_1" cast decimal as "dec")
def makeDecimalRDD(decimal: DecimalType): DataFrame = {
sqlContext
.range(1000)
// Parquet doesn't allow column names with spaces, have to add an alias here.
// Minus 500 here so that negative decimals are also tested.
.select((('id - 500) / 100.0) cast decimal as 'dec)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default decimal type will be (10, 0). Should we use a larger scale? Otherwise the numbers will be rounded.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Never mind, we already specify the precision and scale.

.coalesce(1)
}

val combinations = Seq((5, 2), (1, 0), (1, 1), (18, 10), (18, 17), (19, 0), (38, 37))
for ((precision, scale) <- combinations) {
Expand Down