Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ private[parquet] class CatalystRowConverter(
val scale = decimalType.scale
val bytes = value.getBytes

if (precision <= 8) {
if (precision <= CatalystSchemaConverter.MAX_PRECISION_FOR_INT64) {
// Constructs a `Decimal` with an unscaled `Long` value if possible.
var unscaled = 0L
var i = 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._
import org.apache.parquet.schema.Type.Repetition._
import org.apache.parquet.schema._

import org.apache.spark.sql.parquet.CatalystSchemaConverter.{MAX_PRECISION_FOR_INT32, MAX_PRECISION_FOR_INT64, maxPrecisionForBytes}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{AnalysisException, SQLConf}

Expand Down Expand Up @@ -155,15 +156,15 @@ private[parquet] class CatalystSchemaConverter(
case INT_16 => ShortType
case INT_32 | null => IntegerType
case DATE => DateType
case DECIMAL => makeDecimalType(maxPrecisionForBytes(4))
case DECIMAL => makeDecimalType(MAX_PRECISION_FOR_INT32)
case TIME_MILLIS => typeNotImplemented()
case _ => illegalType()
}

case INT64 =>
originalType match {
case INT_64 | null => LongType
case DECIMAL => makeDecimalType(maxPrecisionForBytes(8))
case DECIMAL => makeDecimalType(MAX_PRECISION_FOR_INT64)
case TIMESTAMP_MILLIS => typeNotImplemented()
case _ => illegalType()
}
Expand Down Expand Up @@ -405,7 +406,7 @@ private[parquet] class CatalystSchemaConverter(

// Uses INT32 for 1 <= precision <= 9
case DecimalType.Fixed(precision, scale)
if precision <= maxPrecisionForBytes(4) && followParquetFormatSpec =>
if precision <= MAX_PRECISION_FOR_INT32 && followParquetFormatSpec =>
Types
.primitive(INT32, repetition)
.as(DECIMAL)
Expand All @@ -415,7 +416,7 @@ private[parquet] class CatalystSchemaConverter(

// Uses INT64 for 1 <= precision <= 18
case DecimalType.Fixed(precision, scale)
if precision <= maxPrecisionForBytes(8) && followParquetFormatSpec =>
if precision <= MAX_PRECISION_FOR_INT64 && followParquetFormatSpec =>
Types
.primitive(INT64, repetition)
.as(DECIMAL)
Expand Down Expand Up @@ -534,14 +535,6 @@ private[parquet] class CatalystSchemaConverter(
throw new AnalysisException(s"Unsupported data type $field.dataType")
}
}

// Max precision of a decimal value stored in `numBytes` bytes
private def maxPrecisionForBytes(numBytes: Int): Int = {
  // Largest value representable in a signed two's-complement integer that is
  // `numBytes` bytes wide: 2^(8 * numBytes - 1) - 1.
  val largestValue = Math.pow(2, 8 * numBytes - 1) - 1
  // Count of base-10 digits that are guaranteed to fit in that value.
  val wholeDigits = Math.floor(Math.log10(largestValue))
  // `Math.round` only serves to turn the already-floored Double into a Long,
  // which is then narrowed to Int.
  Math.round(wholeDigits).toInt
}
}


Expand Down Expand Up @@ -584,4 +577,16 @@ private[parquet] object CatalystSchemaConverter {
computeMinBytesForPrecision(precision)
}
}

// Maximum decimal precision whose unscaled value fits in a 4-byte signed integer.
val MAX_PRECISION_FOR_INT32: Int = maxPrecisionForBytes(numBytes = 4)

// Maximum decimal precision whose unscaled value fits in an 8-byte signed integer.
val MAX_PRECISION_FOR_INT64: Int = maxPrecisionForBytes(numBytes = 8)

// Max precision of a decimal value stored in `numBytes` bytes
def maxPrecisionForBytes(numBytes: Int): Int = {
  // Largest value representable in a signed two's-complement integer that is
  // `numBytes` bytes wide: 2^(8 * numBytes - 1) - 1.
  val largestValue = Math.pow(2, 8 * numBytes - 1) - 1
  // Count of base-10 digits that are guaranteed to fit in that value.
  val wholeDigits = Math.floor(Math.log10(largestValue))
  // `Math.round` only serves to turn the already-floored Double into a Long,
  // which is then narrowed to Int.
  Math.round(wholeDigits).toInt
}
}