[SPARK-17477][SQL] SparkSQL cannot handle schema evolution from Int -> Long when parquet files have Int as its type while hive metastore has Long as its type
apache · wgtmac · Sep 9, 2016 · Sep 9, 2016 · Sep 10, 2016 · Sep 10, 2016
commit 9fc18a419633a24fc90acaf82378a46404671b34
diff --git a/...c/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/...c/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
@@ -214,9 +214,25 @@ private[parquet] class ParquetRowConverter(
       updater: ParentContainerUpdater): Converter with HasParentContainerUpdater = {
 
     catalystType match {
-      case BooleanType | IntegerType | LongType | FloatType | DoubleType | BinaryType =>
+      case BooleanType | IntegerType | FloatType | DoubleType | BinaryType =>
         new ParquetPrimitiveConverter(updater)
 
+      /**
+        * When reading a Hive table backed by Parquet files whose schema evolved
+        * from Int to Long, the Hive metastore may report Long while the Parquet
+        * files still store Int. SparkSQL needs to check the actual type in the
+        * Parquet files; otherwise the read fails with java.lang.ClassCastException:
+        * [[MutableLong]] cannot be cast to [[MutableInt]].
+        */
+      case LongType if parquetType == INT64 =>
+        new ParquetPrimitiveConverter(updater)
+
+      case LongType =>
+        new ParquetPrimitiveConverter(updater) {
+          override def addInt(value: Int): Unit = // file stores Int: widen losslessly to Long
+            updater.setLong(value.toLong)
+        }
+
       case ByteType =>
         new ParquetPrimitiveConverter(updater) {
           override def addInt(value: Int): Unit =