[SPARK-3011][SQL] _temporary directory should be filtered out by sqlContext.parquetFile #1959
Changes from 4 commits
```diff
@@ -373,9 +373,11 @@ private[parquet] object ParquetTypesConverter extends Logging {
     }
     ParquetRelation.enableLogForwarding()

+    // NOTE: Explicitly list "_temporary" because hadoop 0.23 removed the variable TEMP_DIR_NAME
+    // from FileOutputCommitter. Check MAPREDUCE-5229 for the detail.
     val children = fs.listStatus(path).filterNot { status =>
       val name = status.getPath.getName
-      name(0) == '.' || name == FileOutputCommitter.SUCCEEDED_FILE_NAME
+      name(0) == '.' || name == FileOutputCommitter.SUCCEEDED_FILE_NAME || name == "_temporary"
     }

     // NOTE (lian): Parquet "_metadata" file can be very slow if the file consists of lots of row
```
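For reference, a minimal standalone sketch of the predicate this patch installs, with `FileOutputCommitter.SUCCEEDED_FILE_NAME` inlined as its value `"_SUCCESS"` and run over hypothetical directory entry names:

```scala
// Sketch of the patched filter applied to some hypothetical entry names.
val names = Seq("part-r-00001.parquet", "_metadata", "_SUCCESS", "_temporary", ".hidden")
val visible = names.filterNot { name =>
  name(0) == '.' || name == "_SUCCESS" || name == "_temporary"
}
// Before this patch, "_temporary" survived the filter; now only the data
// files and "_metadata" remain.
println(visible)  // List(part-r-00001.parquet, _metadata)
```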
|
Contributor

hmm, a better solution for all of this could be: … then: … then something like: … and moreover,

@liancheng, after carefully reading the following comments, I finally understand what you mean by "a complete Parquet file on HDFS should be a directory" (#2044 (comment)). You mean the whole directory is "a single Parquet file", and the files in it are "data"? But such a definition is really very confusing... Are you sure about this definition? I just googled, but found nothing, only statements like "Parquet files are self-describing so the schema is preserved". So, since they are self-describing, in my mind each "data file" in a Parquet file (a Parquet folder, actually...) is also a valid Parquet-format file, and it should also be accepted as an input source for a Parquet reader like our Spark SQLContext...
Contributor

And yeah, I also find the definition of "Parquet file" somewhat confusing; even the official Parquet documentation doesn't provide a precise definition. IMO a …
Contributor

Thanks for the info. Yep, this method is a confusing point; maybe we can reference some other Parquet reader implementations.
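For context on the discussion above, a Spark-written Parquet "file" on HDFS is a directory; a hypothetical listing (names illustrative) might look like:

```
users.parquet/               <- the "Parquet file" under discussion
├── _metadata                <- summary metadata for the whole directory
├── _SUCCESS                 <- Hadoop job-completion marker
├── _temporary/              <- scratch dir, present while a write is in flight
├── part-r-00001.parquet     <- data file; itself a valid Parquet file
└── part-r-00002.parquet
```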
How about ignoring any file starting with `_`? Hadoop (also) uses this convention, for things like the `_SUCCESS` file.
Unfortunately, that would ignore the metadata file "_metadata" as well.
Maybe we should rethink why we use filterNot here? A simple filter works fine here:
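Something like the following minimal sketch (reusing `fs` and `path` from the patch above, and assuming `_metadata` is the only underscore-prefixed entry we want to keep):

```scala
// Hypothetical positive filter: keep "_metadata", drop all other
// hidden ("."-prefixed) and temporary ("_"-prefixed) entries.
val children = fs.listStatus(path).filter { status =>
  val name = status.getPath.getName
  name == "_metadata" || !(name.startsWith(".") || name.startsWith("_"))
}
```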
so we can ignore all of the hidden/temporary files while still keeping `_metadata`.
I agree with this. Just remove `.*` and `_*` except `_metadata`.