Add a manual test case.
dongjoon-hyun committed Feb 13, 2018
commit 3b8cb0a1def32924afd3e4b9e4fc702e1d53d36a
@@ -20,9 +20,11 @@ package org.apache.spark.sql.execution.datasources.orc
import java.io.File
import java.util.Locale

import org.apache.hadoop.fs.Path
import org.apache.orc.OrcConf.COMPRESS
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.SparkException
import org.apache.spark.sql.Row
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSQLContext
@@ -160,6 +162,25 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
}
}
}

  // This should be tested manually because it raises OOM intentionally
  // in order to cause `Leaked filesystem connection`. The test suite dies, too.
Contributor:
ah, nice trick to fail the reader midway!

But it's a little weird to have it as a unit test. Shall we just put it in the PR description and say it's manually tested? This test needs to be run manually anyway...

Member Author:

Sure!

  ignore("SPARK-23399 Register a task completion listner first for OrcColumnarBatchReader") {
    withSQLConf(SQLConf.ORC_VECTORIZED_READER_BATCH_SIZE.key -> s"${Int.MaxValue}") {
      withTempDir { dir =>
        val basePath = dir.getCanonicalPath
        Seq(0).toDF("a").write.format("orc").save(new Path(basePath, "first").toString)
        Seq(1).toDF("a").write.format("orc").save(new Path(basePath, "second").toString)
        val df = spark.read.orc(
          new Path(basePath, "first").toString,
          new Path(basePath, "second").toString)
        val e = intercept[SparkException] {
          df.collect()
        }
        assert(e.getCause.isInstanceOf[OutOfMemoryError])
      }
    }
  }
@dongjoon-hyun (Member Author), Feb 13, 2018:

Hi, all.
The above test case reproduces the same leakage reported in JIRA,
and this PR fixes it. Please try this test case in IntelliJ against the master branch.
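The fix the PR title describes — registering the task completion listener *before* the reader initialization that may throw — can be illustrated with a minimal, self-contained sketch. This is plain Scala with no Spark dependency; the names `Reader`, `runTask`, and the listener buffer are hypothetical stand-ins for `OrcColumnarBatchReader` and Spark's `TaskContext` listener machinery, not the actual Spark code:

```scala
import scala.collection.mutable.ArrayBuffer

// Hypothetical resource standing in for OrcColumnarBatchReader.
class Reader {
  var closed = false
  def close(): Unit = closed = true
  // Simulates allocating a vectorized batch; an absurd size triggers OOM,
  // mirroring the Int.MaxValue batch size used in the manual test above.
  def initialize(batchSize: Int): Unit = {
    if (batchSize == Int.MaxValue) throw new OutOfMemoryError("cannot allocate batch")
  }
}

object ListenerFirst {
  // Simulates one task; returns true if the reader was closed despite the OOM.
  def runTask(): Boolean = {
    val completionListeners = ArrayBuffer.empty[() => Unit]
    val reader = new Reader
    // The fix: register the cleanup listener FIRST ...
    completionListeners += (() => reader.close())
    try {
      reader.initialize(Int.MaxValue) // ... so this failure cannot leak the reader
    } catch {
      case _: OutOfMemoryError =>
        // Task completion still fires the listeners, closing the reader.
        completionListeners.foreach(_.apply())
    }
    reader.closed
  }

  def main(args: Array[String]): Unit =
    println(s"reader closed after failed init: ${runTask()}")
}
```

If the listener were registered only after a successful `initialize`, the OOM would leave the reader (and its filesystem connection) open, which is the leak the test reproduces.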

}

class OrcSourceSuite extends OrcSuite with SharedSQLContext {