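Address review comment: revert the InferSchema change so quoted "NaN"/"Infinity"/"-Infinity" values are inferred as StringType again; the tests now supply an explicit DoubleType schema.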

apache · viirya · Nov 17, 2015 · Nov 17, 2015 · Nov 20, 2015 · Nov 27, 2015
commit 4fca52c68ae15eb792dab08cea0dc725c3437397
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/InferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/InferSchema.scala
@@ -114,16 +114,7 @@ private[sql] object InferSchema {
         // record fields' types have been combined.
         NullType
 
-      case VALUE_STRING =>
-        // If there is only one row, the following non-numeric numbers will be incorrectly
-        // recognized as StringType.
-        val value = parser.getText
-        if (value.equals("NaN") ||
-          value.equals("Infinity") ||
-          value.equals("-Infinity")) {
-          return DoubleType
-        }
-        return StringType
+      case VALUE_STRING => StringType
       case START_OBJECT =>
         val builder = Array.newBuilder[StructField]
         while (nextUntil(parser, END_OBJECT)) {

diff --git a/.../test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala b/.../test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.json
 
 import org.apache.spark.sql.QueryTest
 import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
 
 /**
  * Test cases for various [[JSONOptions]].
@@ -107,10 +108,11 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSQLContext {
     // quoted non-numeric numbers should still work even allowNonNumericNumbers is off.
     testCases = Seq("""{"age": "NaN"}""", """{"age": "Infinity"}""", """{"age": "-Infinity"}""")
     val tests: Seq[Double => Boolean] = Seq(_.isNaN, _.isPosInfinity, _.isNegInfinity)
+    val schema = StructType(StructField("age", DoubleType, true) :: Nil)
 
     testCases.zipWithIndex.foreach { case (str, idx) =>
       val rdd = spark.sparkContext.parallelize(Seq(str))
-      val df = spark.read.option("allowNonNumericNumbers", "false").json(rdd)
+      val df = spark.read.option("allowNonNumericNumbers", "false").schema(schema).json(rdd)
 
       assert(df.schema.head.name == "age")
       assert(tests(idx)(df.first().getDouble(0)))
@@ -124,10 +126,10 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSQLContext {
     val tests: Seq[Double => Boolean] = Seq(_.isNaN, _.isPosInfinity, _.isNegInfinity,
       _.isPosInfinity, _.isNegInfinity, _.isNaN, _.isPosInfinity, _.isNegInfinity,
       _.isPosInfinity, _.isNegInfinity)
-
+    val schema = StructType(StructField("age", DoubleType, true) :: Nil)
     testCases.zipWithIndex.foreach { case (str, idx) =>
       val rdd = spark.sparkContext.parallelize(Seq(str))
-      val df = spark.read.option("allowNonNumericNumbers", "true").json(rdd)
+      val df = spark.read.option("allowNonNumericNumbers", "true").schema(schema).json(rdd)
 
       assert(df.schema.head.name == "age")
       assert(tests(idx)(df.first().getDouble(0)))