apache · viirya · Nov 17, 2015 · Nov 17, 2015 · Nov 20, 2015 · Nov 27, 2015
diff --git a/pom.xml b/pom.xml
@@ -161,7 +161,7 @@
     <jline.version>${scala.version}</jline.version>
     <jline.groupid>org.scala-lang</jline.groupid>
     <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
-    <fasterxml.jackson.version>2.5.3</fasterxml.jackson.version>
+    <fasterxml.jackson.version>2.7.3</fasterxml.jackson.version>
     <snappy.version>1.1.2.4</snappy.version>
     <netlib.java.version>1.1.2</netlib.java.version>
     <calcite.version>1.2.0-incubating</calcite.version>

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/InferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/InferSchema.scala
@@ -114,7 +114,16 @@ private[sql] object InferSchema {
         // record fields' types have been combined.
         NullType
 
-      case VALUE_STRING => StringType
+      case VALUE_STRING =>
+        // Treat the following quoted non-numeric numbers as DoubleType. Without this check
+        // (for example, if there is only one row) they would be inferred as StringType.
+        val value = parser.getText
+        if (value.equals("NaN") ||
+          value.equals("Infinity") ||
+          value.equals("-Infinity")) {
+          return DoubleType
+        }
+        return StringType
       case START_OBJECT =>
         val builder = Array.newBuilder[StructField]
         while (nextUntil(parser, END_OBJECT)) {

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
@@ -129,12 +129,9 @@ object JacksonParser extends Logging {
       case (VALUE_STRING, FloatType) =>
         // Special case handling for NaN and Infinity.
         val value = parser.getText
-        val lowerCaseValue = value.toLowerCase()
-        if (lowerCaseValue.equals("nan") ||
-          lowerCaseValue.equals("infinity") ||
-          lowerCaseValue.equals("-infinity") ||
-          lowerCaseValue.equals("inf") ||
-          lowerCaseValue.equals("-inf")) {
+        if (value.equals("NaN") ||
+          value.equals("Infinity") ||
+          value.equals("-Infinity")) {
           value.toFloat
         } else {
           throw new SparkSQLJsonProcessingException(s"Cannot parse $value as FloatType.")
@@ -146,12 +143,9 @@ object JacksonParser extends Logging {
       case (VALUE_STRING, DoubleType) =>
         // Special case handling for NaN and Infinity.
         val value = parser.getText
-        val lowerCaseValue = value.toLowerCase()
-        if (lowerCaseValue.equals("nan") ||
-          lowerCaseValue.equals("infinity") ||
-          lowerCaseValue.equals("-infinity") ||
-          lowerCaseValue.equals("inf") ||
-          lowerCaseValue.equals("-inf")) {
+        if (value.equals("NaN") ||
+          value.equals("Infinity") ||
+          value.equals("-Infinity")) {
           value.toDouble
         } else {
           throw new SparkSQLJsonProcessingException(s"Cannot parse $value as DoubleType.")

diff --git a/.../test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala b/.../test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
@@ -93,23 +93,45 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSQLContext {
     assert(df.first().getLong(0) == 18)
   }
 
-  // The following two tests are not really working - need to look into Jackson's
-  // JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS.
-  ignore("allowNonNumericNumbers off") {
-    val str = """{"age": NaN}"""
-    val rdd = spark.sparkContext.parallelize(Seq(str))
-    val df = spark.read.json(rdd)
-
-    assert(df.schema.head.name == "_corrupt_record")
+  test("allowNonNumericNumbers off") {
+    // Unquoted non-numeric numbers must be rejected (corrupt record) when the option is off.
+    val unquoted = Seq("""{"age": NaN}""", """{"age": Infinity}""",
+      """{"age": -Infinity}""", """{"age": +INF}""", """{"age": -INF}""")
+    unquoted.foreach { str =>
+      val rdd = spark.sparkContext.parallelize(Seq(str))
+      val df = spark.read.option("allowNonNumericNumbers", "false").json(rdd)
+      assert(df.schema.head.name == "_corrupt_record")
+    }
+
+    // Quoted non-numeric numbers should still work even if allowNonNumericNumbers is off.
+    // Each input is paired with its own check so the two cannot drift out of step.
+    val quoted: Seq[(String, Double => Boolean)] = Seq(
+      ("""{"age": "NaN"}""", _.isNaN),
+      ("""{"age": "Infinity"}""", _.isPosInfinity),
+      ("""{"age": "-Infinity"}""", _.isNegInfinity))
+    quoted.foreach { case (str, check) =>
+      val rdd = spark.sparkContext.parallelize(Seq(str))
+      val df = spark.read.option("allowNonNumericNumbers", "false").json(rdd)
+      assert(df.schema.head.name == "age")
+      assert(check(df.first().getDouble(0)))
+    }
   }
 
-  ignore("allowNonNumericNumbers on") {
-    val str = """{"age": NaN}"""
-    val rdd = spark.sparkContext.parallelize(Seq(str))
-    val df = spark.read.option("allowNonNumericNumbers", "true").json(rdd)
-
-    assert(df.schema.head.name == "age")
-    assert(df.first().getDouble(0).isNaN)
+  test("allowNonNumericNumbers on") {
+    // Unquoted and quoted non-numeric numbers, each paired with the check its value must pass.
+    val cases: Seq[(String, Double => Boolean)] = Seq(
+      ("""{"age": NaN}""", _.isNaN), ("""{"age": Infinity}""", _.isPosInfinity),
+      ("""{"age": -Infinity}""", _.isNegInfinity), ("""{"age": +INF}""", _.isPosInfinity),
+      ("""{"age": -INF}""", _.isNegInfinity), ("""{"age": "NaN"}""", _.isNaN),
+      ("""{"age": "Infinity"}""", _.isPosInfinity),
+      ("""{"age": "-Infinity"}""", _.isNegInfinity))
+
+    cases.foreach { case (str, check) =>
+      val rdd = spark.sparkContext.parallelize(Seq(str))
+      val df = spark.read.option("allowNonNumericNumbers", "true").json(rdd)
+      assert(df.schema.head.name == "age")
+      assert(check(df.first().getDouble(0)))
+    }
   }
 
   test("allowBackslashEscapingAnyCharacter off") {