diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index a3d7eca04b61..a2771daabe33 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -160,6 +160,8 @@ def json(self, path, schema=None):
quotes
* ``allowNumericLeadingZeros`` (default ``false``): allows leading zeros in numbers \
(e.g. 00012)
+ * ``allowBackslashEscapingAnyCharacter`` (default ``false``): allows accepting quoting \
+ of all characters using the backslash quoting mechanism
>>> df1 = sqlContext.read.json('python/test_support/sql/people.json')
>>> df1.dtypes
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index c1a8f19313a7..ab872c3f0bc9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -257,6 +257,8 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging {
*
*
`allowNumericLeadingZeros` (default `false`): allows leading zeros in numbers
* (e.g. 00012)
+ * `allowBackslashEscapingAnyCharacter` (default `false`): allows accepting quoting of all
+ * characters using the backslash quoting mechanism
*
* @since 1.6.0
*/
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala
index c132ead20e7d..f805c0092585 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala
@@ -31,7 +31,8 @@ case class JSONOptions(
allowUnquotedFieldNames: Boolean = false,
allowSingleQuotes: Boolean = true,
allowNumericLeadingZeros: Boolean = false,
- allowNonNumericNumbers: Boolean = false) {
+ allowNonNumericNumbers: Boolean = false,
+ allowBackslashEscapingAnyCharacter: Boolean = false) {
/** Sets config options on a Jackson [[JsonFactory]]. */
def setJacksonOptions(factory: JsonFactory): Unit = {
@@ -40,6 +41,8 @@ case class JSONOptions(
factory.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, allowSingleQuotes)
factory.configure(JsonParser.Feature.ALLOW_NUMERIC_LEADING_ZEROS, allowNumericLeadingZeros)
factory.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, allowNonNumericNumbers)
+ factory.configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER,
+ allowBackslashEscapingAnyCharacter)
}
}
@@ -59,6 +62,8 @@ object JSONOptions {
allowNumericLeadingZeros =
parameters.get("allowNumericLeadingZeros").map(_.toBoolean).getOrElse(false),
allowNonNumericNumbers =
- parameters.get("allowNonNumericNumbers").map(_.toBoolean).getOrElse(true)
+ parameters.get("allowNonNumericNumbers").map(_.toBoolean).getOrElse(true),
+ allowBackslashEscapingAnyCharacter =
+ parameters.get("allowBackslashEscapingAnyCharacter").map(_.toBoolean).getOrElse(false)
)
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
index 4cc0a3a9585d..1742df31bba9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
@@ -111,4 +111,23 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSQLContext {
assert(df.schema.head.name == "age")
assert(df.first().getDouble(0).isNaN)
}
+
+ test("allowBackslashEscapingAnyCharacter off") {
+ val str = """{"name": "Cazen Lee", "price": "\$10"}"""
+ val rdd = sqlContext.sparkContext.parallelize(Seq(str))
+ val df = sqlContext.read.option("allowBackslashEscapingAnyCharacter", "false").json(rdd)
+
+ assert(df.schema.head.name == "_corrupt_record")
+ }
+
+ test("allowBackslashEscapingAnyCharacter on") {
+ val str = """{"name": "Cazen Lee", "price": "\$10"}"""
+ val rdd = sqlContext.sparkContext.parallelize(Seq(str))
+ val df = sqlContext.read.option("allowBackslashEscapingAnyCharacter", "true").json(rdd)
+
+ assert(df.schema.head.name == "name")
+ assert(df.schema.last.name == "price")
+ assert(df.first().getString(0) == "Cazen Lee")
+ assert(df.first().getString(1) == "$10")
+ }
}