Skip to content
Closed
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
adc9ded
[SPARK-31937][SQL] Support processing array and map type using spark …
AngersZhuuuu Dec 29, 2020
6a7438b
Update CatalystTypeConverters.scala
AngersZhuuuu Dec 29, 2020
d3b9cec
fix failed UT
AngersZhuuuu Dec 29, 2020
fdd5225
Update SparkScriptTransformationSuite.scala
AngersZhuuuu Dec 29, 2020
aa16c8f
Update BaseScriptTransformationSuite.scala
AngersZhuuuu Dec 29, 2020
092c927
Update BaseScriptTransformationExec.scala
AngersZhuuuu Dec 29, 2020
9761c0e
Merge branch 'master' into SPARK-31937
AngersZhuuuu Dec 29, 2020
28ad7fa
Update BaseScriptTransformationSuite.scala
AngersZhuuuu Dec 29, 2020
9ac75fc
Merge branch 'master' into SPARK-31937
AngersZhuuuu Jan 4, 2021
33d8b5b
Update BaseScriptTransformationExec.scala
AngersZhuuuu Jan 4, 2021
63f07eb
follow comment
AngersZhuuuu Feb 4, 2021
b631b70
Update BaseScriptTransformationExec.scala
AngersZhuuuu Feb 4, 2021
b7e7f92
follow comment
AngersZhuuuu Feb 5, 2021
8dec5a1
follow comment
AngersZhuuuu Feb 5, 2021
529d54d
Update BaseScriptTransformationExec.scala
AngersZhuuuu Feb 6, 2021
4f0e78f
Avoid construct JsonToStructs repeated
AngersZhuuuu Feb 6, 2021
ed8c54c
remove unused UT
AngersZhuuuu Feb 6, 2021
520f4b8
Update sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScr…
AngersZhuuuu Apr 16, 2021
97f9d58
Merge branch 'master' into SPARK-31937
AngersZhuuuu Apr 16, 2021
b5a4268
[SPARK-35097][SQL] Add column name to SparkUpgradeException about anc…
AngersZhuuuu Apr 18, 2021
76a746e
Revert "[SPARK-35097][SQL] Add column name to SparkUpgradeException a…
AngersZhuuuu Apr 18, 2021
6aa05fc
fix UT
AngersZhuuuu Apr 19, 2021
9e3f808
Revert "fix UT"
AngersZhuuuu Apr 19, 2021
3f51d27
fix UT
AngersZhuuuu Apr 19, 2021
adf8a66
Update sql-migration-guide.md
AngersZhuuuu Apr 19, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Update BaseScriptTransformationSuite.scala
  • Loading branch information
AngersZhuuuu committed Dec 29, 2020
commit 28ad7faccbe10fc75d063a3141d50613d665d94b
Original file line number Diff line number Diff line change
Expand Up @@ -448,58 +448,83 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
}
}

test("SPARK-31936: Script transform support Array/MapType/StructType (no serde)") {
assume(TestUtils.testCommandAvailable("python"))
withTempView("v") {
val df = Seq(
(Array(0, 1, 2), Array(Array(0, 1), Array(2)),
Map("a" -> 1), Map("b" -> Array("a", "b"))),
(Array(3, 4, 5), Array(Array(3, 4), Array(5)),
Map("b" -> 2), Map("c" -> Array("c", "d"))),
(Array(6, 7, 8), Array(Array(6, 7), Array(8)),
Map("c" -> 3), Map("d" -> Array("e", "f")))
).toDF("a", "b", "c", "d")
.select('a, 'b, 'c, 'd,
struct('a, 'b).as("e"),
struct('a, 'd).as("f"),
struct(struct('a, 'b), struct('a, 'd)).as("g")
)
test("SPARK-33930: Script Transform default FIELD DELIMIT should be \u0001 (no serde)") {
withTempView("v") {
val df = Seq(
(1, 2, 3),
(2, 3, 4),
(3, 4, 5)
).toDF("a", "b", "c")
df.createTempView("v")

checkAnswer(
df,
(child: SparkPlan) => createScriptTransformationExec(
input = Seq(
df.col("a").expr,
df.col("b").expr,
df.col("c").expr,
df.col("d").expr,
df.col("e").expr,
df.col("f").expr,
df.col("g").expr),
script = "cat",
output = Seq(
AttributeReference("a", ArrayType(IntegerType))(),
AttributeReference("b", ArrayType(ArrayType(IntegerType)))(),
AttributeReference("c", MapType(StringType, IntegerType))(),
AttributeReference("d", MapType(StringType, ArrayType(StringType)))(),
AttributeReference("e", StructType(
Array(StructField("col1", ArrayType(IntegerType)),
StructField("col2", ArrayType(ArrayType(IntegerType))))))(),
AttributeReference("f", StructType(
checkAnswer(
sql(
s"""
|SELECT TRANSFORM(a, b, c)
| ROW FORMAT DELIMITED
| USING 'cat' AS (a)
| ROW FORMAT DELIMITED
| FIELDS TERMINATED BY '&'
|FROM v
""".stripMargin), identity,
Row("1\u00012\u00013") ::
Row("2\u00013\u00014") ::
Row("3\u00014\u00015") :: Nil)
}
}

test("SPARK-31936: Script transform support ArrayType/MapType/StructType (no serde)") {
assume(TestUtils.testCommandAvailable("python"))
withTempView("v") {
val df = Seq(
(Array(0, 1, 2), Array(Array(0, 1), Array(2)),
Map("a" -> 1), Map("b" -> Array("a", "b"))),
(Array(3, 4, 5), Array(Array(3, 4), Array(5)),
Map("b" -> 2), Map("c" -> Array("c", "d"))),
(Array(6, 7, 8), Array(Array(6, 7), Array(8)),
Map("c" -> 3), Map("d" -> Array("e", "f")))
).toDF("a", "b", "c", "d")
.select('a, 'b, 'c, 'd,
struct('a, 'b).as("e"),
struct('a, 'd).as("f"),
struct(struct('a, 'b), struct('a, 'd)).as("g")
)

checkAnswer(
df,
(child: SparkPlan) => createScriptTransformationExec(
input = Seq(
df.col("a").expr,
df.col("b").expr,
df.col("c").expr,
df.col("d").expr,
df.col("e").expr,
df.col("f").expr,
df.col("g").expr),
script = "cat",
output = Seq(
AttributeReference("a", ArrayType(IntegerType))(),
AttributeReference("b", ArrayType(ArrayType(IntegerType)))(),
AttributeReference("c", MapType(StringType, IntegerType))(),
AttributeReference("d", MapType(StringType, ArrayType(StringType)))(),
AttributeReference("e", StructType(
Array(StructField("col1", ArrayType(IntegerType)),
StructField("col2", ArrayType(ArrayType(IntegerType))))))(),
AttributeReference("f", StructType(
Array(StructField("col1", ArrayType(IntegerType)),
StructField("col2", MapType(StringType, ArrayType(StringType))))))(),
AttributeReference("g", StructType(
Array(StructField("col1", StructType(
Array(StructField("col1", ArrayType(IntegerType)),
StructField("col2", MapType(StringType, ArrayType(StringType))))))(),
AttributeReference("g", StructType(
Array(StructField("col1", StructType(
StructField("col2", ArrayType(ArrayType(IntegerType)))))),
StructField("col2", StructType(
Array(StructField("col1", ArrayType(IntegerType)),
StructField("col2", ArrayType(ArrayType(IntegerType)))))),
StructField("col2", StructType(
Array(StructField("col1", ArrayType(IntegerType)),
StructField("col2", MapType(StringType, ArrayType(StringType)))))))))()),
child = child,
ioschema = defaultIOSchema
),
df.select('a, 'b, 'c, 'd, 'e, 'f, 'g).collect())
}
StructField("col2", MapType(StringType, ArrayType(StringType)))))))))()),
child = child,
ioschema = defaultIOSchema
),
df.select('a, 'b, 'c, 'd, 'e, 'f, 'g).collect())
}
}

test("SPARK-31936: Script transform support 7 level nested complex type (no serde)") {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you update the test title? In the current PR, level 7 looks meaningless.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is only for deep-nested array cases? How about the other deep-nested cases of map/struct?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is only for deep-nested array cases? How about the other deep-nested cases of map/struct?

Updated, how about current?

Expand Down Expand Up @@ -543,28 +568,6 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
}.getMessage
assert(e.contains("Number of levels of nesting supported for Spark SQL" +
" script transform is 7 Unable to work with level 8"))
test("SPARK-33930: Script Transform default FIELD DELIMIT should be \u0001 (no serde)") {
withTempView("v") {
val df = Seq(
(1, 2, 3),
(2, 3, 4),
(3, 4, 5)
).toDF("a", "b", "c")
df.createTempView("v")

checkAnswer(
sql(
s"""
|SELECT TRANSFORM(a, b, c)
| ROW FORMAT DELIMITED
| USING 'cat' AS (a)
| ROW FORMAT DELIMITED
| FIELDS TERMINATED BY '&'
|FROM v
""".stripMargin), identity,
Row("1\u00012\u00013") ::
Row("2\u00013\u00014") ::
Row("3\u00014\u00015") :: Nil)
}
}
}
Expand Down