Skip to content

Commit aed7c5e

Browse files
committed
nullability and remove rand
1 parent fb9de34 commit aed7c5e

File tree

3 files changed

+60
-40
lines changed

3 files changed

+60
-40
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala

Lines changed: 39 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717

1818
package org.apache.spark.sql.catalyst.analysis
1919

20+
import scala.util.control.NonFatal
21+
2022
import org.apache.spark.sql.catalyst.InternalRow
21-
import org.apache.spark.sql.catalyst.expressions.{Cast, InterpretedProjection, Unevaluable}
23+
import org.apache.spark.sql.catalyst.expressions.Cast
2224
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
2325
import org.apache.spark.sql.catalyst.rules.Rule
2426
import org.apache.spark.sql.types.{StructField, StructType}
@@ -34,26 +36,6 @@ object ResolveInlineTables extends Rule[LogicalPlan] {
3436
convert(table)
3537
}
3638

37-
/**
38-
* Validates that all inline table data are valid expressions that can be evaluated.
39-
*
40-
* This is package visible for unit testing.
41-
*/
42-
private[analysis] def validateInputEvaluable(table: UnresolvedInlineTable): Unit = {
43-
table.rows.foreach { row =>
44-
row.foreach { e =>
45-
// We want to support foldable expressions and nondeterministic expressions (e.g. rand)
46-
// that are evaluable.
47-
// Note that there are expressions that are evaluable but not actually valid for inline
48-
// tables. One example is AttributeReference. However, since UnresolvedInlineTable is a
49-
// leaf node, the analyzer would not resolve UnresolvedAttribute into AttributeReference.
50-
if (!e.resolved || e.isInstanceOf[Unevaluable]) {
51-
e.failAnalysis(s"cannot evaluate expression ${e.sql} in inline table definition")
52-
}
53-
}
54-
}
55-
}
56-
5739
/**
5840
* Validates the input data dimension:
5941
* 1. All rows have the same cardinality.
@@ -72,6 +54,23 @@ object ResolveInlineTables extends Rule[LogicalPlan] {
7254
}
7355
}
7456

57+
/**
58+
* Validates that all inline table data are valid expressions that can be evaluated
59+
* (in this they must be foldable).
60+
*
61+
* This is package visible for unit testing.
62+
*/
63+
private[analysis] def validateInputEvaluable(table: UnresolvedInlineTable): Unit = {
64+
table.rows.foreach { row =>
65+
row.foreach { e =>
66+
// Note that nondeterministic expressions are not supported since they are not foldable.
67+
if (!e.resolved || !e.foldable) {
68+
e.failAnalysis(s"cannot evaluate expression ${e.sql} in inline table definition")
69+
}
70+
}
71+
}
72+
}
73+
7574
/**
7675
* Convert a valid (with right shape and foldable inputs) [[UnresolvedInlineTable]]
7776
* into a [[LocalRelation]].
@@ -81,24 +80,33 @@ object ResolveInlineTables extends Rule[LogicalPlan] {
8180
* This is package visible for unit testing.
8281
*/
8382
private[analysis] def convert(table: UnresolvedInlineTable): LocalRelation = {
84-
// For each column, traverse all the values and find a common data type.
85-
val targetTypes = table.rows.transpose.zip(table.names).map { case (column, name) =>
83+
// For each column, traverse all the values and find a common data type and nullability.
84+
val fields = table.rows.transpose.zip(table.names).map { case (column, name) =>
8685
val inputTypes = column.map(_.dataType)
87-
TypeCoercion.findWiderTypeWithoutStringPromotion(inputTypes).getOrElse {
86+
val tpe = TypeCoercion.findWiderTypeWithoutStringPromotion(inputTypes).getOrElse {
8887
table.failAnalysis(s"incompatible types found in column $name for inline table")
8988
}
89+
StructField(name, tpe, nullable = column.exists(_.nullable))
9090
}
91-
assert(targetTypes.size == table.names.size)
91+
val attributes = StructType(fields).toAttributes
92+
assert(fields.size == table.names.size)
9293

9394
val newRows: Seq[InternalRow] = table.rows.map { row =>
94-
new InterpretedProjection(row.zipWithIndex.map { case (e, ci) =>
95-
val targetType = targetTypes(ci)
96-
if (e.dataType.sameType(targetType)) e else Cast(e, targetType)
97-
}).apply(null)
95+
InternalRow.fromSeq(row.zipWithIndex.map { case (e, ci) =>
96+
val targetType = fields(ci).dataType
97+
try {
98+
if (e.dataType.sameType(targetType)) {
99+
e.eval()
100+
} else {
101+
Cast(e, targetType).eval()
102+
}
103+
} catch {
104+
case NonFatal(ex) =>
105+
table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}")
106+
}
107+
})
98108
}
99109

100-
val attributes = StructType(targetTypes.zip(table.names)
101-
.map { case (typ, name) => StructField(name, typ) }).toAttributes
102110
LocalRelation(attributes, newRows)
103111
}
104112
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import org.apache.spark.sql.AnalysisException
2323
import org.apache.spark.sql.catalyst.expressions.{Literal, Rand}
2424
import org.apache.spark.sql.catalyst.expressions.aggregate.Count
2525
import org.apache.spark.sql.catalyst.plans.PlanTest
26-
import org.apache.spark.sql.types.LongType
26+
import org.apache.spark.sql.types.{LongType, NullType}
2727

2828
/**
2929
* Unit tests for [[ResolveInlineTables]]. Note that there are also test cases defined in
@@ -38,20 +38,22 @@ class ResolveInlineTablesSuite extends PlanTest with BeforeAndAfter {
3838
ResolveInlineTables.validateInputEvaluable(
3939
UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)))))
4040

41-
// nondeterministic (rand) should be fine
42-
ResolveInlineTables.validateInputEvaluable(
43-
UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(Rand(1)))))
41+
// nondeterministic (rand) should not work
42+
intercept[AnalysisException] {
43+
ResolveInlineTables.validateInputEvaluable(
44+
UnresolvedInlineTable(Seq("c1"), Seq(Seq(Rand(1)))))
45+
}
4446

4547
// aggregate should not work
4648
intercept[AnalysisException] {
4749
ResolveInlineTables.validateInputEvaluable(
48-
UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(Count(lit(1))))))
50+
UnresolvedInlineTable(Seq("c1"), Seq(Seq(Count(lit(1))))))
4951
}
5052

5153
// unresolved attribute should not work
5254
intercept[AnalysisException] {
5355
ResolveInlineTables.validateInputEvaluable(
54-
UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(UnresolvedAttribute("A")))))
56+
UnresolvedInlineTable(Seq("c1"), Seq(Seq(UnresolvedAttribute("A")))))
5557
}
5658
}
5759

@@ -86,4 +88,14 @@ class ResolveInlineTablesSuite extends PlanTest with BeforeAndAfter {
8688
assert(converted.data(0).getLong(0) == 1L)
8789
assert(converted.data(1).getLong(0) == 2L)
8890
}
91+
92+
test("nullability inference in convert") {
93+
val table1 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L))))
94+
val converted1 = ResolveInlineTables.convert(table1)
95+
assert(!converted1.schema.fields(0).nullable)
96+
97+
val table2 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(Literal(null, NullType))))
98+
val converted2 = ResolveInlineTables.convert(table2)
99+
assert(converted2.schema.fields(0).nullable)
100+
}
89101
}

sql/core/src/test/resources/sql-tests/results/inline-table.sql.out

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,10 @@ two 3.0
9494
-- !query 10
9595
select * from values ("one", rand(5)), ("two", 3.0D) as data(a, b)
9696
-- !query 10 schema
97-
struct<a:string,b:double>
97+
struct<>
9898
-- !query 10 output
99-
one 0.087440518337355
100-
two 3.0
99+
org.apache.spark.sql.AnalysisException
100+
cannot evaluate expression rand(5) in inline table definition; line 1 pos 29
101101

102102

103103
-- !query 11

0 commit comments

Comments
 (0)