diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index e666200a78d4..4b753e1f28e5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -339,11 +339,11 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
       new ResolveHints.RemoveAllHints),
     Batch("Nondeterministic", Once,
       PullOutNondeterministic),
+    Batch("UpdateNullability", Once,
+      UpdateAttributeNullability),
     Batch("UDF", Once,
       HandleNullInputsForUDF,
       ResolveEncodersInUDF),
-    Batch("UpdateNullability", Once,
-      UpdateAttributeNullability),
     Batch("Subquery", Once,
       UpdateOuterReferences),
     Batch("Cleanup", fixedPoint,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
index 87ca3a07c4d5..fe47d6c68555 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
@@ -1183,4 +1183,15 @@ class UDFSuite extends QueryTest with SharedSparkSession {
     df10.select(zip_with(col("array1"), col("array2"), (b1, b2) => reverseThenConcat2(b1, b2)))
     checkAnswer(test10, Row(Array(Row("cbaihg"), Row("fedlkj"))) :: Nil)
   }
+
+  test("SPARK-47927: Correctly pass null values derived from join to UDF") {
+    val f = udf[Tuple1[Option[Int]], Tuple1[Option[Int]]](identity)
+    val ds1 = Seq(1).toDS()
+    val ds2 = Seq[Int]().toDS()
+
+    checkAnswer(
+      ds1.join(ds2, ds1("value") === ds2("value"), "left_outer")
+        .select(f(struct(ds2("value").as("_1")))),
+      Row(Row(null)))
+  }
 }