Skip to content

Commit b01dd12

Browse files
karenfeng authored and cloud-fan committed
[SPARK-34555][SQL] Resolve metadata output from DataFrame
### What changes were proposed in this pull request? Add metadataOutput as a fallback to resolution. Builds off apache#31654. ### Why are the changes needed? The metadata columns could not be resolved via `df.col("metadataColName")` from the DataFrame API. ### Does this PR introduce _any_ user-facing change? Yes, the metadata columns can now be resolved as described above. ### How was this patch tested? Scala unit test. Closes apache#31668 from karenfeng/spark-34555. Authored-by: Karen Feng <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent 56edb81 commit b01dd12

File tree

2 files changed

+25
-1
lines changed

2 files changed

+25
-1
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,8 @@ abstract class LogicalPlan
9595

9696
private[this] lazy val outputAttributes = AttributeSeq(output)
9797

98+
private[this] lazy val outputMetadataAttributes = AttributeSeq(metadataOutput)
99+
98100
/**
99101
* Optionally resolves the given strings to a [[NamedExpression]] using the input from all child
100102
* nodes of this LogicalPlan. The attribute is expressed as
@@ -115,6 +117,7 @@ abstract class LogicalPlan
115117
nameParts: Seq[String],
116118
resolver: Resolver): Option[NamedExpression] =
117119
outputAttributes.resolve(nameParts, resolver)
120+
.orElse(outputMetadataAttributes.resolve(nameParts, resolver))
118121

119122
/**
120123
* Given an attribute name, split it to name parts by dot, but
@@ -124,7 +127,7 @@ abstract class LogicalPlan
124127
def resolveQuoted(
125128
name: String,
126129
resolver: Resolver): Option[NamedExpression] = {
127-
outputAttributes.resolve(UnresolvedAttribute.parseAttributeName(name), resolver)
130+
resolve(UnresolvedAttribute.parseAttributeName(name), resolver)
128131
}
129132

130133
/**

sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2607,6 +2607,27 @@ class DataSourceV2SQLSuite
26072607
}
26082608
}
26092609

2610+
test("SPARK-34555: Resolve DataFrame metadata column") {
2611+
val tbl = s"${catalogAndNamespace}table"
2612+
withTable(tbl) {
2613+
sql(s"CREATE TABLE $tbl (id bigint, data string) USING $v2Format " +
2614+
"PARTITIONED BY (bucket(4, id), id)")
2615+
sql(s"INSERT INTO $tbl VALUES (1, 'a'), (2, 'b'), (3, 'c')")
2616+
val table = spark.table(tbl)
2617+
val dfQuery = table.select(
2618+
table.col("id"),
2619+
table.col("data"),
2620+
table.col("index"),
2621+
table.col("_partition")
2622+
)
2623+
2624+
checkAnswer(
2625+
dfQuery,
2626+
Seq(Row(1, "a", 0, "3/1"), Row(2, "b", 0, "0/2"), Row(3, "c", 0, "1/3"))
2627+
)
2628+
}
2629+
}
2630+
26102631
test("SPARK-34561: drop/add columns to a dataset of `DESCRIBE TABLE`") {
26112632
val tbl = s"${catalogAndNamespace}tbl"
26122633
withTable(tbl) {

0 commit comments

Comments (0)