From 7bd488b4d80c70ce51baccc50db1a5f5f22d5d44 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 16 Jan 2023 13:57:38 +0800 Subject: [PATCH 1/6] Override equals and hashCode for TableIdentifier --- .../scala/org/apache/spark/sql/catalyst/identifiers.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala index 2f818fecad93..724277778f90 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala @@ -104,6 +104,13 @@ case class TableIdentifier(table: String, database: Option[String], catalog: Opt def this(table: String) = this(table, None, None) def this(table: String, database: Option[String]) = this(table, database, None) + + override def equals(obj: Any): Boolean = obj match { + case t: TableIdentifier => table == t.table && database == t.database && catalog == t.catalog + case _ => false + } + + override def hashCode(): Int = unquotedString.hashCode } /** A fully qualified identifier for a table (i.e., database.tableName) */ From 104cdaca188cad70ab4946d2fcd8744202ac16bd Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 16 Jan 2023 14:22:58 +0800 Subject: [PATCH 2/6] update golden files --- .../test/resources/sql-tests/results/explain-aqe.sql.out | 6 +++--- .../src/test/resources/sql-tests/results/explain.sql.out | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index 08a63eda0b06..458f8744782c 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -1086,20 +1086,20 @@ struct +- 'UnresolvedRelation [explain_temp4], [], false == Analyzed Logical Plan == -InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@7d811218, [key, val] +InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@5c30f6b5, [key, val] +- Project [key#x, val#x] +- SubqueryAlias spark_catalog.default.explain_temp4 +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet == Optimized Logical Plan == -InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@7d811218, [key, val] +InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@5c30f6b5, [key, val] +- WriteFiles +- Sort [val#x ASC NULLS FIRST], false +- Project [key#x, empty2null(val#x) AS val#x] +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet == Physical Plan == -Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@7d811218, [key, val] +Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@5c30f6b5, [key, val] +- WriteFiles +- *Sort [val#x ASC NULLS FIRST], false, 0 +- *Project [key#x, empty2null(val#x) AS val#x] diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index d0813ecd52ee..2d20c609b76f 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -1028,20 +1028,20 @@ struct +- 'UnresolvedRelation [explain_temp4], [], false == Analyzed Logical Plan == -InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@7d811218, [key, val] +InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@5c30f6b5, [key, val] +- Project [key#x, val#x] +- SubqueryAlias spark_catalog.default.explain_temp4 +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet == Optimized Logical Plan == -InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@7d811218, [key, val] +InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@5c30f6b5, [key, val] +- WriteFiles +- Sort [val#x ASC NULLS FIRST], false +- Project [key#x, empty2null(val#x) AS val#x] +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet == Physical Plan == -Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@7d811218, [key, val] +Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@5c30f6b5, [key, val] +- WriteFiles +- *Sort [val#x ASC NULLS FIRST], false, 0 +- *Project [key#x, empty2null(val#x) AS val#x] From 38be9a0a4e6cc9f5209661209174f22d419ad188 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 16 Jan 2023 17:12:30 +0800 Subject: [PATCH 3/6] remove hashCode and revert change --- .../scala/org/apache/spark/sql/catalyst/identifiers.scala | 7 ------- .../scala/org/apache/spark/sql/SQLQueryTestHelper.scala | 5 +++-- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala index 724277778f90..2f818fecad93 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala @@ -104,13 +104,6 @@ case class TableIdentifier(table: String, database: Option[String], catalog: Opt def this(table: String) = this(table, None, None) def this(table: String, database: Option[String]) = this(table, database, None) - - override def equals(obj: Any): Boolean = obj match { - case t: TableIdentifier => table == t.table && database == t.database && catalog == t.catalog - case _ => false - } - - override def hashCode(): Int = unquotedString.hashCode } /** A fully qualified identifier for a table (i.e., database.tableName) */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index fb4bd79780ce..8e89b4caeffb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -39,14 +39,15 @@ trait SQLQueryTestHelper { line.replaceAll("#\\d+", "#x") .replaceAll("plan_id=\\d+", "plan_id=x") .replaceAll( - s"Location.*$clsName/", - s"Location $notIncludedMsg/{warehouse_dir}/") + s"Location.*$clsName/", + s"Location $notIncludedMsg/{warehouse_dir}/") .replaceAll(s"file:.*$clsName", s"Location $notIncludedMsg/{warehouse_dir}") .replaceAll("Created By.*", s"Created By $notIncludedMsg") .replaceAll("Created Time.*", s"Created Time $notIncludedMsg") .replaceAll("Last Access.*", s"Last Access $notIncludedMsg") .replaceAll("Partition Statistics\t\\d+", s"Partition Statistics\t$notIncludedMsg") .replaceAll("\\*\\(\\d+\\) ", "*") // remove the WholeStageCodegen codegenStageIds + .replaceAll("@[0-9a-z]+,", "@x,") // remove hashCode } From b0f8c6ce394b6bb22bea812afeded8b9605eacf6 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 16 Jan 2023 17:16:34 +0800 Subject: [PATCH 4/6] revert change --- .../test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index 8e89b4caeffb..99b7447f2f3d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -39,8 +39,8 @@ trait SQLQueryTestHelper { line.replaceAll("#\\d+", "#x") .replaceAll("plan_id=\\d+", "plan_id=x") .replaceAll( - s"Location.*$clsName/", - s"Location $notIncludedMsg/{warehouse_dir}/") + s"Location.*$clsName/", + s"Location $notIncludedMsg/{warehouse_dir}/") .replaceAll(s"file:.*$clsName", s"Location $notIncludedMsg/{warehouse_dir}") .replaceAll("Created By.*", s"Created By $notIncludedMsg") .replaceAll("Created Time.*", s"Created Time $notIncludedMsg") From f30204e625c529675f23b69925e31d75dd3907f6 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 16 Jan 2023 17:25:43 +0800 Subject: [PATCH 5/6] update golden files --- .../test/resources/sql-tests/results/explain-aqe.sql.out | 6 +++--- .../src/test/resources/sql-tests/results/explain.sql.out | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index 458f8744782c..fe604292c6f8 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -1086,20 +1086,20 @@ struct +- 'UnresolvedRelation [explain_temp4], [], false == Analyzed Logical Plan == -InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@5c30f6b5, [key, val] +InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@x, [key, val] +- Project [key#x, val#x] +- SubqueryAlias spark_catalog.default.explain_temp4 +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet == Optimized Logical Plan == -InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@5c30f6b5, [key, val] +InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@x, [key, val] +- WriteFiles +- Sort [val#x ASC NULLS FIRST], false +- Project [key#x, empty2null(val#x) AS val#x] +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet == Physical Plan == -Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@5c30f6b5, [key, val] +Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@x, [key, val] +- WriteFiles +- *Sort [val#x ASC NULLS FIRST], false, 0 +- *Project [key#x, empty2null(val#x) AS val#x] diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index 2d20c609b76f..8133f03035cc 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -1028,20 +1028,20 @@ struct +- 'UnresolvedRelation [explain_temp4], [], false == Analyzed Logical Plan == -InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@5c30f6b5, [key, val] +InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@x, [key, val] +- Project [key#x, val#x] +- SubqueryAlias spark_catalog.default.explain_temp4 +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet == Optimized Logical Plan == -InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@5c30f6b5, [key, val] +InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@x, [key, val] +- WriteFiles +- Sort [val#x ASC NULLS FIRST], false +- Project [key#x, empty2null(val#x) AS val#x] +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet == Physical Plan == -Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@5c30f6b5, [key, val] +Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@x, [key, val] +- WriteFiles +- *Sort [val#x ASC NULLS FIRST], false, 0 +- *Project [key#x, empty2null(val#x) AS val#x] From bfe25b2860178041553adf3f407403cbe9924f2e Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 16 Jan 2023 19:24:19 +0800 Subject: [PATCH 6/6] remove @hashCode --- .../test/resources/sql-tests/results/explain-aqe.sql.out | 6 +++--- .../src/test/resources/sql-tests/results/explain.sql.out | 6 +++--- .../scala/org/apache/spark/sql/SQLQueryTestHelper.scala | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index fe604292c6f8..052a4576c204 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -1086,20 +1086,20 @@ struct +- 'UnresolvedRelation [explain_temp4], [], false == Analyzed Logical Plan == -InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@x, [key, val] +InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex, [key, val] +- Project [key#x, val#x] +- SubqueryAlias spark_catalog.default.explain_temp4 +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet == Optimized Logical Plan == -InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@x, [key, val] +InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex, [key, val] +- WriteFiles +- Sort [val#x ASC NULLS FIRST], false +- Project [key#x, empty2null(val#x) AS val#x] +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet == Physical Plan == -Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@x, [key, val] +Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex, [key, val] +- WriteFiles +- *Sort [val#x ASC NULLS FIRST], false, 0 +- *Project [key#x, empty2null(val#x) AS val#x] diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index 8133f03035cc..9489fa977c52 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -1028,20 +1028,20 @@ struct +- 'UnresolvedRelation [explain_temp4], [], false == Analyzed Logical Plan == -InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@x, [key, val] +InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex, [key, val] +- Project [key#x, val#x] +- SubqueryAlias spark_catalog.default.explain_temp4 +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet == Optimized Logical Plan == -InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@x, [key, val] +InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex, [key, val] +- WriteFiles +- Sort [val#x ASC NULLS FIRST], false +- Project [key#x, empty2null(val#x) AS val#x] +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet == Physical Plan == -Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex@x, [key, val] +Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex, [key, val] +- WriteFiles +- *Sort [val#x ASC NULLS FIRST], false, 0 +- *Project [key#x, empty2null(val#x) AS val#x] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index 99b7447f2f3d..95cc26cedb87 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -47,7 +47,7 @@ trait SQLQueryTestHelper { .replaceAll("Last Access.*", s"Last Access $notIncludedMsg") .replaceAll("Partition Statistics\t\\d+", s"Partition Statistics\t$notIncludedMsg") .replaceAll("\\*\\(\\d+\\) ", "*") // remove the WholeStageCodegen codegenStageIds - .replaceAll("@[0-9a-z]+,", "@x,") // remove hashCode + .replaceAll("@[0-9a-z]+,", ",") // remove hashCode }