diff --git a/docs/sql-performance-tuning.md b/docs/sql-performance-tuning.md
index 12b79828e44cb..39efdc3df3645 100644
--- a/docs/sql-performance-tuning.md
+++ b/docs/sql-performance-tuning.md
@@ -441,7 +441,7 @@ The following SQL properties enable Storage Partition Join in different join que
   <tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr>
   <tr>
     <td><code>spark.sql.sources.v2.bucketing.enabled</code></td>
-    <td>false</td>
+    <td>true</td>
     <td>
       When true, try to eliminate shuffle by using the partitioning reported by a compatible V2 data source.
     </td>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index ef1a6521eb913..b3c7da3685956 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1729,7 +1729,7 @@ object SQLConf {
         "avoid shuffle if necessary.")
       .version("3.3.0")
       .booleanConf
-      .createWithDefault(false)
+      .createWithDefault(true)
 
   val V2_BUCKETING_PUSH_PART_VALUES_ENABLED =
     buildConf("spark.sql.sources.v2.bucketing.pushPartValues.enabled")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
index 2c24cc7d570ba..1ed5ea4216a9f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
@@ -1217,7 +1217,8 @@ abstract class DynamicPartitionPruningSuiteBase
   test("SPARK-32509: Unused Dynamic Pruning filter shouldn't affect " +
     "canonicalization and exchange reuse") {
     withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") {
-      withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
+      withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
+        SQLConf.V2_BUCKETING_ENABLED.key -> "false") {
         val df = sql(
           """ WITH view1 as (
             | SELECT f.store_id FROM fact_stats f WHERE f.units_sold = 70
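
Note (not part of the patch): since this change flips the default of `spark.sql.sources.v2.bucketing.enabled` from false to true, applications that relied on the old behaviour can set the flag back to false themselves. Below is a minimal sketch, assuming a standard SparkSession with the Spark SQL artifacts on the classpath; the object and app names are illustrative only.

```scala
import org.apache.spark.sql.SparkSession

// Illustrative only: overrides the new default of
// spark.sql.sources.v2.bucketing.enabled for a single application.
object V2BucketingOptOut {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("v2-bucketing-opt-out")   // hypothetical app name
      .master("local[*]")
      // Restore the pre-change default at startup.
      .config("spark.sql.sources.v2.bucketing.enabled", "false")
      .getOrCreate()

    // The property is a runtime SQL conf, so it can also be toggled later:
    spark.conf.set("spark.sql.sources.v2.bucketing.enabled", "true")

    spark.stop()
  }
}
```

The test change in DynamicPartitionPruningSuite does the same thing in miniature: it pins `SQLConf.V2_BUCKETING_ENABLED` to "false" via `withSQLConf` so the SPARK-32509 assertion keeps exercising the pre-existing exchange-reuse path under the new default.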