diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index fb43db5ec57d..ce4a9cae2d17 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -2894,8 +2894,9 @@ object SQLConf {
     buildConf("spark.sql.addPartitionInBatch.size")
       .internal()
       .doc("The number of partitions to be handled in one turn when use " +
-        "`AlterTableAddPartitionCommand` to add partitions into table. The smaller " +
-        "batch size is, the less memory is required for the real handler, e.g. Hive Metastore.")
+        "`AlterTableAddPartitionCommand` or `RepairTableCommand` to add partitions into table. " +
+        "The smaller batch size is, the less memory is required for the real handler, e.g. " +
+        "Hive Metastore.")
       .version("3.0.0")
       .intConf
       .checkValue(_ > 0, "The value of spark.sql.addPartitionInBatch.size must be positive")
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index c7456cd9d205..0876b5f05876 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -771,7 +771,7 @@ case class RepairTableCommand(
     // Hive metastore may not have enough memory to handle millions of partitions in single RPC,
     // we should split them into smaller batches. Since Hive client is not thread safe, we cannot
     // do this in parallel.
-    val batchSize = 100
+    val batchSize = spark.conf.get(SQLConf.ADD_PARTITION_BATCH_SIZE)
     partitionSpecsAndLocs.toIterator.grouped(batchSize).foreach { batch =>
       val now = MILLISECONDS.toSeconds(System.currentTimeMillis())
       val parts = batch.map { case (spec, location) =>
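
For context, here is a minimal sketch of the batching pattern the second hunk makes configurable: `Iterator.grouped(n)` yields chunks of at most `n` elements, so each metastore call carries a bounded number of partitions. This is plain Scala with no Spark dependency; names such as `addPartitionsToMetastore` and `confBatchSize` are hypothetical stand-ins for the Hive client call and the `spark.sql.addPartitionInBatch.size` lookup in the patch.

```scala
object BatchedAddPartitionsSketch {
  // Hypothetical stand-in for the Hive metastore RPC. The real Hive client
  // is not thread safe, which is why the patch keeps the loop sequential.
  def addPartitionsToMetastore(parts: Seq[String]): Unit =
    println(s"RPC adding ${parts.size} partition(s): ${parts.mkString(", ")}")

  def main(args: Array[String]): Unit = {
    // Stand-in for reading SQLConf.ADD_PARTITION_BATCH_SIZE (previously a
    // hardcoded 100 in RepairTableCommand).
    val confBatchSize = 3
    val partitionSpecs = (1 to 10).map(i => f"dt=2020-01-$i%02d")
    // grouped() splits the iterator into batches of at most confBatchSize,
    // so no single RPC has to hold all partitions in memory at once.
    partitionSpecs.iterator.grouped(confBatchSize).foreach { batch =>
      addPartitionsToMetastore(batch)
    }
  }
}
```

A smaller batch size trades more round trips for a smaller per-RPC payload on the metastore side, which is the knob this change exposes to `RepairTableCommand` (e.g. `MSCK REPAIR TABLE`) in addition to `AlterTableAddPartitionCommand`.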