[SPARK-47764][CORE][SQL] Cleanup shuffle dependencies based on ShuffleCleanupMode #45930
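This change cleans up shuffle dependencies at the end of a SQL execution, dispatching on the query's `ShuffleCleanupMode`. As a rough mental model, the three modes referenced in the diff below behave as sketched here (the names `DoNotCleanup`, `RemoveShuffleFiles`, and `SkipMigration` are confirmed by the diff; the trait layout and comments are an illustrative assumption, not Spark's actual source):

```scala
// Sketch only: the behaviors described in the comments are inferred from
// how the dispatch code in this PR treats each mode.
sealed trait ShuffleCleanupMode

// Default: leave shuffle files to Spark's regular reference-tracking cleanup.
case object DoNotCleanup extends ShuffleCleanupMode

// Eagerly unregister the query's shuffles once the execution finishes.
case object RemoveShuffleFiles extends ShuffleCleanupMode

// Keep the shuffle files, but mark them to be skipped during block-manager
// decommission migration.
case object SkipMigration extends ShuffleCleanupMode
```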
Changes from all commits: 2cc5a8b, 2feb4c2, e30ffa8, 50134ed, e803026, f6ab96f, d0f33d6, 42d172e, b6283cf
**SQLExecution.scala**

```diff
@@ -20,14 +20,16 @@ package org.apache.spark.sql.execution
 import java.util.concurrent.{ConcurrentHashMap, ExecutorService, Future => JFuture}
 import java.util.concurrent.atomic.AtomicLong
 
+import scala.jdk.CollectionConverters._
 import scala.util.control.NonFatal
 
-import org.apache.spark.{ErrorMessageFormat, JobArtifactSet, SparkException, SparkThrowable, SparkThrowableHelper}
+import org.apache.spark.{ErrorMessageFormat, JobArtifactSet, SparkEnv, SparkException, SparkThrowable, SparkThrowableHelper}
 import org.apache.spark.SparkContext.{SPARK_JOB_DESCRIPTION, SPARK_JOB_INTERRUPT_ON_CANCEL}
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config.{SPARK_DRIVER_PREFIX, SPARK_EXECUTOR_PREFIX}
 import org.apache.spark.internal.config.Tests.IS_TESTING
 import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
 import org.apache.spark.sql.execution.ui.{SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.StaticSQLConf.SQL_EVENT_TRUNCATE_LENGTH
```
```diff
@@ -115,6 +117,7 @@ object SQLExecution extends Logging {
     withSQLConfPropagated(sparkSession) {
       var ex: Option[Throwable] = None
+      var isExecutedPlanAvailable = false
       val startTime = System.nanoTime()
       val startEvent = SparkListenerSQLExecutionStart(
         executionId = executionId,
```
```diff
@@ -147,6 +150,7 @@ object SQLExecution extends Logging {
           }
           sc.listenerBus.post(
             startEvent.copy(physicalPlanDescription = planDesc, sparkPlanInfo = planInfo))
+          isExecutedPlanAvailable = true
           f()
         }
       } catch {
```
```diff
@@ -161,6 +165,24 @@ object SQLExecution extends Logging {
           case e =>
             Utils.exceptionString(e)
         }
+        if (queryExecution.shuffleCleanupMode != DoNotCleanup
+          && isExecutedPlanAvailable) {
+          val shuffleIds = queryExecution.executedPlan match {
```
> **Contributor:** It seems the root node can be a command. Shall we collect all the `AdaptiveSparkPlanExec` inside the plan?
>
> **Contributor:** Oh, this is a good catch! I think we should. cc @bozhang2820
>
> **Contributor (Author):** I could be wrong, but I thought …
>
> **Contributor:** Ideally we should clean up shuffles for CTAS and INSERT as well, as they also run queries.
```diff
+            case ae: AdaptiveSparkPlanExec =>
+              ae.context.shuffleIds.asScala.keys
+            case _ =>
+              Iterable.empty
+          }
+          shuffleIds.foreach { shuffleId =>
+            queryExecution.shuffleCleanupMode match {
+              case RemoveShuffleFiles =>
+                SparkEnv.get.shuffleManager.unregisterShuffle(shuffleId)
```
> **Contributor:** Shall we call …
>
> **Contributor (Author):** Thanks for catching this! Will fix this in a follow-up ASAP.
>
> **Contributor (Author):** Created #46302.
```diff
+              case SkipMigration =>
+                SparkEnv.get.blockManager.migratableResolver.addShuffleToSkip(shuffleId)
+              case _ => // this should not happen
+            }
+          }
+        }
         val event = SparkListenerSQLExecutionEnd(
           executionId,
           System.currentTimeMillis(),
```
> **Contributor:** If the value does not matter, shall we just use the `Object` type and always pass `null`?
>
> **Contributor:** Unfortunately, Guava cache won't accept null values...
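For context on that last exchange: Guava's `Cache` rejects null keys and values (`put` throws `NullPointerException`), so when only key membership matters, the usual workaround is a shared sentinel value. A minimal, self-contained Scala sketch; the cache and names here are illustrative, not the code under review:

```scala
import com.google.common.cache.CacheBuilder

object SentinelValueExample {
  // Guava caches reject nulls, so use one shared placeholder object as the
  // value when only the presence of the key matters.
  private val Sentinel = new Object

  private val cache = CacheBuilder.newBuilder()
    .maximumSize(1000)
    .build[java.lang.Long, Object]()

  def mark(id: Long): Unit = cache.put(id, Sentinel) // fine
  // cache.put(id, null) would throw NullPointerException at runtime.

  def seen(id: Long): Boolean = cache.getIfPresent(id) != null
}
```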