-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-47081][CONNECT] Support Query Execution Progress #45150
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
084d257
234b927
f78519e
962dfd4
4947e79
228717f
36d7924
dfb29e4
be08f53
be7c445
1b1a61a
aa924c0
7cedd98
e2063f2
84425c3
50e4cbd
677e70b
71033d0
5687f6c
2d75941
30560d0
453bda9
cc864c9
ad4791e
b662410
85caee5
ac91982
deffbbc
415bdd8
6fcc36f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,191 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.connect.execution | ||
|
|
||
| import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger} | ||
|
|
||
| import org.apache.spark.connect.proto.ExecutePlanResponse | ||
| import org.apache.spark.internal.Logging | ||
| import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerTaskEnd, SparkListenerTaskStart} | ||
|
|
||
| /** | ||
| * A listener that tracks the execution of jobs and stages for a given set of tags. This is used | ||
| * to track the progress of a job that is being executed through the connect API. | ||
| * | ||
| * The listener is instantiated once for the SparkConnectService and then used to track all the | ||
| * current query executions. | ||
| */ | ||
| private[connect] class ConnectProgressExecutionListener extends SparkListener with Logging { | ||
grundprinzip marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| /** | ||
| * A tracker for a given tag. This is used to track the progress of an operation that is being | ||
| * executed through the connect API. | ||
| */ | ||
| class ExecutionTracker(val tag: String) { | ||
|
|
||
/**
 * Mutable progress counters for a single stage.
 *
 * Mutations are expected to go through [[update]], which runs them while holding `lock`.
 * [[toProto]] takes the same lock, so the exported message is a consistent snapshot of all
 * fields rather than a mix of values from before and after a concurrent update.
 *
 * @param stageId        id of the stage these counters belong to
 * @param numTasks       number of tasks of the stage
 * @param completedTasks number of tasks counted as finished so far
 * @param inputBytesRead accumulated input bytes read, as reported for finished tasks
 * @param completed      whether the stage has been marked as done
 */
class StageInfo(
    val stageId: Int,
    var numTasks: Int,
    var completedTasks: Int = 0,
    var inputBytesRead: Long = 0,
    var completed: Boolean = false) {

  // Monitor guarding the mutable fields above.
  val lock = new Object

  /**
   * Applies the mutation `i` to this instance while holding `lock`.
   *
   * @param i function that mutates this instance
   */
  def update(i: StageInfo => Unit): Unit = lock.synchronized {
    i(this)
  }

  /**
   * Converts the current counters into their protobuf representation.
   *
   * The fields are read under `lock` so that an in-progress [[update]] is either fully
   * reflected or not reflected at all. The monitor is reentrant, so calling this from within
   * an `update` callback does not deadlock.
   *
   * @return the protobuf `StageInfo` message for this stage
   */
  def toProto(): ExecutePlanResponse.ExecutionProgress.StageInfo = lock.synchronized {
    ExecutePlanResponse.ExecutionProgress.StageInfo
      .newBuilder()
      .setStageId(stageId)
      .setNumTasks(numTasks)
      .setNumCompletedTasks(completedTasks)
      .setInputBytesRead(inputBytesRead)
      .setDone(completed)
      .build()
  }
}
|
|
||
// The ids of the jobs tracked by this tracker. Entries are only ever added, never removed;
// the immutable set is replaced as a whole on every addition. The field is @volatile so that
// a replaced reference is visible to readers that do not hold the tracker's monitor.
@volatile private[ConnectProgressExecutionListener] var jobs: Set[Int] = Set()

// The stages tracked by this tracker, keyed by stage id. Entries are only ever added, never
// removed; the immutable map is replaced as a whole on every addition. @volatile for the same
// reason as `jobs`.
@volatile private[ConnectProgressExecutionListener] var stages: Map[Int, StageInfo] = Map.empty

// The tracker is marked as dirty if it has new progress to report.
private[ConnectProgressExecutionListener] val dirty = new AtomicBoolean(false)

// Tracks all currently running tasks for a particular tracker.
private[ConnectProgressExecutionListener] val inFlightTasks = new AtomicInteger(0)
|
|
||
/**
 * Yield the current state of the tracker if it is dirty. A consumer of the tracker can
 * provide a callback that will be called with the current state of the tracker if the tracker
 * has new progress to report.
 *
 * The dirty flag is cleared atomically *before* the state is read. Progress recorded while
 * the callback runs therefore marks the tracker dirty again and is reported by the next call,
 * instead of being wiped out by a reset that happens after the callback. If the callback
 * fails with a non-fatal exception, the flag is restored so the progress is not lost, and the
 * exception is rethrown.
 *
 * @param thunk callback receiving the tracked stages and the number of in-flight tasks
 */
def yieldWhenDirty(thunk: (Seq[StageInfo], Long) => Unit): Unit = {
  // compareAndSet claims the pending update; only the caller that flips the flag reports.
  if (dirty.compareAndSet(true, false)) {
    try {
      thunk(stages.values.toSeq, inFlightTasks.get())
    } catch {
      case scala.util.control.NonFatal(e) =>
        // Nothing was delivered: keep the tracker dirty, as it was before this call.
        dirty.set(true)
        throw e
    }
  }
}
|
|
||
/**
 * Starts tracking a job: records its id and registers a fresh `StageInfo` (zeroed counters)
 * for every stage listed in the job start event, then marks the tracker as dirty. Runs while
 * holding this tracker's monitor.
 *
 * @param job the job start event describing the job and its stages
 */
def addJob(job: SparkListenerJobStart): Unit = synchronized {
  jobs += job.jobId
  stages = job.stageInfos.foldLeft(stages) { (known, info) =>
    known + (info.stageId -> new StageInfo(info.stageId, info.numTasks))
  }
  dirty.set(true)
}
|
|
||
/** Returns the number of jobs currently recorded in this tracker. */
def jobCount(): Int = jobs.size
|
|
||
/** Returns the number of stages currently recorded in this tracker. */
def stageCount(): Int = stages.size
| } | ||
|
|
||
// All registered trackers, keyed by their tag. A concurrent map is used for the registry.
val trackedTags: collection.concurrent.TrieMap[String, ExecutionTracker] =
  collection.concurrent.TrieMap.empty
|
|
||
/**
 * Adds a newly started job to every registered tracker whose tag is listed in the job's
 * "spark.job.tags" property. The property value is treated as a comma-separated list; each
 * entry is trimmed before it is looked up.
 *
 * Jobs that carry no properties object, or no "spark.job.tags" property, are ignored.
 *
 * @param jobStart the job start event
 */
override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
  // Neither the properties object nor the individual property is guaranteed to be set, so
  // both lookups are wrapped in Option instead of being dereferenced directly.
  val jobTags = Option(jobStart.properties)
    .flatMap(props => Option(props.getProperty("spark.job.tags")))
    .map(_.split(",").map(_.trim).toSet)
    .getOrElse(Set.empty[String])

  jobTags.foreach { tag =>
    trackedTags.get(tag).foreach(_.addJob(jobStart))
  }
}
|
|
||
/**
 * For every tracker that tracks the stage of the started task, increments the number of
 * in-flight tasks and marks the tracker as dirty.
 *
 * @param taskStart the task start event
 */
override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = {
  trackedTags.valuesIterator
    .filter(_.stages.contains(taskStart.stageId))
    .foreach { tracker =>
      tracker.inFlightTasks.incrementAndGet()
      tracker.dirty.set(true)
    }
}
|
|
||
/**
 * For every tracker that tracks the stage of the finished task: counts the task as completed,
 * adds its input bytes read to the stage, decrements the number of in-flight tasks and marks
 * the tracker as dirty.
 *
 * @param taskEnd the task end event
 */
override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
  // The event may carry no task metrics (null); such a task contributes zero bytes read
  // instead of failing the listener with a NullPointerException.
  val bytesRead = Option(taskEnd.taskMetrics).map(_.inputMetrics.bytesRead).getOrElse(0L)

  trackedTags.foreach { case (_, tracker) =>
    // Single lookup: the stage is present exactly when this tracker tracks the task.
    tracker.stages.get(taskEnd.stageId).foreach { stage =>
      stage.update { i =>
        i.completedTasks += 1
        i.inputBytesRead += bytesRead
      }
      // This should never become negative; decrement and clamp at zero in one atomic step
      // so that no other thread can observe or act on a transient negative value.
      tracker.inFlightTasks.updateAndGet(n => math.max(n - 1, 0))
      tracker.dirty.set(true)
    }
  }
}
|
|
||
/**
 * Flags the completed stage as done in every tracker that tracks it and marks those trackers
 * as dirty.
 *
 * @param stageCompleted the stage completion event
 */
override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
  trackedTags.values.foreach { tracker =>
    tracker.stages.get(stageCompleted.stageInfo.stageId).foreach { tracked =>
      tracked.update(s => s.completed = true)
      tracker.dirty.set(true)
    }
  }
}
|
|
||
/**
 * Marks every tracker that tracks the finished job as dirty.
 *
 * No counter changes here. The flag is set anyway so that all progress is reported and an
 * update is sent to the client when a job ends: for a client that measures the time between
 * progress messages, every message is itself a sign of progress.
 *
 * @param jobEnd the job end event
 */
override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
  trackedTags.valuesIterator
    .filter(_.jobs.contains(jobEnd.jobId))
    .foreach(_.dirty.set(true))
}
|
|
||
/**
 * Looks up the tracker registered for `tag`.
 *
 * @param tag the tag to look up
 * @return the tracker for the tag, or `None` if no tracker is registered under it
 */
def tryGetTracker(tag: String): Option[ExecutionTracker] = trackedTags.get(tag)
|
|
||
/**
 * Registers a new, empty tracker under `tag`. A tracker already registered under the same
 * tag is replaced.
 *
 * @param tag the tag to start tracking
 */
def registerJobTag(tag: String): Unit = {
  trackedTags(tag) = new ExecutionTracker(tag)
}
|
|
||
/**
 * Removes the tracker registered under `tag`, if there is one.
 *
 * @param tag the tag to stop tracking
 */
def removeJobTag(tag: String): Unit = trackedTags -= tag
|
|
||
/** Removes all registered trackers. */
def clearJobTags(): Unit = trackedTags.clear()
|
|
||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.