[SPARK-44422][CONNECT] Spark Connect fine grained interrupt #42009
Changes to `SparkResult` (the Spark Connect client-side result class):

```diff
@@ -40,6 +40,7 @@ private[sql] class SparkResult[T](
     extends AutoCloseable
     with Cleanable { self =>
 
+  private[this] var opId: String = null
   private[this] var numRecords: Int = 0
   private[this] var structType: StructType = _
   private[this] var arrowSchema: pojo.Schema = _
@@ -79,6 +80,19 @@ private[sql] class SparkResult[T](
     var stop = false
     while (!stop && responses.hasNext) {
       val response = responses.next()
+
+      // Save and validate operationId
+      if (opId == null) {
+        opId = response.getOperationId
+      }
+      if (opId != response.getOperationId) {
+        // backwards compatibility:
+        // response from an old server without operationId field would have getOperationId == "".
+        throw new IllegalStateException(
+          "Received response with wrong operationId. " +
+            s"Expected '$opId' but received '${response.getOperationId}'.")
+      }
+
       if (response.hasSchema) {
         // The original schema should arrive before ArrowBatches.
         structType =
```
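A standalone illustration of the pinning rule above: the first response fixes the operation id, and any later response carrying a different id (including the empty string a pre-operationId server would send) fails fast. Nothing beyond the snippet itself is assumed here:

```scala
// Minimal sketch of the save-and-validate logic from the hunk above,
// extracted out of SparkResult so it can run on its own.
object OperationIdCheck extends App {
  private var opId: String = null

  def validate(responseOpId: String): Unit = {
    if (opId == null) {
      opId = responseOpId // the first response pins the id
    }
    if (opId != responseOpId) {
      throw new IllegalStateException(
        s"Received response with wrong operationId. " +
          s"Expected '$opId' but received '$responseOpId'.")
    }
  }

  validate("op-1") // pins "op-1" (value is illustrative)
  validate("op-1") // ok: matches the pinned id
  // validate("")  // would throw: e.g. a response from an old server without the field
}
```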
```diff
@@ -148,6 +162,15 @@ private[sql] class SparkResult[T](
     structType
   }
 
+  /**
+   * @return
+   *   the operationId of the result.
+   */
+  def operationId: String = {
+    processResponses(stopOnFirstNonEmptyResponse = true)
+    opId
+  }
+
   /**
    * Create an Array with the contents of the result.
    */
```
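For context, a hedged sketch of how the fine grained interrupt is meant to be used from the client. It assumes a `SparkSession.interruptOperation(operationId)` method alongside the pre-existing `interruptAll()`, plus the Connect builder's `remote(...)`; since `SparkResult.operationId` is `private[sql]`, how the id reaches user code is also an assumption:

```scala
// Hedged usage sketch, not the verbatim API surface of this PR.
import org.apache.spark.sql.SparkSession

object FineGrainedInterruptExample extends App {
  val spark = SparkSession.builder().remote("sc://localhost:15002").getOrCreate()

  // Kick off a long-running query on another thread.
  val runner = new Thread(() => {
    try spark.range(1L << 32).selectExpr("sum(id)").collect()
    catch { case e: Exception => println(s"query stopped: ${e.getMessage}") }
  })
  runner.start()

  // With the operationId of that execution in hand (this PR threads it through
  // every ExecutePlanResponse and exposes it on SparkResult), the client can
  // interrupt just that one operation instead of the whole session:
  // spark.interruptOperation(someOperationId) // fine grained (assumed API)
  // spark.interruptAll()                      // coarse grained: everything in the session
}
```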
Changes to `ExecuteHolder` (the server-side per-execution state). Tag validation moves to the shared `ProtoUtils`, the user-defined tags become a `Set`, and the response observer now receives a reference to its holder:

```diff
@@ -19,9 +19,9 @@ package org.apache.spark.sql.connect.service
 
 import scala.collection.JavaConverters._
 
-import org.apache.spark.SparkContext
 import org.apache.spark.connect.proto
 import org.apache.spark.internal.Logging
+import org.apache.spark.sql.connect.common.ProtoUtils
 import org.apache.spark.sql.connect.execution.{ExecuteGrpcResponseSender, ExecuteResponseObserver, ExecuteThreadRunner}
 import org.apache.spark.util.SystemClock
@@ -40,15 +40,20 @@ private[connect] class ExecuteHolder(
     s"Session_${sessionHolder.sessionId}_" +
       s"Request_${operationId}"
 
-  val userDefinedTags: Seq[String] = request.getTagsList().asScala.toSeq.map { tag =>
-    throwIfInvalidTag(tag)
-    tag
-  }
+  val userDefinedTags: Set[String] = request
+    .getTagsList()
+    .asScala
+    .toSeq
+    .map { tag =>
+      ProtoUtils.throwIfInvalidTag(tag)
+      tag
+    }
+    .toSet
 
   val session = sessionHolder.session
 
   val responseObserver: ExecuteResponseObserver[proto.ExecutePlanResponse] =
-    new ExecuteResponseObserver[proto.ExecutePlanResponse]()
+    new ExecuteResponseObserver[proto.ExecutePlanResponse](this)
 
   val eventsManager: ExecuteEventsManager = ExecuteEventsManager(this, new SystemClock())
```
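The validator now lives in the shared `ProtoUtils`, usable by both client and server. A minimal sketch of what the relocated helper presumably looks like, assuming the body moved over essentially unchanged from the `ExecuteHolder` version removed in the next hunk; the common module cannot depend on `SparkContext`, so the `","` separator is assumed to be mirrored locally (it matches `SparkContext.SPARK_JOB_TAGS_SEP`):

```scala
// Sketch of the relocated validator; the body is assumed carried over from the
// removed ExecuteHolder.throwIfInvalidTag shown below. Not the verbatim
// ProtoUtils source.
object ProtoUtilsSketch {
  // Assumed local mirror of SparkContext.SPARK_JOB_TAGS_SEP (","), since the
  // shared common module cannot reference SparkContext.
  private val SparkJobTagsSep = ","

  def throwIfInvalidTag(tag: String): Unit = {
    // Same format rules apply to Spark Connect execution tags as to SparkContext job tags.
    if (tag == null) {
      throw new IllegalArgumentException("Spark Connect execution tag cannot be null.")
    }
    if (tag.contains(SparkJobTagsSep)) {
      throw new IllegalArgumentException(
        s"Spark Connect execution tag cannot contain '$SparkJobTagsSep'.")
    }
    if (tag.isEmpty) {
      throw new IllegalArgumentException("Spark Connect execution tag cannot be an empty string.")
    }
  }
}
```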
```diff
@@ -98,23 +103,12 @@ private[connect] class ExecuteHolder(
     runner.interrupt()
   }
 
+  /**
+   * Spark Connect tags are also added as SparkContext job tags, but to make the tag unique, they
+   * need to be combined with userId and sessionId.
+   */
+  def tagToSparkJobTag(tag: String): String = {
+    "SparkConnectUserDefinedTag_" +
+      s"User_${sessionHolder.userId}_Session_${sessionHolder.sessionId}"
+  }
 
-  private def throwIfInvalidTag(tag: String) = {
-    // Same format rules apply to Spark Connect execution tags as to SparkContext job tags.
-    // see SparkContext.throwIfInvalidTag.
-    if (tag == null) {
-      throw new IllegalArgumentException("Spark Connect execution tag cannot be null.")
-    }
-    if (tag.contains(SparkContext.SPARK_JOB_TAGS_SEP)) {
-      throw new IllegalArgumentException(
-        s"Spark Connect execution tag cannot contain '${SparkContext.SPARK_JOB_TAGS_SEP}'.")
-    }
-    if (tag.isEmpty) {
-      throw new IllegalArgumentException("Spark Connect execution tag cannot be an empty string.")
-    }
-  }
 }
```

An inline review thread on `tagToSparkJobTag` (marked resolved; outdated):

- Reviewer: @juliuszsompolski input
- juliuszsompolski (Contributor, Author): Thanks for spotting! to #42120 ?
- Reviewer (Member): sure

The thread flags that the method never uses its `tag` input, so every user-defined tag in a session would map to the same Spark job tag; the author defers the fix to the follow-up #42120.
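For contrast, a hypothetical corrected mapping that folds the tag itself into the result, written as a standalone function. This is only a guess at what the follow-up does; the real fix lives in #42120 and is not shown in this diff:

```scala
object TagMappingSketch {
  // Hypothetical fix (not from this PR): include the user tag itself so that
  // distinct user tags map to distinct Spark job tags. The "_Tag_" suffix
  // format is an assumption; the actual change is in the follow-up PR.
  def tagToSparkJobTag(userId: String, sessionId: String, tag: String): String =
    s"SparkConnectUserDefinedTag_User_${userId}_Session_${sessionId}_Tag_$tag"
}
```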
A later review comment on this code: "This has actually been fixed by #41315 (now the execution is in a different thread, and the interrupt interrupts that thread, not only Spark jobs)."
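To make that comment concrete: because each execution now runs on its own thread, interrupting cancels the thread directly, instead of only cancelling Spark jobs (which does nothing while the execution is blocked outside of a job). A minimal sketch of that pattern, not the actual `ExecuteThreadRunner` code:

```scala
// Illustrative thread-per-execution interrupt pattern; names and structure
// are assumptions, not the real ExecuteThreadRunner.
class ExecutionRunnerSketch(work: () => Unit) {
  private val executionThread = new Thread(() => {
    try work()
    catch { case _: InterruptedException => /* execution was interrupted */ }
  }, "spark-connect-execute-thread")

  def start(): Unit = executionThread.start()

  // Interrupts the execution thread itself, so even non-job work (planning,
  // local loops, blocking calls) can be stopped, not just running Spark jobs.
  def interrupt(): Unit = executionThread.interrupt()
}
```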