-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-21765] Set isStreaming on leaf nodes for streaming plans. #18973
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
488a408
8dfe40e
dcdcf52
2ec6154
f289d88
b760644
28c2f4b
ac7d785
e55abe6
c837069
09352ba
3f11fac
8857cf5
fd725bb
6a048bb
8fd9053
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -429,9 +429,10 @@ case class Sort( | |
|
|
||
| /** Factory for constructing new `Range` nodes. */ | ||
| object Range { | ||
| def apply(start: Long, end: Long, step: Long, numSlices: Option[Int]): Range = { | ||
| def apply(start: Long, end: Long, step: Long, | ||
| numSlices: Option[Int], isStreaming: Boolean = false): Range = { | ||
| val output = StructType(StructField("id", LongType, nullable = false) :: Nil).toAttributes | ||
| new Range(start, end, step, numSlices, output) | ||
| new Range(start, end, step, numSlices, output, isStreaming) | ||
| } | ||
| def apply(start: Long, end: Long, step: Long, numSlices: Int): Range = { | ||
| Range(start, end, step, Some(numSlices)) | ||
|
|
@@ -443,7 +444,8 @@ case class Range( | |
| end: Long, | ||
| step: Long, | ||
| numSlices: Option[Int], | ||
| output: Seq[Attribute]) | ||
| output: Seq[Attribute], | ||
| override val isStreaming: Boolean) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. how can a `Range` be streaming?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think there's necessarily a reason it shouldn't be able to; streaming sources are free to define `getBatch()` however they'd like. Right now the only source actually doing that is a fake source in `StreamSuite`. |
||
| extends LeafNode with MultiInstanceRelation { | ||
|
|
||
| require(step != 0, s"step ($step) cannot be 0") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -420,8 +420,10 @@ class SQLContext private[sql](val sparkSession: SparkSession) | |
| * converted to Catalyst rows. | ||
| */ | ||
| private[sql] | ||
| def internalCreateDataFrame(catalystRows: RDD[InternalRow], schema: StructType) = { | ||
| sparkSession.internalCreateDataFrame(catalystRows, schema) | ||
| def internalCreateDataFrame(catalystRows: RDD[InternalRow], | ||
|
||
| schema: StructType, | ||
| isStreaming: Boolean = false) = { | ||
| sparkSession.internalCreateDataFrame(catalystRows, schema, isStreaming) | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -564,10 +564,14 @@ class SparkSession private( | |
| */ | ||
| private[sql] def internalCreateDataFrame( | ||
| catalystRows: RDD[InternalRow], | ||
| schema: StructType): DataFrame = { | ||
| schema: StructType, | ||
| isStreaming: Boolean = false): DataFrame = { | ||
| // TODO: use MutableProjection when rowRDD is another DataFrame and the applied | ||
| // schema differs from the existing schema on any field data type. | ||
| val logicalPlan = LogicalRDD(schema.toAttributes, catalystRows)(self) | ||
| val logicalPlan = LogicalRDD( | ||
| schema.toAttributes, | ||
| catalystRows, | ||
| isStreaming = isStreaming)(self) | ||
|
||
| Dataset.ofRows(self, logicalPlan) | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add docs to explain what `isStreaming` is?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. (I think this is a correct summary?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Make sure this is same as the updated isStreaming docs (see my other comments)