-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-15915][SQL] Logical plans should use canonicalized plan when override sameResult. #13638
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
9eb4c25
52641f4
11dc433
3d27607
0c9ec86
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -87,7 +87,7 @@ private[sql] class CacheManager extends Logging { | |
| query: Dataset[_], | ||
| tableName: Option[String] = None, | ||
| storageLevel: StorageLevel = MEMORY_AND_DISK): Unit = writeLock { | ||
| val planToCache = query.queryExecution.analyzed | ||
| val planToCache = query.queryExecution.analyzed.canonicalized | ||
| if (lookupCachedData(planToCache).nonEmpty) { | ||
| logWarning("Asked to cache already cached data.") | ||
| } else { | ||
|
|
@@ -106,7 +106,7 @@ private[sql] class CacheManager extends Logging { | |
|
|
||
| /** Removes the data for the given [[Dataset]] from the cache */ | ||
| private[sql] def uncacheQuery(query: Dataset[_], blocking: Boolean = true): Unit = writeLock { | ||
| val planToCache = query.queryExecution.analyzed | ||
| val planToCache = query.queryExecution.analyzed.canonicalized | ||
| val dataIndex = cachedData.indexWhere(cd => planToCache.sameResult(cd.plan)) | ||
| require(dataIndex >= 0, s"Table $query is not cached.") | ||
| cachedData(dataIndex).cachedRepresentation.cachedColumnBuffers.unpersist(blocking) | ||
|
|
@@ -120,7 +120,7 @@ private[sql] class CacheManager extends Logging { | |
| private[sql] def tryUncacheQuery( | ||
| query: Dataset[_], | ||
| blocking: Boolean = true): Boolean = writeLock { | ||
| val planToCache = query.queryExecution.analyzed | ||
| val planToCache = query.queryExecution.analyzed.canonicalized | ||
| val dataIndex = cachedData.indexWhere(cd => planToCache.sameResult(cd.plan)) | ||
| val found = dataIndex >= 0 | ||
| if (found) { | ||
|
|
@@ -137,7 +137,8 @@ private[sql] class CacheManager extends Logging { | |
|
|
||
| /** Optionally returns cached data for the given [[LogicalPlan]]. */ | ||
| private[sql] def lookupCachedData(plan: LogicalPlan): Option[CachedData] = readLock { | ||
| cachedData.find(cd => plan.sameResult(cd.plan)) | ||
| val canonicalized = plan.canonicalized | ||
| cachedData.find(cd => canonicalized.sameResult(cd.plan)) | ||
| } | ||
|
|
||
| /** Replaces segments of the given logical plan with cached versions where possible. */ | ||
|
|
@@ -155,8 +156,9 @@ private[sql] class CacheManager extends Logging { | |
| * function will over invalidate. | ||
| */ | ||
| private[sql] def invalidateCache(plan: LogicalPlan): Unit = writeLock { | ||
| val canonicalized = plan.canonicalized | ||
| cachedData.foreach { | ||
| case data if data.plan.collect { case p if p.sameResult(plan) => p }.nonEmpty => | ||
| case data if data.plan.collect { case p if p.sameResult(canonicalized) => p }.nonEmpty => | ||
|
||
| data.cachedRepresentation.recache() | ||
| case _ => | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we still need these changes?
LogicalPlan.canonicalizedis a lazy val and we don't have performance penalty even we use un-canonicalized plan as key.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems we don't need them for now.
I'll revert the changes.