Changes from 1 commit
60 commits
eabb65b
WIP nothing worked, just recording the progress
HeartSaVioR Sep 4, 2018
c3076d2
WIP not working yet... lots of implementations needed
HeartSaVioR Sep 6, 2018
9d59c7a
WIP Finished implementing UpdatingSessionIterator
HeartSaVioR Sep 6, 2018
b38f2b9
WIP add verification on precondition "rows in iterator are sorted by …
HeartSaVioR Sep 7, 2018
668c1f5
Rename SymmetricHashJoinStateManager to MultiValuesStateManager
HeartSaVioR Sep 8, 2018
9f63a3c
Move package of UpdatingSessionIterator
HeartSaVioR Sep 8, 2018
5d17ac8
WIP add MergingSortWithMultiValuesStateIterator, now integrating with…
HeartSaVioR Sep 10, 2018
ec33265
WIP the first version of working one! Still have lots of TODOs and FI…
HeartSaVioR Sep 13, 2018
8b210d5
Add more explanations
HeartSaVioR Sep 13, 2018
7255bca
Silly bugfix & block session window for batch query as of now
HeartSaVioR Sep 13, 2018
7b57fe5
More works: majorly split out updating session to individual physical…
HeartSaVioR Sep 13, 2018
969859b
Fix a silly bug and also add check for session window against batch q…
HeartSaVioR Sep 13, 2018
f5ecbdd
WIP Fixed eviction on update mode
HeartSaVioR Sep 13, 2018
b180772
WIP found root reason of broken UT... fixed it
HeartSaVioR Sep 13, 2018
0a2d731
WIP remove printing "explain" on UTs
HeartSaVioR Sep 13, 2018
6ee901e
WIP address session to batch query (+ python) as well... not having t…
HeartSaVioR Sep 13, 2018
395606b
WIP add more test on session batch query
HeartSaVioR Sep 13, 2018
f6bb34d
WIP add UT for sessions with keys overlapped
HeartSaVioR Sep 14, 2018
22fffd2
WIP refactor a bit
HeartSaVioR Sep 14, 2018
847f69e
WIP add more FIXMEs for javadoc, and remove invalid FIXMEs
HeartSaVioR Sep 14, 2018
104df13
WIP Repackage & remove unnecessary field
HeartSaVioR Sep 17, 2018
8108fc5
WIP addressed UPDATE mode, but doesn't look like performant
HeartSaVioR Sep 17, 2018
35c8fef
WIP remove FIXME since it is not relevant
HeartSaVioR Sep 17, 2018
6b1d1e0
WIP update numOutputRows for Append mode
HeartSaVioR Sep 17, 2018
86b3060
WIP apply aggregations when merging sessions
HeartSaVioR Sep 18, 2018
f7c2deb
WIP simplify the code a bit
HeartSaVioR Sep 18, 2018
a81616b
WIP address batch distinct query for sessionization
HeartSaVioR Sep 18, 2018
013785d
WIP remove debug statements for test code
HeartSaVioR Sep 18, 2018
37fffef
WIP remove debug informations
HeartSaVioR Sep 18, 2018
df95e72
WIP port Sessionization example to UT of session window
HeartSaVioR Sep 18, 2018
16d6421
WIP remove unnecessary thing
HeartSaVioR Sep 18, 2018
dc43300
WIP fix all the issues with sessionization example UTs
HeartSaVioR Sep 19, 2018
3637f60
WIP apply merging session in each partition before shuffling
HeartSaVioR Sep 19, 2018
0d53831
Fix scala checkstyle
HeartSaVioR Sep 20, 2018
a781400
Fix python style check
HeartSaVioR Sep 20, 2018
918dad2
WIP add complete mode, fix tricky bugs, apply ExternalAppendOnlyUnsaf…
HeartSaVioR Oct 8, 2018
fd6377b
WIP add "session" function to exclude list for description
HeartSaVioR Oct 8, 2018
e029e12
WIP rename function & column name "session" to "session_window"
HeartSaVioR Oct 10, 2018
a2fc652
WIP reducing unnecessary codegen which seriously harmed performance
HeartSaVioR Oct 15, 2018
2dc413b
WIP reduce codegen once again for MergingSessionsIterator
HeartSaVioR Oct 15, 2018
4dd0e89
WIP optimize a bit more on codegen...
HeartSaVioR Oct 15, 2018
cf52044
WIP make the feature "merge session in local partition" optional
HeartSaVioR Oct 15, 2018
5c74609
WIP add "session_window" to exclude list
HeartSaVioR Oct 17, 2018
fb6c59f
WIP Enable versioning of session window state format
HeartSaVioR Oct 18, 2018
dd29af2
WIP some correction in comment
HeartSaVioR Oct 19, 2018
1f6e496
WIP cover all the cases for session window in UTs
HeartSaVioR Oct 20, 2018
0673b6e
WIP Add Linked List data structure for storing session windows
HeartSaVioR Oct 23, 2018
4698f6d
WIP add SessionWindowLinkedListStateStoreRDD
HeartSaVioR Oct 23, 2018
5c67f72
WIP add more functionalities to SessionWindowLinkedListState
HeartSaVioR Oct 24, 2018
7bb0060
WIP it works but a bit suboptimal
HeartSaVioR Oct 25, 2018
35c9712
WIP optimized!
HeartSaVioR Oct 25, 2018
ede078a
WIP remove requirement on sort, add UT to test linked list state with…
HeartSaVioR Oct 27, 2018
f8e8ff6
WIP add code to print out information when task crashes with dangling…
HeartSaVioR Oct 27, 2018
b05abc7
WIP fixed the issue with benchmark run
HeartSaVioR Oct 29, 2018
17570f2
WIP optimize a bit on storing new sessions
HeartSaVioR Oct 30, 2018
958de31
WIP Fixed critical bug which tasks don't respect preference on state …
HeartSaVioR Oct 31, 2018
ee67bca
WIP Fix critical perf. issue: remove codegen on generating session ro…
HeartSaVioR Nov 1, 2018
8a0331e
WIP Rolling back unnecessary changes
HeartSaVioR Nov 2, 2018
b6ccecd
WIP Apply removing codegen to UpdatingSessionIterator as well
HeartSaVioR Nov 2, 2018
75c7611
WIP remove state version for now: it will be reintroduced when actual…
HeartSaVioR Nov 2, 2018
More works: majorly split out updating session to individual physical node

* we will leverage such a node for the batch case if we want
HeartSaVioR committed Oct 10, 2018
commit 7b57fe5d4fb1ca4a62ae9ed8d7851ffee6633fb9
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.aggregate
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate._
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.streaming.{SessionWindowStateStoreRestoreExec, SessionWindowStateStoreSaveExec, StateStoreRestoreExec, StateStoreSaveExec}
import org.apache.spark.sql.execution.streaming._

/**
* Utility functions used by the query planner to convert our plan to new aggregation code path.
@@ -115,8 +115,6 @@ object AggUtils {
finalAggregate :: Nil
}

// FIXME: distinct in session makes sense?

def planAggregateWithOneDistinct(
groupingExpressions: Seq[NamedExpression],
functionsWithDistinct: Seq[AggregateExpression],
@@ -348,7 +346,9 @@ object AggUtils {
* - Shuffle & Sort (distribution: keys "without" session, sort: all keys)
* - SessionWindowStateStoreRestore (group: keys "without" session)
* - merge input tuples with stored tuples (sessions) respecting sort order
* - UpdatingSessionExec
* - calculate session among tuples, and update all tuples to get correct session range
* - NOTE: it leverages the fact that the output of SessionWindowStateStoreRestore is sorted
* - PartialMerge (group: all keys)
* - now there is at most 1 tuple per group
* - SessionWindowStateStoreSave (group: keys "without" session)
@@ -389,6 +389,10 @@ object AggUtils {
val restored = SessionWindowStateStoreRestoreExec(groupingWithoutSessionAttributes,
sessionExpression.toAttribute, stateInfo = None, eventTimeWatermark = None, partialAggregate)

val updatedSession = UpdatingSessionExec(groupingWithoutSessionAttributes,
sessionExpression.toAttribute, optRequiredChildDistribution = None,
optRequiredChildOrdering = None, restored)

val partialMerged: SparkPlan = {
val aggregateExpressions = functionsWithoutDistinct.map(_.copy(mode = PartialMerge))
val aggregateAttributes = aggregateExpressions.map(_.resultAttribute)
@@ -401,7 +405,7 @@
initialInputBufferOffset = groupingAttributes.length,
resultExpressions = groupingAttributes ++
aggregateExpressions.flatMap(_.aggregateFunction.inputAggBufferAttributes),
child = restored)
child = updatedSession)
}

// Note: stateId and returnAllStates are filled in later with preparation rules
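The plan comment in AggUtils above hinges on one idea: once tuples are sorted by key and time, session windows can be computed in a single pass by extending the current session while each event falls within the gap, and opening a new one otherwise. A minimal standalone sketch of that merge (names and types are illustrative, not Spark's implementation):

```scala
// Sketch: merge sorted event times into gap-separated session windows.
// Assumes `times` is sorted ascending, mirroring the sort the planner enforces.
object SessionMergeSketch {
  // A session covers [start, end) and counts the events merged into it.
  case class Session(start: Long, end: Long, count: Int)

  def mergeSessions(times: Seq[Long], gapMs: Long): List[Session] =
    times.foldLeft(List.empty[Session]) {
      case (Session(s, e, n) :: rest, t) if t < e =>
        // Event falls inside the current session: extend its range.
        Session(s, math.max(e, t + gapMs), n + 1) :: rest
      case (acc, t) =>
        // Gap elapsed (or first event): open a new session.
        Session(t, t + gapMs, 1) :: acc
    }.reverse
}
```

With a gap of 5ms, events at 0 and 3 merge into one session `[0, 8)` while an event at 10 starts a fresh session `[10, 15)`.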
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.aggregate

import org.apache.spark.TaskContext

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder}
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.catalyst.plans.physical.{Distribution, Partitioning}
import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}
import org.apache.spark.sql.execution.streaming.UpdatingSessionIterator

// FIXME: javadoc should state the precondition that the input must be sorted,
// or both the required child distribution and the required child ordering should be provided
// to guarantee the input will be sorted
case class UpdatingSessionExec(
keyExpressions: Seq[Attribute],
sessionExpression: Attribute,
optRequiredChildDistribution: Option[Seq[Distribution]],
optRequiredChildOrdering: Option[Seq[Seq[SortOrder]]],
child: SparkPlan) extends UnaryExecNode {

override protected def doExecute(): RDD[InternalRow] = {
child.execute().mapPartitions { iter =>
val newIter = new UpdatingSessionIterator(iter, keyExpressions, sessionExpression,
child.output)

val debugIter = newIter.map { row =>
val keysProjection = GenerateUnsafeProjection.generate(keyExpressions, child.output)
val sessionProjection = GenerateUnsafeProjection.generate(
Seq(sessionExpression), child.output)
val rowProjection = GenerateUnsafeProjection.generate(child.output, child.output)

// FIXME: remove
val debugPartitionId = TaskContext.get().partitionId()

logWarning(s"DEBUG: partitionId $debugPartitionId - updated session row - keys ${keysProjection(row)}")
logWarning(s"DEBUG: partitionId $debugPartitionId - updated session row - session ${sessionProjection(row)}")
logWarning(s"DEBUG: partitionId $debugPartitionId - updated session row - row (proj) ${rowProjection(row)}")
logWarning(s"DEBUG: partitionId $debugPartitionId - updated session row - row ${row}")

row
}

debugIter
}
}

override def output: Seq[Attribute] = child.output

override def outputPartitioning: Partitioning = child.outputPartitioning

override def requiredChildDistribution: Seq[Distribution] = optRequiredChildDistribution match {
case Some(distribution) => distribution
case None => super.requiredChildDistribution
}

override def requiredChildOrdering: Seq[Seq[SortOrder]] = optRequiredChildOrdering match {
case Some(ordering) => ordering
case None => super.requiredChildOrdering
}
}
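The iterator wrapped by `UpdatingSessionExec` cannot emit rows one at a time: a row's final session range is only known once the whole session has been seen, so rows must be buffered per session and re-emitted with the updated range. A hedged sketch of that buffering logic (simplified row type and field names are assumptions, not Spark's internals):

```scala
// Sketch: buffer rows of one session, then rewrite every row's session range.
// Assumes input is sorted by (key, time), as requiredChildOrdering would guarantee.
object UpdatingSessionSketch {
  case class Row(key: String, time: Long, sessionStart: Long, sessionEnd: Long)

  def updateSessions(rows: Seq[Row], gapMs: Long): Seq[Row] = {
    val out = scala.collection.mutable.ArrayBuffer.empty[Row]
    var buffer = List.empty[Row] // current session's rows, newest first

    def flush(): Unit = if (buffer.nonEmpty) {
      val start = buffer.map(_.time).min
      val end = buffer.map(_.time).max + gapMs
      // Every buffered row gets the session's final [start, end) range.
      out ++= buffer.reverse.map(_.copy(sessionStart = start, sessionEnd = end))
      buffer = Nil
    }

    rows.foreach { r =>
      buffer match {
        case prev :: _ if prev.key == r.key && r.time < prev.time + gapMs =>
          buffer = r :: buffer // same session: keep buffering
        case _ =>
          flush()              // key changed or gap elapsed: emit previous session
          buffer = r :: buffer
      }
    }
    flush()
    out.toSeq
  }
}
```

This also shows why the node only needs per-partition work when the input is already clustered by key, which is what the optional `requiredChildDistribution` override is for.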
@@ -118,7 +118,7 @@ class IncrementalExecution(
Some(aggStateInfo),
stateFormatVersion,
child) :: Nil))
/*

case SessionWindowStateStoreSaveExec(keys, session, None, None, None,
UnaryExecNode(agg,
UnaryExecNode(agg2,
@@ -138,25 +138,6 @@
Some(aggStateInfo),
Some(offsetSeqMetadata.batchWatermarkMs),
child) :: Nil) :: Nil))
*/

case SessionWindowStateStoreSaveExec(keys, session, None, None, None,
UnaryExecNode(agg,
SessionWindowStateStoreRestoreExec(_, _, None, None, child))) =>
val aggStateInfo = nextStatefulOperationStateInfo
SessionWindowStateStoreSaveExec(
keys,
session,
Some(aggStateInfo),
Some(outputMode),
Some(offsetSeqMetadata.batchWatermarkMs),
agg.withNewChildren(
SessionWindowStateStoreRestoreExec(
keys,
session,
Some(aggStateInfo),
Some(offsetSeqMetadata.batchWatermarkMs),
child) :: Nil))

case StreamingDeduplicateExec(keys, child, None, None) =>
StreamingDeduplicateExec(
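The `IncrementalExecution` rule above follows a common pattern: planning leaves stateful operators with `stateInfo = None`, and a later pass pattern-matches the Save/Restore pair and fills both with the same batch-specific state info so they address one state store. A toy sketch of that rewrite (the plan node shapes and the `nextStateInfo` generator are hypothetical stand-ins, not Catalyst's API):

```scala
// Sketch: fill placeholder state metadata into a matched Save/Agg/Restore chain.
object StateRuleSketch {
  sealed trait Plan
  case class Leaf(name: String) extends Plan
  case class Restore(stateInfo: Option[Int], child: Plan) extends Plan
  case class Save(stateInfo: Option[Int], child: Plan) extends Plan
  case class Agg(child: Plan) extends Plan

  def fillStateInfo(plan: Plan, nextStateInfo: () => Int): Plan = plan match {
    case Save(None, Agg(Restore(None, child))) =>
      val info = nextStateInfo()
      // Save and Restore must share the same info so they hit the same store.
      Save(Some(info), Agg(Restore(Some(info), child)))
    case other => other
  }
}
```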
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.streaming
import scala.collection.mutable

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, BindReferences, CreateNamedStruct, Expression, Literal, PreciseTimestampConversion, UnsafeRow}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.types.{LongType, TimestampType}

@@ -47,7 +47,7 @@ class UpdatingSessionIterator(

val processedKeys: mutable.HashSet[InternalRow] = new mutable.HashSet[InternalRow]()

// FIXME: data loss seen... one data from input and one data from state
// FIXME: check whether it can be run with such situation: empty groupWithoutSessionExpressions

override def hasNext: Boolean = {
assertIteratorNotCorrupted()
@@ -191,8 +191,6 @@ class UpdatingSessionIterator(
returnRowsIter = returnRows.iterator
}

//returnRowsIter = returnRows.iterator

// FIXME: DEBUG
val (rIter, tmpReturnRowsIter) = returnRowsIter.duplicate
returnRowsIter = rIter
@@ -482,7 +482,10 @@ case class SessionWindowStateStoreRestoreExec(
}

val mergedIter = new MergingSortWithMultiValuesStateIterator(filteredIterator, stateManager,
keyExpressions, sessionExpression, watermarkPredicateForData, child.output)
keyExpressions, sessionExpression, watermarkPredicateForData, child.output).map { row =>
numOutputRows += 1
row
}

val debugMergedIter = mergedIter.map { row =>
val keysProjection = GenerateUnsafeProjection.generate(keyExpressions, child.output)
@@ -498,22 +501,7 @@ case class SessionWindowStateStoreRestoreExec(
row
}

new UpdatingSessionIterator(debugMergedIter, keyExpressions, sessionExpression,
child.output).map { row =>
numOutputRows += 1

val keysProjection = GenerateUnsafeProjection.generate(keyExpressions, child.output)
val sessionProjection = GenerateUnsafeProjection.generate(
Seq(sessionExpression), child.output)
val rowProjection = GenerateUnsafeProjection.generate(child.output, child.output)

logWarning(s"DEBUG: partitionId $debugPartitionId - updated session row - keys ${keysProjection(row)}")
logWarning(s"DEBUG: partitionId $debugPartitionId - updated session row - session ${sessionProjection(row)}")
logWarning(s"DEBUG: partitionId $debugPartitionId - updated session row - row (proj) ${rowProjection(row)}")
logWarning(s"DEBUG: partitionId $debugPartitionId - updated session row - row ${row}")

row
}
debugMergedIter
}
}

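The `MergingSortWithMultiValuesStateIterator` used above rests on a classic building block: two-way merge of sorted iterators, here the sorted stream input and the sorted sessions restored from the state store, so downstream operators see one globally sorted stream. A minimal generic sketch (simplified signature, not Spark's class):

```scala
// Sketch: lazily merge two sorted iterators, preserving overall sort order.
object MergingSortSketch {
  def mergeSorted[T](a: Iterator[T], b: Iterator[T])
                    (implicit ord: Ordering[T]): Iterator[T] =
    new Iterator[T] {
      // BufferedIterator lets us peek at the next element without consuming it.
      private val left = a.buffered
      private val right = b.buffered
      def hasNext: Boolean = left.hasNext || right.hasNext
      def next(): T =
        if (!right.hasNext) left.next()
        else if (!left.hasNext) right.next()
        else if (ord.lteq(left.head, right.head)) left.next()
        else right.next()
    }
}
```

Because the merge is lazy, state rows are pulled only as needed, which matters when the restored state is large.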
@@ -30,7 +30,7 @@ import org.apache.spark.sql.{AnalysisException, Dataset}
import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.execution.streaming._
import org.apache.spark.sql.functions.{count, window, session}
import org.apache.spark.sql.functions.{count, session, window}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.streaming.OutputMode._
import org.apache.spark.util.Utils