tests minus purging

ericm-db · ericm-db · Jul 8, 2024 · Jul 8, 2024 · Jul 8, 2024 · Jul 9, 2024
commit cbbd47f01168fd789e02fda718f3c310074beb1d
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
@@ -40,7 +40,6 @@ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._
 import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table}
 import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2, ReadLimit, SparkDataStream}
 import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsTruncate, Write}
-import org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.execution.command.StreamingExplainCommand
 import org.apache.spark.sql.execution.streaming.sources.ForeachBatchUserFuncException
 import org.apache.spark.sql.internal.SQLConf
@@ -240,23 +239,6 @@ abstract class StreamExecution(
    */
   val commitLog = new CommitLog(sparkSession, checkpointFile("commits"))
 
-
-  lazy val operatorStateMetadataLogs: Map[Long, OperatorStateMetadataLog] = {
-    populateOperatorStateMetadatas(getLatestExecutionContext().executionPlan.executedPlan)
-  }
-
-  private def populateOperatorStateMetadatas(
-      plan: SparkPlan): Map[Long, OperatorStateMetadataLog] = {
-    plan.flatMap {
-      case s: StateStoreWriter => s.stateInfo.map { info =>
-        val metadataPath = s.metadataFilePath()
-        info.operatorId -> new OperatorStateMetadataLog(sparkSession,
-          metadataPath.toString)
-      }
-      case _ => Seq.empty
-    }.toMap
-  }
-
   /** Whether all fields of the query have been initialized */
   private def isInitialized: Boolean = state.get != INITIALIZING
 

diff --git a/...core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateExec.scala b/...core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateExec.scala
@@ -440,20 +440,6 @@ case class TransformWithStateExec(
     OperatorStateMetadataV2(operatorInfo, stateStoreInfo, json)
   }
 
-  private def stateSchemaFilePath(storeName: Option[String] = None): Path = {
-    def stateInfo = getStateInfo
-    val stateCheckpointPath =
-      new Path(getStateInfo.checkpointLocation,
-        s"${stateInfo.operatorId.toString}")
-    storeName match {
-      case Some(storeName) =>
-        val storeNamePath = new Path(stateCheckpointPath, storeName)
-        new Path(new Path(storeNamePath, "_metadata"), "schema")
-      case None =>
-        new Path(new Path(stateCheckpointPath, "_metadata"), "schema")
-    }
-  }
-
   override protected def doExecute(): RDD[InternalRow] = {
     metrics // force lazy init at driver
 

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala
@@ -165,6 +165,20 @@ trait StateStoreWriter extends StatefulOperator with PythonSQLMetrics { self: Sp
       "number of state store instances")
   ) ++ stateStoreCustomMetrics ++ pythonMetrics
 
+  def stateSchemaFilePath(storeName: Option[String] = None): Path = {
+    def stateInfo = getStateInfo
+    val stateCheckpointPath =
+      new Path(getStateInfo.checkpointLocation,
+        s"${stateInfo.operatorId.toString}")
+    storeName match {
+      case Some(storeName) =>
+        val storeNamePath = new Path(stateCheckpointPath, storeName)
+        new Path(new Path(storeNamePath, "_metadata"), "schema")
+      case None =>
+        new Path(new Path(stateCheckpointPath, "_metadata"), "schema")
+    }
+  }
+
   /**
    * Get the progress made by this stateful operator after execution. This should be called in
    * the driver after this SparkPlan has been executed and metrics have been updated.