
Commit 9ebb3f2

Merge branch 'master' into barrier-udf
2 parents 9be4cad + fa9b6c3 commit 9ebb3f2

121 files changed (+2600, -640 lines)


connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameReader.scala

Lines changed: 3 additions & 1 deletion
@@ -458,7 +458,9 @@ class DataFrameReader private[sql] (sparkSession: SparkSession) extends Logging
    */
   def table(tableName: String): DataFrame = {
     sparkSession.newDataFrame { builder =>
-      builder.getReadBuilder.getNamedTableBuilder.setUnparsedIdentifier(tableName)
+      builder.getReadBuilder.getNamedTableBuilder
+        .setUnparsedIdentifier(tableName)
+        .putAllOptions(extraOptions.toMap.asJava)
     }
   }
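With this change, options set on the reader are forwarded to the named-table read relation instead of being dropped. A minimal usage sketch (the `spark` session value is illustrative; the options and table name mirror the test added to PlanGenerationTestSuite below):

// Reader options now travel with the NamedTable relation in the Connect plan.
val df = spark.read
  .options(Map("p1" -> "v1", "p2" -> "v2"))
  .table("tempdb.myTable")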

connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 70 additions & 6 deletions
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.encoders.AgnosticEncoder
 import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{PrimitiveLongEncoder, ProductEncoder, StringEncoder, UnboundRowEncoder}
 import org.apache.spark.sql.catalyst.expressions.RowOrdering
 import org.apache.spark.sql.connect.client.SparkResult
-import org.apache.spark.sql.connect.common.DataTypeProtoConverter
+import org.apache.spark.sql.connect.common.{DataTypeProtoConverter, StorageLevelProtoConverter}
 import org.apache.spark.sql.functions.{struct, to_json}
 import org.apache.spark.sql.types.{Metadata, StructType}
 import org.apache.spark.storage.StorageLevel
@@ -2771,22 +2771,86 @@ class Dataset[T] private[sql] (
     new DataFrameWriterV2[T](table, this)
   }

+  /**
+   * Persist this Dataset with the default storage level (`MEMORY_AND_DISK`).
+   *
+   * @group basic
+   * @since 3.4.0
+   */
   def persist(): this.type = {
-    throw new UnsupportedOperationException("persist is not implemented.")
+    sparkSession.analyze { builder =>
+      builder.getPersistBuilder.setRelation(plan.getRoot)
+    }
+    this
   }

+  /**
+   * Persist this Dataset with the given storage level.
+   *
+   * @param newLevel
+   *   One of: `MEMORY_ONLY`, `MEMORY_AND_DISK`, `MEMORY_ONLY_SER`, `MEMORY_AND_DISK_SER`,
+   *   `DISK_ONLY`, `MEMORY_ONLY_2`, `MEMORY_AND_DISK_2`, etc.
+   * @group basic
+   * @since 3.4.0
+   */
   def persist(newLevel: StorageLevel): this.type = {
-    throw new UnsupportedOperationException("persist is not implemented.")
+    sparkSession.analyze { builder =>
+      builder.getPersistBuilder
+        .setRelation(plan.getRoot)
+        .setStorageLevel(StorageLevelProtoConverter.toConnectProtoType(newLevel))
+    }
+    this
   }

+  /**
+   * Mark the Dataset as non-persistent, and remove all blocks for it from memory and disk. This
+   * will not un-persist any cached data that is built upon this Dataset.
+   *
+   * @param blocking
+   *   Whether to block until all blocks are deleted.
+   * @group basic
+   * @since 3.4.0
+   */
   def unpersist(blocking: Boolean): this.type = {
-    throw new UnsupportedOperationException("unpersist() is not implemented.")
+    sparkSession.analyze { builder =>
+      builder.getUnpersistBuilder
+        .setRelation(plan.getRoot)
+        .setBlocking(blocking)
+    }
+    this
   }

+  /**
+   * Mark the Dataset as non-persistent, and remove all blocks for it from memory and disk. This
+   * will not un-persist any cached data that is built upon this Dataset.
+   *
+   * @group basic
+   * @since 3.4.0
+   */
   def unpersist(): this.type = unpersist(blocking = false)

-  def cache(): this.type = {
-    throw new UnsupportedOperationException("cache() is not implemented.")
+  /**
+   * Persist this Dataset with the default storage level (`MEMORY_AND_DISK`).
+   *
+   * @group basic
+   * @since 3.4.0
+   */
+  def cache(): this.type = persist()
+
+  /**
+   * Get the Dataset's current storage level, or StorageLevel.NONE if not persisted.
+   *
+   * @group basic
+   * @since 3.4.0
+   */
+  def storageLevel: StorageLevel = {
+    StorageLevelProtoConverter.toStorageLevel(
+      sparkSession
+        .analyze { builder =>
+          builder.getGetStorageLevelBuilder.setRelation(plan.getRoot)
+        }
+        .getGetStorageLevel
+        .getStorageLevel)
   }

   def withWatermark(eventTime: String, delayThreshold: String): Dataset[T] = {
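These methods replace the earlier UnsupportedOperationException stubs: each now issues an AnalyzePlanRequest (Persist, Unpersist, or GetStorageLevel) against the plan root rather than touching local blocks. A minimal client-side sketch, assuming a Spark Connect Dataset named `df` (illustrative):

import org.apache.spark.storage.StorageLevel

df.persist(StorageLevel.DISK_ONLY)         // sent as a Persist request with an explicit level
val level: StorageLevel = df.storageLevel  // fetched from the server; StorageLevel.NONE if not persisted
df.unpersist(blocking = true)              // sent as an Unpersist request, waiting for block removal
df.cache()                                 // shorthand for persist() with the default MEMORY_AND_DISK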

connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala

Lines changed: 7 additions & 0 deletions
@@ -400,6 +400,13 @@ class SparkSession private[sql] (
     client.analyze(method, Some(plan), explainMode)
   }

+  private[sql] def analyze(
+      f: proto.AnalyzePlanRequest.Builder => Unit): proto.AnalyzePlanResponse = {
+    val builder = proto.AnalyzePlanRequest.newBuilder()
+    f(builder)
+    client.analyze(builder)
+  }
+
   private[sql] def sameSemantics(plan: proto.Plan, otherPlan: proto.Plan): Boolean = {
     client.sameSemantics(plan, otherPlan).getSameSemantics.getResult
   }
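This overload lets callers populate an arbitrary AnalyzePlanRequest instead of the fixed (method, plan, explainMode) form. For example, Dataset.persist() in this commit calls it as:

sparkSession.analyze { builder =>
  builder.getPersistBuilder.setRelation(plan.getRoot)
}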

connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala

Lines changed: 2 additions & 1 deletion
@@ -166,7 +166,8 @@ private[sql] class SparkConnectClient(
     analyze(builder)
   }

-  private def analyze(builder: proto.AnalyzePlanRequest.Builder): proto.AnalyzePlanResponse = {
+  private[sql] def analyze(
+      builder: proto.AnalyzePlanRequest.Builder): proto.AnalyzePlanResponse = {
     val request = builder
       .setUserContext(userContext)
       .setSessionId(sessionId)

connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala

Lines changed: 6 additions & 1 deletion
@@ -69,7 +69,7 @@ class PlanGenerationTestSuite
   // Borrowed from SparkFunSuite
   private val regenerateGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1"

-  protected val queryFilePath: Path = commonResourcePath.resolve("queries")
+  protected val queryFilePath: Path = commonResourcePath.resolve("query-tests/queries")

   // A relative path to /connector/connect/server, used by `ProtoToParsedPlanTestSuite` to run
   // with the datasource.
@@ -2162,4 +2162,9 @@ class PlanGenerationTestSuite
   test("replace") {
     simple.na.replace[Long]("id", Map(1L -> 8L))
   }
+
+  /* Reader API */
+  test("table API with options") {
+    session.read.options(Map("p1" -> "v1", "p2" -> "v2")).table("tempdb.myTable")
+  }
 }

connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/ArtifactSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ class ArtifactSuite extends ConnectFunSuite with BeforeAndAfterEach {
   }

   private val CHUNK_SIZE: Int = 32 * 1024
-  protected def artifactFilePath: Path = baseResourcePath.resolve("artifact-tests")
+  protected def artifactFilePath: Path = commonResourcePath.resolve("artifact-tests")
   protected def artifactCrcPath: Path = artifactFilePath.resolve("crc")

   private def getCrcValues(filePath: Path): Seq[Long] = {

connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala

Lines changed: 0 additions & 1 deletion
@@ -160,7 +160,6 @@ object CheckConnectJvmClientCompatibility {
     ProblemFilters.exclude[Problem]("org.apache.spark.sql.Dataset.flatMap"),
     ProblemFilters.exclude[Problem]("org.apache.spark.sql.Dataset.foreach"),
     ProblemFilters.exclude[Problem]("org.apache.spark.sql.Dataset.foreachPartition"),
-    ProblemFilters.exclude[Problem]("org.apache.spark.sql.Dataset.storageLevel"),
     ProblemFilters.exclude[Problem]("org.apache.spark.sql.Dataset.rdd"),
     ProblemFilters.exclude[Problem]("org.apache.spark.sql.Dataset.toJavaRDD"),
     ProblemFilters.exclude[Problem]("org.apache.spark.sql.Dataset.javaRDD"),

connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/ConnectFunSuite.scala

Lines changed: 1 addition & 2 deletions
@@ -52,7 +52,6 @@ trait ConnectFunSuite extends AnyFunSuite { // scalastyle:ignore funsuite
       "common",
       "src",
       "test",
-      "resources",
-      "query-tests").toAbsolutePath
+      "resources").toAbsolutePath
   }
 }

connector/connect/common/src/main/protobuf/spark/connect/base.proto

Lines changed: 1 addition & 14 deletions
@@ -21,6 +21,7 @@ package spark.connect;

 import "google/protobuf/any.proto";
 import "spark/connect/commands.proto";
+import "spark/connect/common.proto";
 import "spark/connect/expressions.proto";
 import "spark/connect/relations.proto";
 import "spark/connect/types.proto";
@@ -54,20 +55,6 @@ message UserContext {
   repeated google.protobuf.Any extensions = 999;
 }

-// StorageLevel for persisting Datasets/Tables.
-message StorageLevel {
-  // (Required) Whether the cache should use disk or not.
-  bool use_disk = 1;
-  // (Required) Whether the cache should use memory or not.
-  bool use_memory = 2;
-  // (Required) Whether the cache should use off-heap or not.
-  bool use_off_heap = 3;
-  // (Required) Whether the cached data is deserialized or not.
-  bool deserialized = 4;
-  // (Required) The number of replicas.
-  int32 replication = 5;
-}
-
 // Request to perform plan analyze, optionally to explain the plan.
 message AnalyzePlanRequest {
   // (Required)
connector/connect/common/src/main/protobuf/spark/connect/common.proto (new file, matching the spark/connect/common.proto import added above)

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+package spark.connect;
+
+option java_multiple_files = true;
+option java_package = "org.apache.spark.connect.proto";
+
+// StorageLevel for persisting Datasets/Tables.
+message StorageLevel {
+  // (Required) Whether the cache should use disk or not.
+  bool use_disk = 1;
+  // (Required) Whether the cache should use memory or not.
+  bool use_memory = 2;
+  // (Required) Whether the cache should use off-heap or not.
+  bool use_off_heap = 3;
+  // (Required) Whether the cached data is deserialized or not.
+  bool deserialized = 4;
+  // (Required) The number of replicas.
+  int32 replication = 5;
+}
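The generated bindings for this message live under org.apache.spark.connect.proto (per the java_package option above). A hedged sketch of how Spark's StorageLevel.MEMORY_AND_DISK would map onto these fields, assuming the standard protobuf-generated builder setters (illustrative; not code from this commit):

import org.apache.spark.connect.proto

// MEMORY_AND_DISK: on-heap, deserialized cache that spills to disk, one replica.
val memoryAndDisk = proto.StorageLevel
  .newBuilder()
  .setUseDisk(true)
  .setUseMemory(true)
  .setUseOffHeap(false)
  .setDeserialized(true)
  .setReplication(1)
  .build()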
