-
Notifications
You must be signed in to change notification settings - Fork 29.1k
[SPARK-22646] [Submission] Spark on Kubernetes - basic submission client #19717
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
dcaac45
27c67ff
6d597d0
5b9fa39
5ccadb5
12f2797
c35fe48
faa2849
347ed69
0e8ca01
3a0b8e3
83d0b9c
44c40b1
67bc847
7d2b303
caf2206
2e7810b
cbcd30e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -394,8 +394,8 @@ class SparkSubmitSuite | |
| "--master", "k8s://host:port", | ||
| "--executor-memory", "5g", | ||
| "--class", "org.SomeClass", | ||
| "--kubernetes-namespace", "foo", | ||
| "--driver-memory", "4g", | ||
| "--conf", "spark.kubernetes.namespace=spark", | ||
| "--conf", "spark.kubernetes.driver.docker.image=bar", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we also test the arg "--kubernetes-namespace"?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
| "/home/thejar.jar", | ||
| "arg1") | ||
|
|
@@ -410,7 +410,7 @@ class SparkSubmitSuite | |
| classpath should have length (0) | ||
| conf.get("spark.executor.memory") should be ("5g") | ||
|
||
| conf.get("spark.driver.memory") should be ("4g") | ||
| conf.get("spark.kubernetes.namespace") should be ("foo") | ||
| conf.get("spark.kubernetes.namespace") should be ("spark") | ||
| conf.get("spark.kubernetes.driver.docker.image") should be ("bar") | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -157,6 +157,15 @@ of the most common options to set are: | |
| or in your default properties file. | ||
| </td> | ||
| </tr> | ||
| <tr> | ||
| <td><code>spark.driver.memoryOverhead</code></td> | ||
| <td>driverMemory * 0.10, with minimum of 384 </td> | ||
| <td> | ||
| The amount of off-heap memory (in megabytes) to be allocated per driver in cluster mode. This is | ||
| memory that accounts for things like VM overheads, interned strings, other native overheads, etc. | ||
| This tends to grow with the container size (typically 6-10%). | ||
|
||
| </td> | ||
| </tr> | ||
| <tr> | ||
| <td><code>spark.executor.memory</code></td> | ||
| <td>1g</td> | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -234,18 +234,11 @@ To use a custom metrics.properties for the application master and executors, upd | |
| The amount of off-heap memory (in megabytes) to be allocated per executor. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the executor size (typically 6-10%). | ||
| </td> | ||
| </tr> | ||
| <tr> | ||
| <td><code>spark.yarn.driver.memoryOverhead</code></td> | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we should make this configuration backward compatible; users should still be able to use
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there an example PR for deprecating a config property that I can follow?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Look for
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, done. |
||
| <td>driverMemory * 0.10, with minimum of 384 </td> | ||
| <td> | ||
| The amount of off-heap memory (in megabytes) to be allocated per driver in cluster mode. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the container size (typically 6-10%). | ||
| </td> | ||
| </tr> | ||
| <tr> | ||
| <td><code>spark.yarn.am.memoryOverhead</code></td> | ||
| <td>AM memory * 0.10, with minimum of 384 </td> | ||
| <td> | ||
| Same as <code>spark.yarn.driver.memoryOverhead</code>, but for the YARN Application Master in client mode. | ||
| Same as <code>spark.driver.memoryOverhead</code>, but for the YARN Application Master in client mode. | ||
| </td> | ||
| </tr> | ||
| <tr> | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -79,19 +79,10 @@ private[spark] object Config extends Logging { | |
|
|
||
| val KUBERNETES_EXECUTOR_LIMIT_CORES = | ||
| ConfigBuilder("spark.kubernetes.executor.limit.cores") | ||
| .doc("Specify the hard cpu limit for a single executor pod") | ||
| .doc("Specify the hard cpu limit for each executor pod") | ||
| .stringConf | ||
| .createOptional | ||
|
|
||
| val KUBERNETES_DRIVER_MEMORY_OVERHEAD = | ||
| ConfigBuilder("spark.kubernetes.driver.memoryOverhead") | ||
| .doc("The amount of off-heap memory (in megabytes) to be allocated for the driver and the " + | ||
| "driver submission server. This is memory that accounts for things like VM overheads, " + | ||
| "interned strings, other native overheads, etc. This tends to grow with the driver's " + | ||
| "memory size (typically 6-10%).") | ||
| .bytesConf(ByteUnit.MiB) | ||
| .createOptional | ||
|
|
||
| // Note that while we set a default for this when we start up the | ||
| // scheduler, the specific default value is dynamically determined | ||
| // based on the executor memory. | ||
|
|
@@ -150,6 +141,7 @@ private[spark] object Config extends Logging { | |
| ConfigBuilder("spark.kubernetes.report.interval") | ||
| .doc("Interval between reports of the current app status in cluster mode.") | ||
| .timeConf(TimeUnit.MILLISECONDS) | ||
| .checkValue(interval => interval > 0, s"Logging interval must be a positive time value.") | ||
| .createWithDefaultString("1s") | ||
|
|
||
| private[spark] val JARS_DOWNLOAD_LOCATION = | ||
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,10 @@ | |
| */ | ||
| package org.apache.spark.deploy.k8s.submit | ||
|
|
||
| import java.util.UUID | ||
|
|
||
| import com.google.common.primitives.Longs | ||
|
|
||
| import org.apache.spark.SparkConf | ||
| import org.apache.spark.deploy.k8s.Config._ | ||
| import org.apache.spark.deploy.k8s.ConfigurationUtils | ||
|
|
@@ -43,8 +47,11 @@ private[spark] class DriverConfigurationStepsOrchestrator( | |
| // label values are considerably restrictive, e.g. must be no longer than 63 characters in | ||
| // length. So we generate a separate identifier for the app ID itself, and bookkeeping that | ||
| // requires finding "all pods for this application" should use the kubernetesAppId. | ||
| private val kubernetesResourceNamePrefix = | ||
| s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") | ||
| private val kubernetesResourceNamePrefix = { | ||
| val uuid = UUID.nameUUIDFromBytes(Longs.toByteArray(launchTime)) | ||
| s"$appName-$uuid".toLowerCase.replaceAll("\\.", "-") | ||
| } | ||
|
|
||
| private val dockerImagePullPolicy = submissionSparkConf.get(DOCKER_IMAGE_PULL_POLICY) | ||
| private val jarsDownloadPath = submissionSparkConf.get(JARS_DOWNLOAD_LOCATION) | ||
| private val filesDownloadPath = submissionSparkConf.get(FILES_DOWNLOAD_LOCATION) | ||
|
|
@@ -91,7 +98,7 @@ private[spark] class DriverConfigurationStepsOrchestrator( | |
| } | ||
| mayBeResource | ||
| } else { | ||
| Option.empty | ||
| None | ||
| } | ||
|
|
||
| val sparkJars = submissionSparkConf.getOption("spark.jars") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we support jars/files on distributed file systems or HTTP/HTTPS?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In our fork, we use a Kubernetes init-container co-located with the driver/executor containers for downloading remote dependencies, e.g., from HTTP/HTTPS endpoints. The init-container will be introduced in a subsequent PR. |
||
|
|
@@ -109,7 +116,7 @@ private[spark] class DriverConfigurationStepsOrchestrator( | |
| jarsDownloadPath, | ||
| filesDownloadPath)) | ||
| } else { | ||
| Option.empty | ||
| None | ||
| } | ||
|
|
||
| Seq( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,6 +25,7 @@ import org.apache.spark.deploy.k8s.Config._ | |
| import org.apache.spark.deploy.k8s.ConfigurationUtils | ||
| import org.apache.spark.deploy.k8s.Constants._ | ||
| import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec | ||
| import org.apache.spark.internal.config.{DRIVER_CLASS_PATH, DRIVER_MEMORY, DRIVER_MEMORY_OVERHEAD} | ||
|
|
||
| /** | ||
| * Represents the initial setup required for the driver. | ||
|
|
@@ -43,7 +44,7 @@ private[spark] class BaseDriverConfigurationStep( | |
| .getOrElse(s"$kubernetesResourceNamePrefix-driver") | ||
|
|
||
| private val driverExtraClasspath = submissionSparkConf.get( | ||
| org.apache.spark.internal.config.DRIVER_CLASS_PATH) | ||
| DRIVER_CLASS_PATH) | ||
|
|
||
| private val driverDockerImage = submissionSparkConf | ||
| .get(DRIVER_DOCKER_IMAGE) | ||
|
|
@@ -55,18 +56,17 @@ private[spark] class BaseDriverConfigurationStep( | |
|
|
||
| // Memory settings | ||
| private val driverMemoryMiB = submissionSparkConf.get( | ||
| org.apache.spark.internal.config.DRIVER_MEMORY) | ||
| DRIVER_MEMORY) | ||
| private val driverMemoryString = submissionSparkConf.get( | ||
| org.apache.spark.internal.config.DRIVER_MEMORY.key, | ||
| org.apache.spark.internal.config.DRIVER_MEMORY.defaultValueString) | ||
| DRIVER_MEMORY.key, | ||
| DRIVER_MEMORY.defaultValueString) | ||
| private val memoryOverheadMiB = submissionSparkConf | ||
|
||
| .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) | ||
| .get(DRIVER_MEMORY_OVERHEAD) | ||
| .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * driverMemoryMiB).toInt, | ||
| MEMORY_OVERHEAD_MIN_MIB)) | ||
| private val driverContainerMemoryWithOverheadMiB = driverMemoryMiB + memoryOverheadMiB | ||
|
|
||
| override def configureDriver( | ||
| driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { | ||
| override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { | ||
| val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => | ||
| new EnvVarBuilder() | ||
| .withName(ENV_SUBMIT_EXTRA_CLASSPATH) | ||
|
|
@@ -83,11 +83,12 @@ private[spark] class BaseDriverConfigurationStep( | |
| " Spark bookkeeping operations.") | ||
|
|
||
| val driverCustomEnvs = submissionSparkConf.getAllWithPrefix(KUBERNETES_DRIVER_ENV_KEY).toSeq | ||
| .map(env => | ||
| .map { env => | ||
| new EnvVarBuilder() | ||
| .withName(env._1) | ||
| .withValue(env._2) | ||
| .build()) | ||
| .build() | ||
| } | ||
|
|
||
| val allDriverAnnotations = driverCustomAnnotations ++ Map(SPARK_APP_NAME_ANNOTATION -> appName) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: should also add doc for this config here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it's added under
spark.driver.memory. See https://github.com/apache-spark-on-k8s/spark/blob/0e8ca012dc6d5dfd5645f45b6d1bb84e1e9e72a5/docs/configuration.md.