Fix comments.
Sun Rui committed Jul 13, 2015
commit 35ecfa3a5ad6594d5763083b221eeaab30366b61
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/api/r/RRDD.scala
@@ -391,7 +391,7 @@ private[r] object RRDD {
   private def createRProcess(port: Int, script: String): BufferedStreamThread = {
     val rCommand = SparkEnv.get.conf.get("spark.sparkr.r.command", "Rscript")
     val rOptions = "--vanilla"
-    val rLibDir = RUtils.sparkRPackagePath(driver = false)
+    val rLibDir = RUtils.sparkRPackagePath(isDriver = false)
     val rExecScript = rLibDir + "/SparkR/worker/" + script
     val pb = new ProcessBuilder(List(rCommand, rOptions, rExecScript))
     // Unset the R_TESTS environment variable for workers.
34 changes: 22 additions & 12 deletions core/src/main/scala/org/apache/spark/api/r/RUtils.scala
@@ -19,7 +19,7 @@ package org.apache.spark.api.r
 
 import java.io.File
 
-import org.apache.spark.SparkException
+import org.apache.spark.{SparkEnv, SparkException}
 
 private[spark] object RUtils {
   /**
@@ -34,22 +34,32 @@ private[spark] object RUtils {
 
   /**
    * Get the SparkR package path in various deployment modes.
+   * This assumes that Spark properties `spark.master` and `spark.submit.deployMode`
+   * and environment variable `SPARK_HOME` are set.
    */
-  def sparkRPackagePath(driver: Boolean): String = {
-    val yarnMode = sys.env.get("SPARK_YARN_MODE")
-    if (!yarnMode.isEmpty && yarnMode.get == "true" &&
-        !(driver && System.getProperty("spark.master") == "yarn-client")) {
-      // For workers in YARN modes and driver in yarn cluster mode,
-      // the SparkR package distributed as an archive resource should be pointed to
-      // by a symbol link "sparkr" in the current directory.
+  def sparkRPackagePath(isDriver: Boolean): String = {
+    val (master, deployMode) =
+      if (isDriver) {
+        (sys.props("spark.master"), sys.props("spark.submit.deployMode"))
+      } else {
+        val sparkConf = SparkEnv.get.conf
+        (sparkConf.get("spark.master"), sparkConf.get("spark.submit.deployMode"))
+      }
+
+    val isYarnCluster = master.contains("yarn") && deployMode == "cluster"
+    val isYarnClient = master.contains("yarn") && deployMode == "client"
+
+    // In YARN mode, the SparkR package is distributed as an archive symbolically
+    // linked to the "sparkr" file in the current directory. Note that this does not apply
+    // to the driver in client mode because it is run outside of the cluster.
+    if (isYarnCluster || (isYarnClient && !isDriver)) {
       new File("sparkr").getAbsolutePath
     } else {
-      // TBD: add support for MESOS
-      val rPackagePath = localSparkRPackagePath
-      if (rPackagePath.isEmpty) {
+      // Otherwise, assume the package is local
+      // TODO: support this for Mesos
+      localSparkRPackagePath.getOrElse {
         throw new SparkException("SPARK_HOME not set. Can't locate SparkR package.")
       }
-      rPackagePath.get
     }
   }
 }
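
The new resolution rule is effectively a pure function of (master, deployMode, isDriver). Below is a minimal standalone sketch of that rule, assuming (consistently with RUtils.localSparkRPackagePath) that the local install lives under SPARK_HOME/R/lib; the object and method names are illustrative, not Spark API:

    import java.io.File

    object SparkRPathSketch {
      // Sketch of the rule above: code running inside a YARN container uses the
      // localized "sparkr" symlink; everything else uses the local install.
      def resolveSparkRPackagePath(
          master: String,
          deployMode: String,
          isDriver: Boolean,
          sparkHome: Option[String]): String = {
        val isYarnCluster = master.contains("yarn") && deployMode == "cluster"
        val isYarnClient = master.contains("yarn") && deployMode == "client"
        if (isYarnCluster || (isYarnClient && !isDriver)) {
          // YARN localizes the SparkR archive and symlinks it as "sparkr"
          // in each container's working directory.
          new File("sparkr").getAbsolutePath
        } else {
          // Driver in client mode, or a non-YARN deployment: SPARK_HOME/R/lib.
          sparkHome
            .map(home => Seq(home, "R", "lib").mkString(File.separator))
            .getOrElse(throw new IllegalStateException(
              "SPARK_HOME not set. Can't locate SparkR package."))
        }
      }

      def main(args: Array[String]): Unit = {
        // In yarn-client mode the driver reads the local install, while a
        // worker (isDriver = false) reads the container symlink.
        println(resolveSparkRPackagePath("yarn-client", "client", isDriver = true,
          sys.env.get("SPARK_HOME")))
        println(resolveSparkRPackagePath("yarn-client", "client", isDriver = false, None))
      }
    }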
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/deploy/RRunner.scala
@@ -71,7 +71,7 @@ object RRunner {
     val builder = new ProcessBuilder(Seq(rCommand, rFileNormalized) ++ otherArgs)
     val env = builder.environment()
     env.put("EXISTING_SPARKR_BACKEND_PORT", sparkRBackendPort.toString)
-    val rPackageDir = RUtils.sparkRPackagePath(driver = true)
+    val rPackageDir = RUtils.sparkRPackagePath(isDriver = true)
     env.put("SPARKR_PACKAGE_DIR", rPackageDir)
     env.put("R_PROFILE_USER",
       Seq(rPackageDir, "SparkR", "profile", "general.R").mkString(File.separator))
13 changes: 10 additions & 3 deletions core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -264,6 +264,11 @@ object SparkSubmit {
       }
     }
 
+    // Update args.deployMode if it is null. It will be passed down as a Spark property later.
+    (args.deployMode, deployMode) match {
+      case (null, CLIENT) => args.deployMode = "client"
+      case (null, CLUSTER) => args.deployMode = "cluster"
+    }
     val isYarnCluster = clusterManager == YARN && deployMode == CLUSTER
     val isMesosCluster = clusterManager == MESOS && deployMode == CLUSTER
 
@@ -349,16 +354,16 @@
       }
     }
 
-    // In yarn mode for an R app, add the SparkR package archive to archives
+    // In YARN mode for an R app, add the SparkR package archive to archives
     // that can be distributed with the job
     if (args.isR && clusterManager == YARN) {
       val rPackagePath = RUtils.localSparkRPackagePath
       if (rPackagePath.isEmpty) {
-        printErrorAndExit("SPARK_HOME does not exist for R application in yarn mode.")
+        printErrorAndExit("SPARK_HOME does not exist for R application in YARN mode.")
       }
       val rPackageFile = new File(rPackagePath.get, SPARKR_PACKAGE_ARCHIVE)
       if (!rPackageFile.exists()) {
-        printErrorAndExit(s"$SPARKR_PACKAGE_ARCHIVE does not exist for R application in yarn mode.")
+        printErrorAndExit(s"$SPARKR_PACKAGE_ARCHIVE does not exist for R application in YARN mode.")
       }
       val localURI = Utils.resolveURI(rPackageFile.getAbsolutePath)
 
@@ -394,6 +399,8 @@
 
       // All cluster managers
       OptionAssigner(args.master, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.master"),
+      OptionAssigner(args.deployMode, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
+        sysProp = "spark.submit.deployMode"),
       OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.app.name"),
       OptionAssigner(args.jars, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.jars"),
       OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.jars.ivy"),
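
These two SparkSubmit hunks close the loop with RUtils: the first normalizes a missing --deploy-mode into an explicit "client" or "cluster" string, and the new OptionAssigner publishes it as the spark.submit.deployMode property that sparkRPackagePath reads back. One caveat: the match in the first hunk has no wildcard arm, so it throws scala.MatchError whenever args.deployMode is already non-null; a catch-all arm (case _ =>) would make it total. A hedged sketch of the consumer side (the default values are illustrative, not Spark's):

    import org.apache.spark.SparkConf

    object DeployModeSketch {
      def main(args: Array[String]): Unit = {
        // Once SparkSubmit has published the property, any component holding a
        // SparkConf can recover the resolved mode without re-parsing CLI args.
        val conf = new SparkConf()
        val master = conf.get("spark.master", "local[*]")
        val deployMode = conf.get("spark.submit.deployMode", "client")
        val isYarnCluster = master.contains("yarn") && deployMode == "cluster"
        println(s"master=$master, deployMode=$deployMode, yarnCluster=$isYarnCluster")
      }
    }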
3 changes: 3 additions & 0 deletions project/MimaExcludes.scala
@@ -83,6 +83,9 @@ object MimaExcludes {
             "org.apache.spark.streaming.scheduler.InputInfo$"),
           ProblemFilters.exclude[MissingClassProblem](
             "org.apache.spark.streaming.scheduler.InputInfo")
+          // SPARK-6797 Support YARN modes for SparkR
+          ProblemFilters.exclude[MissingMethodProblem](
+            "org.apache.spark.api.r.BaseRRDD.this")
         )
 
       case v if v.startsWith("1.4") =>
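
A note on the MimaExcludes entry: Mima checks the new binaries against the previous release, and MissingMethodProblem on "org.apache.spark.api.r.BaseRRDD.this" (Mima's name for a constructor) indicates that BaseRRDD's constructor signature changed, presumably because workers now resolve the R library directory themselves via RUtils.sparkRPackagePath instead of receiving it as a constructor argument. Since the org.apache.spark.api.r classes are Spark-internal, suppressing the break is reasonable. Also note that, as committed, the new exclusion is missing a comma after the preceding InputInfo entry, so this file does not compile until one is added.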