Commit 6023c8b

Merge remote-tracking branch 'upstream/master' into orderby-agg
2 parents: 8f73c40 + 47058ca

113 files changed: +1072 / -1052 lines


R/pkg/R/deserialize.R

Lines changed: 1 addition & 1 deletion

@@ -60,7 +60,7 @@ readString <- function(con) {
   raw <- readBin(con, raw(), stringLen, endian = "big")
   string <- rawToChar(raw)
   Encoding(string) <- "UTF-8"
-  string
+  string
 }

 readInt <- function(con) {

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 9 additions & 4 deletions

@@ -319,9 +319,6 @@ object SparkSubmit {

     // The following modes are not supported or applicable
     (clusterManager, deployMode) match {
-      case (MESOS, CLUSTER) if args.isPython =>
-        printErrorAndExit("Cluster deploy mode is currently not supported for python " +
-          "applications on Mesos clusters.")
       case (MESOS, CLUSTER) if args.isR =>
         printErrorAndExit("Cluster deploy mode is currently not supported for R " +
           "applications on Mesos clusters.")

@@ -554,7 +551,15 @@
     if (isMesosCluster) {
       assert(args.useRest, "Mesos cluster mode is only supported through the REST submission API")
       childMainClass = "org.apache.spark.deploy.rest.RestSubmissionClient"
-      childArgs += (args.primaryResource, args.mainClass)
+      if (args.isPython) {
+        // Second argument is main class
+        childArgs += (args.primaryResource, "")
+        if (args.pyFiles != null) {
+          sysProps("spark.submit.pyFiles") = args.pyFiles
+        }
+      } else {
+        childArgs += (args.primaryResource, args.mainClass)
+      }
       if (args.childArgs != null) {
         childArgs ++= args.childArgs
       }
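
For context, a minimal sketch of the argument layout this change produces for a Python application submitted to a Mesos cluster. The resource and py-files URIs are invented for the example, and `++= Seq(...)` stands in for the two-argument `+=` used in SparkSubmit; this is an illustration, not the commit's code.

  import scala.collection.mutable.{ArrayBuffer, Map => MutableMap}

  // Hypothetical values; in SparkSubmit these come from the parsed arguments.
  val primaryResource = "http://example.com/app.py"
  val pyFiles: String = "http://example.com/deps.py"
  val isPython = true
  val mainClass = ""                      // a Python app has no JVM main class

  val childArgs = ArrayBuffer[String]()
  val sysProps = MutableMap[String, String]()

  if (isPython) {
    // The empty second argument fills the main-class slot; the cluster scheduler
    // later treats an empty main class as "this driver is running Python".
    childArgs ++= Seq(primaryResource, "")
    if (pyFiles != null) {
      sysProps("spark.submit.pyFiles") = pyFiles
    }
  } else {
    childArgs ++= Seq(primaryResource, mainClass)
  }

  println(childArgs)   // ArrayBuffer(http://example.com/app.py, )  -- empty main-class slot
  println(sysProps)    // Map(spark.submit.pyFiles -> http://example.com/deps.py)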

core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala

Lines changed: 30 additions & 12 deletions

@@ -29,7 +29,6 @@ import org.apache.mesos.Protos.Environment.Variable
 import org.apache.mesos.Protos.TaskStatus.Reason
 import org.apache.mesos.Protos.{TaskState => MesosTaskState, _}
 import org.apache.mesos.{Scheduler, SchedulerDriver}
-
 import org.apache.spark.deploy.mesos.MesosDriverDescription
 import org.apache.spark.deploy.rest.{CreateSubmissionResponse, KillSubmissionResponse, SubmissionStatusResponse}
 import org.apache.spark.metrics.MetricsSystem

@@ -375,21 +374,20 @@ private[spark] class MesosClusterScheduler(
     val executorOpts = desc.schedulerProperties.map { case (k, v) => s"-D$k=$v" }.mkString(" ")
     envBuilder.addVariables(
       Variable.newBuilder().setName("SPARK_EXECUTOR_OPTS").setValue(executorOpts))
-    val cmdOptions = generateCmdOption(desc).mkString(" ")
     val dockerDefined = desc.schedulerProperties.contains("spark.mesos.executor.docker.image")
     val executorUri = desc.schedulerProperties.get("spark.executor.uri")
       .orElse(desc.command.environment.get("SPARK_EXECUTOR_URI"))
-    val appArguments = desc.command.arguments.mkString(" ")
-    val (executable, jar) = if (dockerDefined) {
+    // Gets the path to run spark-submit, and the path to the Mesos sandbox.
+    val (executable, sandboxPath) = if (dockerDefined) {
       // Application jar is automatically downloaded in the mounted sandbox by Mesos,
       // and the path to the mounted volume is stored in $MESOS_SANDBOX env variable.
-      ("./bin/spark-submit", s"$$MESOS_SANDBOX/${desc.jarUrl.split("/").last}")
+      ("./bin/spark-submit", "$MESOS_SANDBOX")
     } else if (executorUri.isDefined) {
       builder.addUris(CommandInfo.URI.newBuilder().setValue(executorUri.get).build())
       val folderBasename = executorUri.get.split('/').last.split('.').head
       val cmdExecutable = s"cd $folderBasename*; $prefixEnv bin/spark-submit"
-      val cmdJar = s"../${desc.jarUrl.split("/").last}"
-      (cmdExecutable, cmdJar)
+      // Sandbox path points to the parent folder as we chdir into the folderBasename.
+      (cmdExecutable, "..")
     } else {
       val executorSparkHome = desc.schedulerProperties.get("spark.mesos.executor.home")
         .orElse(conf.getOption("spark.home"))

@@ -398,30 +396,50 @@ private[spark] class MesosClusterScheduler(
         throw new SparkException("Executor Spark home `spark.mesos.executor.home` is not set!")
       }
       val cmdExecutable = new File(executorSparkHome, "./bin/spark-submit").getCanonicalPath
-      val cmdJar = desc.jarUrl.split("/").last
-      (cmdExecutable, cmdJar)
+      // Sandbox points to the current directory by default with Mesos.
+      (cmdExecutable, ".")
     }
-    builder.setValue(s"$executable $cmdOptions $jar $appArguments")
+    val primaryResource = new File(sandboxPath, desc.jarUrl.split("/").last).toString()
+    val cmdOptions = generateCmdOption(desc, sandboxPath).mkString(" ")
+    val appArguments = desc.command.arguments.mkString(" ")
+    builder.setValue(s"$executable $cmdOptions $primaryResource $appArguments")
     builder.setEnvironment(envBuilder.build())
     conf.getOption("spark.mesos.uris").map { uris =>
       setupUris(uris, builder)
     }
+    desc.schedulerProperties.get("spark.mesos.uris").map { uris =>
+      setupUris(uris, builder)
+    }
+    desc.schedulerProperties.get("spark.submit.pyFiles").map { pyFiles =>
+      setupUris(pyFiles, builder)
+    }
     builder.build()
   }

-  private def generateCmdOption(desc: MesosDriverDescription): Seq[String] = {
+  private def generateCmdOption(desc: MesosDriverDescription, sandboxPath: String): Seq[String] = {
     var options = Seq(
       "--name", desc.schedulerProperties("spark.app.name"),
-      "--class", desc.command.mainClass,
       "--master", s"mesos://${conf.get("spark.master")}",
       "--driver-cores", desc.cores.toString,
       "--driver-memory", s"${desc.mem}M")
+
+    // Assume empty main class means we're running python
+    if (!desc.command.mainClass.equals("")) {
+      options ++= Seq("--class", desc.command.mainClass)
+    }
+
     desc.schedulerProperties.get("spark.executor.memory").map { v =>
       options ++= Seq("--executor-memory", v)
     }
     desc.schedulerProperties.get("spark.cores.max").map { v =>
       options ++= Seq("--total-executor-cores", v)
     }
+    desc.schedulerProperties.get("spark.submit.pyFiles").map { pyFiles =>
+      val formattedFiles = pyFiles.split(",")
+        .map { path => new File(sandboxPath, path.split("/").last).toString() }
+        .mkString(",")
+      options ++= Seq("--py-files", formattedFiles)
+    }
     options
   }
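
As a rough illustration of the `--py-files` rewriting in `generateCmdOption` above, each listed file keeps only its basename and is joined to the sandbox path, since Mesos fetches the URIs into the sandbox before the driver starts. The URIs below are invented for the example; the real values come from `spark.submit.pyFiles` in the driver description.

  import java.io.File

  // Invented inputs for the sketch; sandboxPath is "$MESOS_SANDBOX" in the docker
  // case from the diff, and "." or ".." in the other branches.
  val sandboxPath = "$MESOS_SANDBOX"
  val pyFiles = "http://repo.example.com/libs/util.py,hdfs:///tools/extra.py"

  val formattedFiles = pyFiles.split(",")
    .map { path => new File(sandboxPath, path.split("/").last).toString() }
    .mkString(",")

  // Prints: --py-files $MESOS_SANDBOX/util.py,$MESOS_SANDBOX/extra.py
  println(s"--py-files $formattedFiles")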

core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala

Lines changed: 1 addition & 0 deletions

@@ -812,6 +812,7 @@ class DAGSchedulerSuite
     }

     // The map stage should have been submitted.
+    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
     assert(countSubmittedMapStageAttempts() === 1)

     complete(taskSets(0), Seq(

docs/running-on-mesos.md

Lines changed: 2 additions & 0 deletions

@@ -157,6 +157,8 @@ From the client, you can submit a job to Mesos cluster by running `spark-submit`
 to the url of the MesosClusterDispatcher (e.g: mesos://dispatcher:7077). You can view driver statuses on the
 Spark cluster Web UI.

+Note that jars or python files that are passed to spark-submit should be URIs reachable by Mesos slaves.
+
 # Mesos Run Modes

 Spark can run over Mesos in two modes: "fine-grained" (default) and "coarse-grained".

mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala

Lines changed: 2 additions & 0 deletions

@@ -32,6 +32,7 @@ private[ml] trait MultilayerPerceptronParams extends PredictorParams
   with HasSeed with HasMaxIter with HasTol {
   /**
    * Layer sizes including input size and output size.
+   * Default: Array(1, 1)
    * @group param
    */
   final val layers: IntArrayParam = new IntArrayParam(this, "layers",

@@ -50,6 +51,7 @@ private[ml] trait MultilayerPerceptronParams extends PredictorParams
    * Data is stacked within partitions. If block size is more than remaining data in
    * a partition then it is adjusted to the size of this data.
    * Recommended size is between 10 and 1000.
+   * Default: 128
    * @group expertParam
    */
   final val blockSize: IntParam = new IntParam(this, "blockSize",
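
For reference, a minimal usage sketch of the two parameters whose defaults are documented above; the layer sizes here are example values, not the defaults.

  import org.apache.spark.ml.classification.MultilayerPerceptronClassifier

  // layers defaults to Array(1, 1) and blockSize to 128; any real model overrides
  // layers so the first entry matches the feature count and the last the class count.
  val mlp = new MultilayerPerceptronClassifier()
    .setLayers(Array(4, 5, 3))   // example: 4 features, one hidden layer of 5, 3 classes
    .setBlockSize(128)           // the documented default, set explicitly here
    .setMaxIter(100)
    .setSeed(1234L)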

mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala

Lines changed: 1 addition & 0 deletions

@@ -38,6 +38,7 @@ class BinaryClassificationEvaluator(override val uid: String)

   /**
    * param for metric name in evaluation
+   * Default: areaUnderROC
    * @group param
    */
   val metricName: Param[String] = {
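
A short usage sketch of the documented default; the column names are illustrative and the metric override is only to show the setter.

  import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator

  // metricName defaults to "areaUnderROC"; "areaUnderPR" is the other accepted value.
  val evaluator = new BinaryClassificationEvaluator()
    .setRawPredictionCol("rawPrediction")
    .setLabelCol("label")
    .setMetricName("areaUnderPR")   // example override of the default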

mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala

Lines changed: 1 addition & 0 deletions

@@ -41,6 +41,7 @@ final class Binarizer(override val uid: String)
    * Param for threshold used to binarize continuous features.
    * The features greater than the threshold, will be binarized to 1.0.
    * The features equal to or less than the threshold, will be binarized to 0.0.
+   * Default: 0.0
    * @group param
    */
   val threshold: DoubleParam =
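
A brief sketch of the threshold parameter in use; the column names and the 0.5 override are example values.

  import org.apache.spark.ml.feature.Binarizer

  // With the documented default threshold of 0.0, strictly positive features map to 1.0
  // and everything else to 0.0; the override below is only an example.
  val binarizer = new Binarizer()
    .setInputCol("feature")
    .setOutputCol("binarized_feature")
    .setThreshold(0.5)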

mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala

Lines changed: 1 addition & 0 deletions

@@ -35,6 +35,7 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol

   /**
    * The minimum of documents in which a term should appear.
+   * Default: 0
    * @group param
    */
   final val minDocFreq = new IntParam(
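
A short sketch of the documented default in use; the column names and the value 2 are example choices, not defaults.

  import org.apache.spark.ml.feature.IDF

  // minDocFreq defaults to 0; raising it drops terms that appear in too few documents.
  val idf = new IDF()
    .setInputCol("rawFeatures")
    .setOutputCol("features")
    .setMinDocFreq(2)   // example override of the default 0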

mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala

Lines changed: 2 additions & 0 deletions

@@ -98,6 +98,7 @@ class StopWordsRemover(override val uid: String)

   /**
    * the stop words set to be filtered out
+   * Default: [[StopWords.English]]
    * @group param
    */
   val stopWords: StringArrayParam = new StringArrayParam(this, "stopWords", "stop words")

@@ -110,6 +111,7 @@ class StopWordsRemover(override val uid: String)

   /**
    * whether to do a case sensitive comparison over the stop words
+   * Default: false
    * @group param
    */
   val caseSensitive: BooleanParam = new BooleanParam(this, "caseSensitive",
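
A minimal usage sketch of the two parameters whose defaults are documented above; the column names and the custom stop-word list are example values.

  import org.apache.spark.ml.feature.StopWordsRemover

  // stopWords defaults to StopWords.English and caseSensitive to false, as documented;
  // both setters are shown here with illustrative values.
  val remover = new StopWordsRemover()
    .setInputCol("raw")
    .setOutputCol("filtered")
    .setStopWords(Array("a", "an", "the"))   // example custom list
    .setCaseSensitive(false)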
