Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ private[mesos] class MesosClusterDispatcher(
case _ => throw new IllegalArgumentException("Unsupported recovery mode: " + recoveryMode)
}

private val scheduler = new MesosClusterScheduler(engineFactory, conf)
private val scheduler = new MesosClusterScheduler(engineFactory, conf, args.driverFailOver)

private val server = new MesosRestServer(args.host, args.port, conf, scheduler)
private val webUi = new MesosClusterUI(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ private[mesos] class MesosClusterDispatcherArguments(args: Array[String], conf:
var masterUrl: String = _
var zookeeperUrl: Option[String] = None
var propertiesFile: String = _
var driverFailOver = true // keep the driver registered with Mesos after dispatcher is stopped

parse(args.toList)

Expand Down Expand Up @@ -70,6 +71,10 @@ private[mesos] class MesosClusterDispatcherArguments(args: Array[String], conf:
propertiesFile = value
parse(tail)

case ("--disable-failover") :: tail =>
driverFailOver = false
parse(tail)

case ("--help") :: tail =>
printUsageAndExit(0)

Expand Down Expand Up @@ -97,6 +102,7 @@ private[mesos] class MesosClusterDispatcherArguments(args: Array[String], conf:
" --webui-port WEBUI_PORT WebUI Port to listen on (default: 8081)\n" +
" --name NAME Framework name to show in Mesos UI\n" +
" -m --master MASTER URI for connecting to Mesos master\n" +
" --disable-failover Will kill the driver when dispatcher is stopped\n" +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Killing the driver doesn't sound right to me. How about "De-register the framework when dispatcher is stopped"?

" -z --zk ZOOKEEPER Comma delimited URLs for connecting to \n" +
" Zookeeper for persistence\n" +
" --properties-file FILE Path to a custom Spark properties file.\n" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ private[spark] class MesosDriverState(
*/
private[spark] class MesosClusterScheduler(
engineFactory: MesosClusterPersistenceEngineFactory,
conf: SparkConf)
conf: SparkConf,
driverFailOver: Boolean = true)
extends Scheduler with MesosSchedulerUtils {
var frameworkUrl: String = _
private val metricsSystem =
Expand Down Expand Up @@ -318,7 +319,7 @@ private[spark] class MesosClusterScheduler(
ready = false
metricsSystem.report()
metricsSystem.stop()
mesosDriver.stop(true)
mesosDriver.stop(driverFailOver)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of killing without failover, we could also start it without failover.

In the start method, we could use:

val driver = createSchedulerDriver(
       master,
       MesosClusterScheduler.this,
       Utils.getCurrentUserName(),
       appName,
       conf,
       Some(frameworkUrl),
       Some(driverFailOver),                                 // <-- with or without checkpoint data
       Some(if (driverFailOver) Double.MaxValue else 0.0),   // <-- timeout for failover recovery
       fwId)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great find, @skyluc!

I will make the change.

}

override def registered(
Expand Down