
Commit 78af229

Merge remote-tracking branch 'upstream/master' into cache_memory_leak
2 parents: 26c9bb6 + 77eeb10

244 files changed (+4324 additions, -1244 deletions)


R/pkg/R/RDD.R

Lines changed: 5 additions & 13 deletions
@@ -85,7 +85,7 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)
 
   if (!inherits(prev, "PipelinedRDD") || !isPipelinable(prev)) {
     # This transformation is the first in its stage:
-    .Object@func <- func
+    .Object@func <- cleanClosure(func)
     .Object@prev_jrdd <- getJRDD(prev)
     .Object@env$prev_serializedMode <- prev@env$serializedMode
     # NOTE: We use prev_serializedMode to track the serialization mode of prev_JRDD
@@ -94,7 +94,7 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)
     pipelinedFunc <- function(split, iterator) {
       func(split, prev@func(split, iterator))
     }
-    .Object@func <- pipelinedFunc
+    .Object@func <- cleanClosure(pipelinedFunc)
     .Object@prev_jrdd <- prev@prev_jrdd # maintain the pipeline
     # Get the serialization mode of the parent RDD
     .Object@env$prev_serializedMode <- prev@env$prev_serializedMode
@@ -144,17 +144,13 @@ setMethod("getJRDD", signature(rdd = "PipelinedRDD"),
               return(rdd@env$jrdd_val)
             }
 
-            computeFunc <- function(split, part) {
-              rdd@func(split, part)
-            }
-
             packageNamesArr <- serialize(.sparkREnv[[".packages"]],
                                          connection = NULL)
 
             broadcastArr <- lapply(ls(.broadcastNames),
                                    function(name) { get(name, .broadcastNames) })
 
-            serializedFuncArr <- serialize(computeFunc, connection = NULL)
+            serializedFuncArr <- serialize(rdd@func, connection = NULL)
 
             prev_jrdd <- rdd@prev_jrdd
 
@@ -279,7 +275,7 @@ setMethod("unpersist",
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
-#' setCheckpointDir(sc, "checkpoints")
+#' setCheckpointDir(sc, "checkpoint")
 #' rdd <- parallelize(sc, 1:10, 2L)
 #' checkpoint(rdd)
 #'}
@@ -551,11 +547,7 @@ setMethod("mapPartitions",
 setMethod("lapplyPartitionsWithIndex",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
-            FUN <- cleanClosure(FUN)
-            closureCapturingFunc <- function(split, part) {
-              FUN(split, part)
-            }
-            PipelinedRDD(X, closureCapturingFunc)
+            PipelinedRDD(X, FUN)
           })
 
 #' @rdname lapplyPartitionsWithIndex

R/pkg/R/context.R

Lines changed: 1 addition & 1 deletion
@@ -216,7 +216,7 @@ broadcast <- function(sc, object) {
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
-#' setCheckpointDir(sc, "~/checkpoints")
+#' setCheckpointDir(sc, "~/checkpoint")
 #' rdd <- parallelize(sc, 1:2, 2L)
 #' checkpoint(rdd)
 #'}

R/pkg/R/pairRDD.R

Lines changed: 0 additions & 4 deletions
@@ -694,10 +694,6 @@ setMethod("cogroup",
             for (i in 1:rddsLen) {
               rdds[[i]] <- lapply(rdds[[i]],
                                   function(x) { list(x[[1]], list(i, x[[2]])) })
-              # TODO(hao): As issue [SparkR-142] mentions, the right value of i
-              # will not be captured into UDF if getJRDD is not invoked.
-              # It should be resolved together with that issue.
-              getJRDD(rdds[[i]]) # Capture the closure.
             }
             union.rdd <- Reduce(unionRDD, rdds)
             group.func <- function(vlist) {

R/pkg/inst/tests/test_rdd.R

Lines changed: 3 additions & 2 deletions
@@ -141,7 +141,8 @@ test_that("PipelinedRDD support actions: cache(), persist(), unpersist(), checkp
   unpersist(rdd2)
   expect_false(rdd2@env$isCached)
 
-  setCheckpointDir(sc, "checkpoints")
+  tempDir <- tempfile(pattern = "checkpoint")
+  setCheckpointDir(sc, tempDir)
   checkpoint(rdd2)
   expect_true(rdd2@env$isCheckpointed)
 
@@ -152,7 +153,7 @@ test_that("PipelinedRDD support actions: cache(), persist(), unpersist(), checkp
   # make sure the data is collectable
   collect(rdd2)
 
-  unlink("checkpoints")
+  unlink(tempDir)
 })
 
 test_that("reduce on RDD", {

core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala

Lines changed: 17 additions & 17 deletions
@@ -80,16 +80,16 @@ private[spark] class ExecutorAllocationManager(
     Integer.MAX_VALUE)
 
   // How long there must be backlogged tasks for before an addition is triggered (seconds)
-  private val schedulerBacklogTimeout = conf.getLong(
-    "spark.dynamicAllocation.schedulerBacklogTimeout", 5)
+  private val schedulerBacklogTimeoutS = conf.getTimeAsSeconds(
+    "spark.dynamicAllocation.schedulerBacklogTimeout", "5s")
 
-  // Same as above, but used only after `schedulerBacklogTimeout` is exceeded
-  private val sustainedSchedulerBacklogTimeout = conf.getLong(
-    "spark.dynamicAllocation.sustainedSchedulerBacklogTimeout", schedulerBacklogTimeout)
+  // Same as above, but used only after `schedulerBacklogTimeoutS` is exceeded
+  private val sustainedSchedulerBacklogTimeoutS = conf.getTimeAsSeconds(
+    "spark.dynamicAllocation.sustainedSchedulerBacklogTimeout", s"${schedulerBacklogTimeoutS}s")
 
   // How long an executor must be idle for before it is removed (seconds)
-  private val executorIdleTimeout = conf.getLong(
-    "spark.dynamicAllocation.executorIdleTimeout", 600)
+  private val executorIdleTimeoutS = conf.getTimeAsSeconds(
+    "spark.dynamicAllocation.executorIdleTimeout", "600s")
 
   // During testing, the methods to actually kill and add executors are mocked out
   private val testing = conf.getBoolean("spark.dynamicAllocation.testing", false)
@@ -150,14 +150,14 @@ private[spark] class ExecutorAllocationManager(
       throw new SparkException(s"spark.dynamicAllocation.minExecutors ($minNumExecutors) must " +
         s"be less than or equal to spark.dynamicAllocation.maxExecutors ($maxNumExecutors)!")
     }
-    if (schedulerBacklogTimeout <= 0) {
+    if (schedulerBacklogTimeoutS <= 0) {
       throw new SparkException("spark.dynamicAllocation.schedulerBacklogTimeout must be > 0!")
     }
-    if (sustainedSchedulerBacklogTimeout <= 0) {
+    if (sustainedSchedulerBacklogTimeoutS <= 0) {
       throw new SparkException(
        "spark.dynamicAllocation.sustainedSchedulerBacklogTimeout must be > 0!")
     }
-    if (executorIdleTimeout <= 0) {
+    if (executorIdleTimeoutS <= 0) {
      throw new SparkException("spark.dynamicAllocation.executorIdleTimeout must be > 0!")
     }
     // Require external shuffle service for dynamic allocation
@@ -262,8 +262,8 @@ private[spark] class ExecutorAllocationManager(
     } else if (addTime != NOT_SET && now >= addTime) {
       val delta = addExecutors(maxNeeded)
       logDebug(s"Starting timer to add more executors (to " +
-        s"expire in $sustainedSchedulerBacklogTimeout seconds)")
-      addTime += sustainedSchedulerBacklogTimeout * 1000
+        s"expire in $sustainedSchedulerBacklogTimeoutS seconds)")
+      addTime += sustainedSchedulerBacklogTimeoutS * 1000
       delta
     } else {
       0
@@ -351,7 +351,7 @@ private[spark] class ExecutorAllocationManager(
     val removeRequestAcknowledged = testing || client.killExecutor(executorId)
     if (removeRequestAcknowledged) {
       logInfo(s"Removing executor $executorId because it has been idle for " +
-        s"$executorIdleTimeout seconds (new desired total will be ${numExistingExecutors - 1})")
+        s"$executorIdleTimeoutS seconds (new desired total will be ${numExistingExecutors - 1})")
       executorsPendingToRemove.add(executorId)
       true
     } else {
@@ -407,8 +407,8 @@ private[spark] class ExecutorAllocationManager(
   private def onSchedulerBacklogged(): Unit = synchronized {
     if (addTime == NOT_SET) {
       logDebug(s"Starting timer to add executors because pending tasks " +
-        s"are building up (to expire in $schedulerBacklogTimeout seconds)")
-      addTime = clock.getTimeMillis + schedulerBacklogTimeout * 1000
+        s"are building up (to expire in $schedulerBacklogTimeoutS seconds)")
+      addTime = clock.getTimeMillis + schedulerBacklogTimeoutS * 1000
     }
   }
 
@@ -431,8 +431,8 @@ private[spark] class ExecutorAllocationManager(
     if (executorIds.contains(executorId)) {
       if (!removeTimes.contains(executorId) && !executorsPendingToRemove.contains(executorId)) {
         logDebug(s"Starting idle timer for $executorId because there are no more tasks " +
-          s"scheduled to run on the executor (to expire in $executorIdleTimeout seconds)")
-        removeTimes(executorId) = clock.getTimeMillis + executorIdleTimeout * 1000
+          s"scheduled to run on the executor (to expire in $executorIdleTimeoutS seconds)")
+        removeTimes(executorId) = clock.getTimeMillis + executorIdleTimeoutS * 1000
       }
     } else {
       logWarning(s"Attempted to mark unknown executor $executorId idle")

core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala

Lines changed: 9 additions & 6 deletions
@@ -62,14 +62,17 @@ private[spark] class HeartbeatReceiver(sc: SparkContext)
 
   // "spark.network.timeout" uses "seconds", while `spark.storage.blockManagerSlaveTimeoutMs` uses
   // "milliseconds"
-  private val executorTimeoutMs = sc.conf.getOption("spark.network.timeout").map(_.toLong * 1000).
-    getOrElse(sc.conf.getLong("spark.storage.blockManagerSlaveTimeoutMs", 120000))
-
+  private val slaveTimeoutMs =
+    sc.conf.getTimeAsMs("spark.storage.blockManagerSlaveTimeoutMs", "120s")
+  private val executorTimeoutMs =
+    sc.conf.getTimeAsSeconds("spark.network.timeout", s"${slaveTimeoutMs}ms") * 1000
+
   // "spark.network.timeoutInterval" uses "seconds", while
   // "spark.storage.blockManagerTimeoutIntervalMs" uses "milliseconds"
-  private val checkTimeoutIntervalMs =
-    sc.conf.getOption("spark.network.timeoutInterval").map(_.toLong * 1000).
-    getOrElse(sc.conf.getLong("spark.storage.blockManagerTimeoutIntervalMs", 60000))
+  private val timeoutIntervalMs =
+    sc.conf.getTimeAsMs("spark.storage.blockManagerTimeoutIntervalMs", "60s")
+  private val checkTimeoutIntervalMs =
+    sc.conf.getTimeAsSeconds("spark.network.timeoutInterval", s"${timeoutIntervalMs}ms") * 1000
 
   private var timeoutCheckingTask: ScheduledFuture[_] = null
 
core/src/main/scala/org/apache/spark/HttpServer.scala

Lines changed: 1 addition & 1 deletion
@@ -160,7 +160,7 @@ private[spark] class HttpServer(
       throw new ServerStateException("Server is not started")
     } else {
       val scheme = if (securityManager.fileServerSSLOptions.enabled) "https" else "http"
-      s"$scheme://${Utils.localIpAddress}:$port"
+      s"$scheme://${Utils.localHostNameForURI()}:$port"
     }
   }
 }

core/src/main/scala/org/apache/spark/SparkConf.scala

Lines changed: 36 additions & 0 deletions
@@ -174,6 +174,42 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {
     getOption(key).getOrElse(defaultValue)
   }
 
+  /**
+   * Get a time parameter as seconds; throws a NoSuchElementException if it's not set. If no
+   * suffix is provided then seconds are assumed.
+   * @throws NoSuchElementException
+   */
+  def getTimeAsSeconds(key: String): Long = {
+    Utils.timeStringAsSeconds(get(key))
+  }
+
+  /**
+   * Get a time parameter as seconds, falling back to a default if not set. If no
+   * suffix is provided then seconds are assumed.
+   *
+   */
+  def getTimeAsSeconds(key: String, defaultValue: String): Long = {
+    Utils.timeStringAsSeconds(get(key, defaultValue))
+  }
+
+  /**
+   * Get a time parameter as milliseconds; throws a NoSuchElementException if it's not set. If no
+   * suffix is provided then milliseconds are assumed.
+   * @throws NoSuchElementException
+   */
+  def getTimeAsMs(key: String): Long = {
+    Utils.timeStringAsMs(get(key))
+  }
+
+  /**
+   * Get a time parameter as milliseconds, falling back to a default if not set. If no
+   * suffix is provided then milliseconds are assumed.
+   */
+  def getTimeAsMs(key: String, defaultValue: String): Long = {
+    Utils.timeStringAsMs(get(key, defaultValue))
+  }
+
+
   /** Get a parameter as an Option */
   def getOption(key: String): Option[String] = {
     Option(settings.get(key))
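
As a quick reference, a hedged sketch of how the new accessors are meant to be used. Suffix parsing is delegated to Utils.timeStringAsSeconds / Utils.timeStringAsMs, so the exact set of accepted suffixes is assumed rather than shown by this diff, and spark.example.timeout is a placeholder key:

    import org.apache.spark.SparkConf

    val conf = new SparkConf().set("spark.network.timeout", "2m")

    conf.getTimeAsSeconds("spark.network.timeout")         // 120: suffix converted to seconds
    conf.getTimeAsMs("spark.network.timeout")              // 120000: same value in milliseconds
    conf.getTimeAsSeconds("spark.example.timeout", "30s")  // 30: unset key falls back to the default string
    // conf.getTimeAsSeconds("spark.example.timeout")      // with no default, this throws NoSuchElementException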

core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ class LocalSparkCluster(
     /* Start the Master */
     val (masterSystem, masterPort, _, _) = Master.startSystemAndActor(localHostname, 0, 0, _conf)
     masterActorSystems += masterSystem
-    val masterUrl = "spark://" + localHostname + ":" + masterPort
+    val masterUrl = "spark://" + Utils.localHostNameForURI() + ":" + masterPort
     val masters = Array(masterUrl)
 
     /* Start the Workers */

core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala

Lines changed: 34 additions & 4 deletions
@@ -24,11 +24,10 @@ import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
 import org.apache.hadoop.fs.FileSystem.Statistics
 import org.apache.hadoop.mapred.JobConf
-import org.apache.hadoop.mapreduce.{JobContext, TaskAttemptContext}
-import org.apache.hadoop.security.Credentials
-import org.apache.hadoop.security.UserGroupInformation
+import org.apache.hadoop.mapreduce.JobContext
+import org.apache.hadoop.security.{Credentials, UserGroupInformation}
 
-import org.apache.spark.{Logging, SparkContext, SparkConf, SparkException}
+import org.apache.spark.{Logging, SparkConf, SparkException}
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.util.Utils
 
@@ -201,6 +200,37 @@ class SparkHadoopUtil extends Logging {
     val baseStatus = fs.getFileStatus(basePath)
     if (baseStatus.isDir) recurse(basePath) else Array(baseStatus)
   }
+
+  private val HADOOP_CONF_PATTERN = "(\\$\\{hadoopconf-[^\\}\\$\\s]+\\})".r.unanchored
+
+  /**
+   * Substitute variables by looking them up in Hadoop configs. Only variables that match the
+   * ${hadoopconf- .. } pattern are substituted.
+   */
+  def substituteHadoopVariables(text: String, hadoopConf: Configuration): String = {
+    text match {
+      case HADOOP_CONF_PATTERN(matched) => {
+        logDebug(text + " matched " + HADOOP_CONF_PATTERN)
+        val key = matched.substring(13, matched.length() - 1) // remove ${hadoopconf- .. }
+        val eval = Option[String](hadoopConf.get(key))
+          .map { value =>
+            logDebug("Substituted " + matched + " with " + value)
+            text.replace(matched, value)
+          }
+        if (eval.isEmpty) {
+          // The variable was not found in Hadoop configs, so return text as is.
+          text
+        } else {
+          // Continue to substitute more variables.
+          substituteHadoopVariables(eval.get, hadoopConf)
+        }
+      }
+      case _ => {
+        logDebug(text + " didn't match " + HADOOP_CONF_PATTERN)
+        text
+      }
+    }
+  }
 }
 
 object SparkHadoopUtil {
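
A short usage sketch of the new substituteHadoopVariables helper; the Hadoop key and URL below are examples only, and SparkHadoopUtil.get is assumed to be the usual singleton accessor:

    import org.apache.hadoop.conf.Configuration
    import org.apache.spark.deploy.SparkHadoopUtil

    val hadoopConf = new Configuration()
    hadoopConf.set("yarn.resourcemanager.hostname", "rm.example.com")  // example key/value

    val resolved = SparkHadoopUtil.get.substituteHadoopVariables(
      "http://${hadoopconf-yarn.resourcemanager.hostname}:8088", hadoopConf)
    // resolved == "http://rm.example.com:8088"; if the key is missing from hadoopConf,
    // or the text contains no ${hadoopconf-...} reference, the text is returned unchanged.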

0 commit comments
