From 2e143ee2d9867f591e4a242934eac977177a359e Mon Sep 17 00:00:00 2001 From: jerryshao Date: Thu, 12 Jan 2017 17:11:53 +0800 Subject: [PATCH 1/2] Deprecate spark.yarn.access.namenodes Change-Id: Id382d90a1b3e5cd726ee1d0badedeb433088e8b5 --- .../scala/org/apache/spark/SparkConf.scala | 6 ++++-- docs/running-on-yarn.md | 19 ++++++++++--------- .../org/apache/spark/deploy/yarn/config.scala | 7 ++++++- .../security/HadoopFSCredentialProvider.scala | 8 ++++---- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index d78b9f1b2968..601d24191eec 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -699,8 +699,10 @@ private[spark] object SparkConf extends Logging { "spark.rpc.message.maxSize" -> Seq( AlternateConfig("spark.akka.frameSize", "1.6")), "spark.yarn.jars" -> Seq( - AlternateConfig("spark.yarn.jar", "2.0")) - ) + AlternateConfig("spark.yarn.jar", "2.0")), + "spark.yarn.access.hadoopFileSystems" -> Seq( + AlternateConfig("spark.yarn.access.namenodes", "2.2")) + ) /** * A view of `configsWithAlternatives` that makes it more efficient to look up deprecated diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index f7513454c785..051f64e1beab 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -276,15 +276,16 @@ To use a custom metrics.properties for the application master and executors, upd - spark.yarn.access.namenodes + spark.yarn.access.hadoopFileSystems (none) - A comma-separated list of secure HDFS namenodes your Spark application is going to access. For - example, spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032, - webhdfs://nn3.com:50070. The Spark application must have access to the namenodes listed + A comma-separated list of secure Hadoop filesystems your Spark application is going to access. 
For + example, spark.yarn.access.hadoopFileSystems=hdfs://nn1.com:8032,hdfs://nn2.com:8032, + webhdfs://nn3.com:50070. The Spark application must have access to the filesystems listed and Kerberos must be properly configured to be able to access them (either in the same realm - or in a trusted realm). Spark acquires security tokens for each of the namenodes so that - the Spark application can access those remote HDFS clusters. + or in a trusted realm). Spark acquires security tokens for each of the filesystems so that + the Spark application can access those remote Hadoop filesystems. spark.yarn.access.namenodes + is deprecated; please use this property instead. @@ -496,10 +497,10 @@ includes a URI of the metadata store in `"hive.metastore.uris`, and If an application needs to interact with other secure Hadoop filesystems, then the tokens needed to access these clusters must be explicitly requested at -launch time. This is done by listing them in the `spark.yarn.access.namenodes` property. +launch time. This is done by listing them in the `spark.yarn.access.hadoopFileSystems` property. ``` -spark.yarn.access.namenodes hdfs://ireland.example.org:8020/,webhdfs://frankfurt.example.org:50070/ +spark.yarn.access.hadoopFileSystems hdfs://ireland.example.org:8020/,webhdfs://frankfurt.example.org:50070/ ``` Spark supports integrating with other security-aware services through Java Services mechanism (see @@ -574,7 +575,7 @@ spark.yarn.security.credentials.hive.enabled false spark.yarn.security.credentials.hbase.enabled false ``` -The configuration option `spark.yarn.access.namenodes` must be unset. +The configuration option `spark.yarn.access.hadoopFileSystems` must be unset. 
## Troubleshooting Kerberos diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala index 666cb456a9be..725f6cc4b8f0 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala @@ -241,13 +241,18 @@ package object config { .intConf .createWithDefault(5) - private[spark] val NAMENODES_TO_ACCESS = ConfigBuilder("spark.yarn.access.namenodes") + val NAMENODES_TO_ACCESS = ConfigBuilder("spark.yarn.access.namenodes") .doc("Extra NameNode URLs for which to request delegation tokens. The NameNode that hosts " + "fs.defaultFS does not need to be listed here.") .stringConf .toSequence .createWithDefault(Nil) + val FILESYSTEMS_TO_ACCESS = ConfigBuilder("spark.yarn.access.hadoopFileSystems") + .doc("Extra Hadoop filesystem URLs for which to request delegation tokens. The filesystem " + + "that hosts fs.defaultFS does not need to be listed here.") + .fallbackConf(NAMENODES_TO_ACCESS) + /* Rolled log aggregation configuration. 
*/ private[spark] val ROLLED_LOG_INCLUDE_PATTERN = diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/HadoopFSCredentialProvider.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/HadoopFSCredentialProvider.scala index b4fb4a790adc..f65c886db944 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/HadoopFSCredentialProvider.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/HadoopFSCredentialProvider.scala @@ -47,7 +47,7 @@ private[security] class HadoopFSCredentialProvider // NameNode to access, used to get tokens from different FileSystems val tmpCreds = new Credentials() val tokenRenewer = getTokenRenewer(hadoopConf) - nnsToAccess(hadoopConf, sparkConf).foreach { dst => + hadoopFSsToAccess(hadoopConf, sparkConf).foreach { dst => val dstFs = dst.getFileSystem(hadoopConf) logInfo("getting token for: " + dst) dstFs.addDelegationTokens(tokenRenewer, tmpCreds) @@ -80,7 +80,7 @@ private[security] class HadoopFSCredentialProvider // user as renewer. 
sparkConf.get(PRINCIPAL).flatMap { renewer => val creds = new Credentials() - nnsToAccess(hadoopConf, sparkConf).foreach { dst => + hadoopFSsToAccess(hadoopConf, sparkConf).foreach { dst => val dstFs = dst.getFileSystem(hadoopConf) dstFs.addDelegationTokens(renewer, creds) } @@ -112,8 +112,8 @@ private[security] class HadoopFSCredentialProvider delegTokenRenewer } - private def nnsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = { - sparkConf.get(NAMENODES_TO_ACCESS).map(new Path(_)).toSet + + private def hadoopFSsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = { + sparkConf.get(FILESYSTEMS_TO_ACCESS).map(new Path(_)).toSet + sparkConf.get(STAGING_DIR).map(new Path(_)) .getOrElse(FileSystem.get(hadoopConf).getHomeDirectory) } From d2c651cbd4b97a9703e86c42f709dd71390e9a59 Mon Sep 17 00:00:00 2001 From: jerryshao Date: Thu, 12 Jan 2017 17:25:00 +0800 Subject: [PATCH 2/2] revert some unnecessary changes Change-Id: I5105b6bc7b4412ef8778d445ef5b985b1cfc0b10 --- .../src/main/scala/org/apache/spark/deploy/yarn/config.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala index 725f6cc4b8f0..f19a5b22a757 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala @@ -241,14 +241,14 @@ package object config { .intConf .createWithDefault(5) - val NAMENODES_TO_ACCESS = ConfigBuilder("spark.yarn.access.namenodes") + private[spark] val NAMENODES_TO_ACCESS = ConfigBuilder("spark.yarn.access.namenodes") .doc("Extra NameNode URLs for which to request delegation tokens. 
The NameNode that hosts " + "fs.defaultFS does not need to be listed here.") .stringConf .toSequence .createWithDefault(Nil) - val FILESYSTEMS_TO_ACCESS = ConfigBuilder("spark.yarn.access.hadoopFileSystems") + private[spark] val FILESYSTEMS_TO_ACCESS = ConfigBuilder("spark.yarn.access.hadoopFileSystems") .doc("Extra Hadoop filesystem URLs for which to request delegation tokens. The filesystem " + "that hosts fs.defaultFS does not need to be listed here.") .fallbackConf(NAMENODES_TO_ACCESS)