-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-34472][YARN] Ship ivySettings file to driver in cluster mode #31591
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
0d62c5c
f3101b9
3f49d07
bf91027
998580e
a4d76d7
d99cc86
fab9f9e
fd3ddb2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -793,6 +793,25 @@ private[spark] class Client( | |
| // distributed file. | ||
| amKeytabFileName.foreach { kt => props.setProperty(KEYTAB.key, kt) } | ||
|
|
||
| // Upload user provided ivysettings.xml file to the distributed cache | ||
| val ivySettings = sparkConf.getOption("spark.jars.ivySettings") | ||
| if (isClusterMode && ivySettings.isDefined) { | ||
| val ivySettingsFile = new File(ivySettings.get) | ||
| require(ivySettingsFile.exists(), s"Ivy settings file $ivySettingsFile not found") | ||
|
||
| require(ivySettingsFile.isFile(), | ||
| s"Ivy settings file $ivySettingsFile is not a normal file") | ||
| // Generate a file name that can be used for the ivySettings file, that does not conflict | ||
| // with any other conf file. | ||
| val amIvySettingsFileName = ivySettingsFile.getName() + "-" + UUID.randomUUID().toString | ||
| confStream.putNextEntry(new ZipEntry(amIvySettingsFileName)) | ||
|
||
| Files.copy(ivySettingsFile, confStream) | ||
|
||
| confStream.closeEntry() | ||
|
|
||
| // Override the ivySettings file name with the name of the distributed file | ||
| props.setProperty("spark.jars.ivySettings", s"$LOCALIZED_CONF_DIR/$amIvySettingsFileName") | ||
| } | ||
|
|
||
|
|
||
| writePropertiesToArchive(props, SPARK_CONF_FILE, confStream) | ||
|
|
||
| // Write the distributed cache config to the archive. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ import scala.concurrent.duration._ | |
| import scala.io.Source | ||
|
|
||
| import com.google.common.io.{ByteStreams, Files} | ||
| import org.apache.commons.io.FileUtils | ||
| import org.apache.hadoop.yarn.conf.YarnConfiguration | ||
| import org.apache.hadoop.yarn.util.ConverterUtils | ||
| import org.scalatest.concurrent.Eventually._ | ||
|
|
@@ -368,6 +369,19 @@ class YarnClusterSuite extends BaseYarnClusterSuite { | |
| ) | ||
| checkResult(finalState, result, "true") | ||
| } | ||
|
|
||
| test("SPARK-34472: ivySettings file should be localized on driver in cluster mode") { | ||
|
|
||
| val emptyIvySettings = File.createTempFile("ivy", ".xml") | ||
| FileUtils.write(emptyIvySettings, "<ivysettings />", StandardCharsets.UTF_8) | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we use NIO for these? There is no need for commons-io now that NIO supports this kind of functionality built-in.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I changed this to use Guava's Files, which is used in many places within this file. Can I create a follow-up PR to replace these with NIO?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just saw this comment, sounds fine to me. |
||
| val result = File.createTempFile("result", null, tempDir) | ||
| val finalState = runSpark(clientMode = false, | ||
| mainClassName(YarnAddJarTest.getClass), | ||
| appArgs = Seq(result.getAbsolutePath), | ||
| extraConf = Map("spark.jars.ivySettings" -> emptyIvySettings.getAbsolutePath)) | ||
| checkResult(finalState, result) | ||
| } | ||
| } | ||
|
|
||
| private[spark] class SaveExecutorInfo extends SparkListener { | ||
|
|
@@ -583,6 +597,44 @@ private object YarnClasspathTest extends Logging { | |
|
|
||
| } | ||
|
|
||
| private object YarnAddJarTest extends Logging { | ||
shardulm94 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| def main(args: Array[String]): Unit = { | ||
| if (args.length != 1) { | ||
| // scalastyle:off println | ||
| System.err.println( | ||
| s""" | ||
| |Invalid command line: ${args.mkString(" ")} | ||
| | | ||
| |Usage: YarnAddJarTest [result file] | ||
| """.stripMargin) | ||
| // scalastyle:on println | ||
| System.exit(1) | ||
| } | ||
|
|
||
| val resultPath = args(0) | ||
| val sc = new SparkContext(new SparkConf()) | ||
|
|
||
| var result = "failure" | ||
| try { | ||
| val settingsFile = sc.getConf.get("spark.jars.ivySettings") | ||
| // Make sure that ivySettings conf was set to the localized file | ||
| assert(settingsFile.startsWith(Client.LOCALIZED_CONF_DIR)) | ||
|
|
||
| val caught = intercept[RuntimeException] { | ||
| sc.addJar("ivy://org.fake-project.test:test:1.0.0") | ||
| } | ||
| if (caught.getMessage.contains("unresolved dependency: org.fake-project.test#test")) { | ||
| // "unresolved dependency" is expected as the dependency does not exist | ||
| // but an exception like "Ivy settings file <file> does not exist" should result in failure | ||
|
||
| result = "success" | ||
| } | ||
| } finally { | ||
| Files.write(result, new File(resultPath), StandardCharsets.UTF_8) | ||
| sc.stop() | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private object YarnLauncherTestApp { | ||
|
|
||
| def main(args: Array[String]): Unit = { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
it would be nice to move this into the configuration package.scala and use ConfigBuilder. Even if we just reference it by the .key option in the SparkSubmitArguments file.
There was a problem hiding this comment.
Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Would it be okay to handle this as a follow-up immediately after this PR? There are about 7–8 other places where this string is hardcoded, and we can also refactor them out into config package.scala.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
that's fine, please file an issue
There was a problem hiding this comment.
Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Filed https://issues.apache.org/jira/browse/SPARK-35074