-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-34472][YARN] Ship ivySettings file to driver in cluster mode #31591
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
0d62c5c
f3101b9
3f49d07
bf91027
998580e
a4d76d7
d99cc86
fab9f9e
fd3ddb2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ import scala.concurrent.duration._ | |
| import scala.io.Source | ||
|
|
||
| import com.google.common.io.{ByteStreams, Files} | ||
| import org.apache.commons.io.FileUtils | ||
| import org.apache.hadoop.yarn.conf.YarnConfiguration | ||
| import org.apache.hadoop.yarn.util.ConverterUtils | ||
| import org.scalatest.concurrent.Eventually._ | ||
|
|
@@ -368,6 +369,19 @@ class YarnClusterSuite extends BaseYarnClusterSuite { | |
| ) | ||
| checkResult(finalState, result, "true") | ||
| } | ||
|
|
||
| test("SPARK-34472: ivySettings file should be localized on driver in cluster mode") { | ||
|
|
||
| val emptyIvySettings = File.createTempFile("ivy", ".xml") | ||
| FileUtils.write(emptyIvySettings, "<ivysettings />", StandardCharsets.UTF_8) | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we use NIO for these? No need for commons-io now that NIO supports this kind of stuff built-in.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I changed this to use Guava's Files which is used in many places within this file. Can I create a followup PR to replace these with NIO?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just saw this comment, sounds fine to me. |
||
| val result = File.createTempFile("result", null, tempDir) | ||
| val finalState = runSpark(clientMode = false, | ||
| mainClassName(YarnAddJarTest.getClass), | ||
| appArgs = Seq(result.getAbsolutePath), | ||
| extraConf = Map("spark.jars.ivySettings" -> emptyIvySettings.getAbsolutePath)) | ||
| checkResult(finalState, result) | ||
| } | ||
| } | ||
|
|
||
| private[spark] class SaveExecutorInfo extends SparkListener { | ||
|
|
@@ -583,6 +597,47 @@ private object YarnClasspathTest extends Logging { | |
|
|
||
| } | ||
|
|
||
| private object YarnAddJarTest extends Logging { | ||
shardulm94 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| def main(args: Array[String]): Unit = { | ||
| if (args.length != 1) { | ||
| // scalastyle:off println | ||
| System.err.println( | ||
| s""" | ||
| |Invalid command line: ${args.mkString(" ")} | ||
| | | ||
| |Usage: YarnAddJarTest [result file] | ||
| """.stripMargin) | ||
| // scalastyle:on println | ||
| System.exit(1) | ||
| } | ||
|
|
||
| val resultPath = args(0) | ||
| val sc = new SparkContext(new SparkConf()) | ||
|
|
||
| var result = "failure" | ||
| try { | ||
| val settingsFile = sc.getConf.get("spark.jars.ivySettings") | ||
| // Delete the original ivySettings file, so we ensure that the YARN localized file | ||
| // is used by the addJar call | ||
| // In a real cluster mode, the original settings file at the absolute path won't be present | ||
| // on the driver | ||
| new File(settingsFile).delete() | ||
|
|
||
| val caught = intercept[Exception] { | ||
shardulm94 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| sc.addJar("ivy://org.fake-project.test:test:1.0.0") | ||
| } | ||
| if (caught.getMessage.contains("unresolved dependency: org.fake-project.test#test")) { | ||
| // "unresolved dependency" is expected as the dependency does not exist | ||
| // but exception like "Ivy settings file <file> does not exist should result in failure | ||
shardulm94 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| result = "success" | ||
| } | ||
| } finally { | ||
| Files.write(result, new File(resultPath), StandardCharsets.UTF_8) | ||
| sc.stop() | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private object YarnLauncherTestApp { | ||
|
|
||
| def main(args: Array[String]): Unit = { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@shardulm94, does it work if you set
spark.jars.ivySettingsto./ivysettings.xmland passivysettings.xmltospark.yarn.files?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know you might have to copy
ivysettings.xmlto the current working directory when Spark submit runs but I think it might work for your usecase.Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, as you pointed out it only works if I have the copy in the current working directory. However we cannot control which directory our users launch spark-submit from. So ideally we would want something which works without user intervention.
What do you think of this? In Yarn Client, we can add the ivySettings file to
spark.yarn.dist.filesor__spark__conf__.zipand then we can modify the propertyspark.jars.ivySettingsto change it to./ivysettings.xmlor__spark__conf__/ivysettings.xmlwithin Yarn Client.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding it into
__spark__conf__sounds okay to me ... but I would prefer to have second opinions from Yarn experts such as @tgravescs, @mridulm or @jerryshao.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried this out, and it looks like it can be handled pretty cleanly this way targeting just YARN. shardulm94@12709f0
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I updated the PR with this approach.