Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
SPARK-25004: Optionally capture stdout/stderr in YarnClusterSuite.
This is needed to debug the tests.
  • Loading branch information
rdblue committed Aug 23, 2018
commit fbac4a5311a556683e696bc1aef283252af2001d
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ abstract class BaseYarnClusterSuite
extraClassPath: Seq[String] = Nil,
extraJars: Seq[String] = Nil,
extraConf: Map[String, String] = Map(),
extraEnv: Map[String, String] = Map()): SparkAppHandle.State = {
extraEnv: Map[String, String] = Map(),
outFile: Option[File] = None): SparkAppHandle.State = {
val deployMode = if (clientMode) "client" else "cluster"
val propsFile = createConfFile(extraClassPath = extraClassPath, extraConf = extraConf)
val env = Map("YARN_CONF_DIR" -> hadoopConfDir.getAbsolutePath()) ++ extraEnv
Expand Down Expand Up @@ -161,6 +162,11 @@ abstract class BaseYarnClusterSuite
}
extraJars.foreach(launcher.addJar)

if (outFile.isDefined) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

outFile.foreach?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that foreach on an option is unclear (looks like a loop) and should be avoided unless it really simplifies the logic, which it doesn't do here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you do a foreach then the .get goes away and the code could be a little cleaner, but it's pretty minor.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like I said, I think foreach is a bad practice with options, so I'd rather not change to use it. I'd be happy to change this to a pattern match if you think it is really desirable to get rid of the .get.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the pattern match would be better than the get.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To me, either way is fine.

launcher.redirectOutput(outFile.get)
launcher.redirectError()
}

val handle = launcher.startApplication()
try {
eventually(timeout(2 minutes), interval(1 second)) {
Expand All @@ -179,17 +185,22 @@ abstract class BaseYarnClusterSuite
* the tests enforce that something is written to a file after everything is ok to indicate
* that the job succeeded.
*/
/**
 * Convenience overload: asserts the application reached FINISHED state and that the
 * result file contains the default marker value "success".
 */
protected def checkResult(finalState: SparkAppHandle.State, result: File): Unit =
  checkResult(finalState, result, "success")

/**
 * Checks that the application run was successful: the launcher handle must report the
 * FINISHED state and the result file must contain the expected marker value.
 *
 * @param finalState terminal state reported by the `SparkAppHandle` for the app
 * @param result file the application writes its result marker to
 * @param expected value the result file must contain (defaults to "success")
 * @param outFile optional file holding the app's captured stdout/stderr; when present its
 *                contents are attached to assertion failure messages to aid debugging
 */
protected def checkResult(
    finalState: SparkAppHandle.State,
    result: File,
    expected: String = "success",
    outFile: Option[File] = None): Unit = {
  // The context message is passed to assert as Any instead of a function, so to lazily load
  // the (potentially large) output file this passes an anonymous object that only reads it
  // in toString, i.e. when an assertion actually fails and builds its error message.
  val output = new Object() {
    override def toString: String = outFile
      .map(Files.toString(_, StandardCharsets.UTF_8))
      .getOrElse("(stdout/stderr was not captured)")
  }
  assert(finalState === SparkAppHandle.State.FINISHED, output)
  val resultString = Files.toString(result, StandardCharsets.UTF_8)
  assert(resultString === expected, output)
}

protected def mainClassName(klass: Class[_]): String = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,13 +282,15 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
val mod2Archive = TestUtils.createJarWithFiles(Map("mod2.py" -> TEST_PYMODULE), moduleDir)
val pyFiles = Seq(pyModule.getAbsolutePath(), mod2Archive.getPath()).mkString(",")
val result = File.createTempFile("result", null, tempDir)
val outFile = Some(File.createTempFile("stdout", null, tempDir))

val finalState = runSpark(clientMode, primaryPyFile.getAbsolutePath(),
sparkArgs = Seq("--py-files" -> pyFiles),
appArgs = Seq(result.getAbsolutePath()),
extraEnv = extraEnvVars,
extraConf = extraConf)
checkResult(finalState, result)
extraConf = extraConf,
outFile = outFile)
checkResult(finalState, result, outFile = outFile)
}

private def testUseClassPathFirst(clientMode: Boolean): Unit = {
Expand Down