Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
2568a6c
Rename JobProgressPage to AllStagesPage:
JoshRosen Oct 29, 2014
4487dcb
[SPARK-4145] Web UI job pages
JoshRosen Oct 30, 2014
bfce2b9
Address review comments, except for progress bar.
JoshRosen Nov 6, 2014
4b206fb
Merge remote-tracking branch 'origin/master' into job-page
JoshRosen Nov 6, 2014
45343b8
More comments
JoshRosen Nov 6, 2014
a475ea1
Add progress bars to jobs page.
JoshRosen Nov 11, 2014
56701fa
Move last stage name / description logic out of markup.
JoshRosen Nov 11, 2014
1cf4987
Fix broken kill links; add Selenium test to avoid future regressions.
JoshRosen Nov 11, 2014
85e9c85
Extract startTime into separate variable.
JoshRosen Nov 11, 2014
4d58e55
Change label to "Tasks (for all stages)"
JoshRosen Nov 11, 2014
4846ce4
Hide "(Job Group)" if no jobs were submitted in job groups.
JoshRosen Nov 12, 2014
b7bf30e
Add stages progress bar; fix bug where active stages show as completed.
JoshRosen Nov 12, 2014
8a2351b
Add help tooltip to Spark Jobs page.
JoshRosen Nov 12, 2014
3d0a007
Merge remote-tracking branch 'origin/master' into job-page
JoshRosen Nov 17, 2014
1145c60
Display text instead of progress bar for stages.
JoshRosen Nov 17, 2014
d62ea7b
Add failing Selenium test for stage overcounting issue.
JoshRosen Nov 17, 2014
79793cd
Track indices of completed stage to avoid overcounting when failures …
JoshRosen Nov 18, 2014
5884f91
Add StageInfos to SparkListenerJobStart event.
JoshRosen Nov 18, 2014
8ab6c28
Compute numTasks from job start stage infos.
JoshRosen Nov 18, 2014
8955f4c
Display information for pending stages on jobs page.
JoshRosen Nov 19, 2014
e2f2c43
Fix sorting of stages in job details page.
JoshRosen Nov 19, 2014
171b53c
Move `startTime` to the start of SparkContext.
JoshRosen Nov 19, 2014
f2a15da
Add status field to job details page.
JoshRosen Nov 19, 2014
5eb39dc
Add pending stages table to job page.
JoshRosen Nov 19, 2014
d69c775
Fix table sorting on all jobs page.
JoshRosen Nov 19, 2014
7d10b97
Merge remote-tracking branch 'apache/master' into job-page
JoshRosen Nov 20, 2014
67080ba
Ensure that "phantom stages" don't cause memory leaks.
JoshRosen Nov 20, 2014
eebdc2c
Don’t display pending stages for completed jobs.
JoshRosen Nov 20, 2014
034aa8d
Use `.max()` to find result stage for job.
JoshRosen Nov 20, 2014
0b77e3e
More bug fixes for phantom stages.
JoshRosen Nov 20, 2014
1f45d44
Incorporate a bunch of minor review feedback.
JoshRosen Nov 20, 2014
61c265a
Add “skipped stages” table; only display non-empty tables.
JoshRosen Nov 20, 2014
2bbf41a
Update job progress bar to reflect skipped tasks/stages.
JoshRosen Nov 20, 2014
6f17f3f
Only store StageInfos in SparkListenerJobStart event.
JoshRosen Nov 21, 2014
ff804cd
Don't write "Stage Ids" field in JobStartEvent JSON.
JoshRosen Nov 21, 2014
b89c258
More JSON protocol backwards-compatibility fixes.
JoshRosen Nov 21, 2014
f00c851
Fix JsonProtocol compatibility
JoshRosen Nov 21, 2014
eb05e90
Disable kill button in completed stages tables.
JoshRosen Nov 24, 2014
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add “skipped stages” table; only display non-empty tables.
  • Loading branch information
JoshRosen committed Nov 20, 2014
commit 61c265aac63d1621bf212245ddb25b703cf3a807
108 changes: 74 additions & 34 deletions core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,14 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") {
new StageInfo(stageId, 0, "Unknown", 0, Seq.empty, "Unknown"))
}

val pendingStages = mutable.Buffer[StageInfo]()
val activeStages = mutable.Buffer[StageInfo]()
val completedStages = mutable.Buffer[StageInfo]()
// If the job is completed, then any pending stages are displayed as "skipped":
val pendingOrSkippedStages = mutable.Buffer[StageInfo]()
val failedStages = mutable.Buffer[StageInfo]()
for (stage <- stages) {
if (stage.submissionTime.isEmpty) {
if (!isComplete) {
pendingStages += stage
} else {
// Do nothing so that we don't display pending stages for completed jobs
}
pendingOrSkippedStages += stage
} else if (stage.completionTime.isDefined) {
if (stage.failureReason.isDefined) {
failedStages += stage
Expand All @@ -77,8 +74,8 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") {
new StageTableBase(activeStages.sortBy(_.submissionTime).reverse,
parent.basePath, parent.listener, isFairScheduler = parent.isFairScheduler,
killEnabled = parent.killEnabled)
val pendingStagesTable =
new StageTableBase(pendingStages.sortBy(_.stageId).reverse,
val pendingOrSkippedStagesTable =
new StageTableBase(pendingOrSkippedStages.sortBy(_.stageId).reverse,
parent.basePath, parent.listener, isFairScheduler = parent.isFairScheduler,
killEnabled = false)
val completedStagesTable =
Expand All @@ -89,6 +86,12 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") {
new FailedStageTable(failedStages.sortBy(_.submissionTime).reverse, parent.basePath,
parent.listener, isFairScheduler = parent.isFairScheduler)

val shouldShowActiveStages = activeStages.nonEmpty
val shouldShowPendingStages = !isComplete && pendingOrSkippedStages.nonEmpty
val shouldShowCompletedStages = completedStages.nonEmpty
val shouldShowSkippedStages = isComplete && pendingOrSkippedStages.nonEmpty
val shouldShowFailedStages = failedStages.nonEmpty

val summary: NodeSeq =
<div>
<ul class="unstyled">
Expand All @@ -102,36 +105,73 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") {
<strong>Job Group:</strong>
{jobData.jobGroup.get}
</li>
} else Seq.empty
}
}
{
if (shouldShowActiveStages) {
<li>
<a href="#active"><strong>Active Stages:</strong></a>
{activeStages.size}
</li>
}
}
{
if (shouldShowPendingStages) {
<li>
<a href="#pending">
<strong>Pending Stages:</strong>
</a>{pendingOrSkippedStages.size}
</li>
}
}
{
if (shouldShowCompletedStages) {
<li>
<a href="#completed"><strong>Completed Stages:</strong></a>
{completedStages.size}
</li>
}
}
{
if (shouldShowSkippedStages) {
<li>
<a href="#skipped"><strong>Skipped Stages:</strong></a>
{pendingOrSkippedStages.size}
</li>
}
}
{
if (shouldShowFailedStages) {
<li>
<a href="#failed"><strong>Failed Stages:</strong></a>
{failedStages.size}
</li>
}
}
<li>
<a href="#active"><strong>Active Stages:</strong></a>
{activeStages.size}
</li>
<li>
<a href="#pending"><strong>Pending Stages:</strong></a>
{pendingStages.size}
</li>
<li>
<a href="#completed"><strong>Completed Stages:</strong></a>
{completedStages.size}
</li>
<li>
<a href="#failed"><strong>Failed Stages:</strong></a>
{failedStages.size}
</li>
</ul>
</div>

val content = summary ++
<h4 id="active">Active Stages ({activeStages.size})</h4> ++
activeStagesTable.toNodeSeq ++
<h4 id="pending">Pending Stages ({pendingStages.size})</h4> ++
pendingStagesTable.toNodeSeq ++
<h4 id="completed">Completed Stages ({completedStages.size})</h4> ++
completedStagesTable.toNodeSeq ++
<h4 id ="failed">Failed Stages ({failedStages.size})</h4> ++
failedStagesTable.toNodeSeq
var content = summary
if (shouldShowActiveStages) {
content ++= <h4 id="active">Active Stages ({activeStages.size})</h4> ++
activeStagesTable.toNodeSeq
}
if (shouldShowPendingStages) {
content ++= <h4 id="pending">Pending Stages ({pendingOrSkippedStages.size})</h4> ++
pendingOrSkippedStagesTable.toNodeSeq
}
if (shouldShowCompletedStages) {
content ++= <h4 id="completed">Completed Stages ({completedStages.size})</h4> ++
completedStagesTable.toNodeSeq
}
if (shouldShowSkippedStages) {
content ++= <h4 id="skipped">Skipped Stages ({pendingOrSkippedStages.size})</h4> ++
pendingOrSkippedStagesTable.toNodeSeq
}
if (shouldShowFailedStages) {
content ++= <h4 id ="failed">Failed Stages ({failedStages.size})</h4> ++
failedStagesTable.toNodeSeq
}
UIUtils.headerSparkPage(s"Details for Job $jobId", content, parent)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging {
// Stages:
val activeStages = new HashMap[StageId, StageInfo]
val completedStages = ListBuffer[StageInfo]()
val skippedStages = ListBuffer[StageInfo]()
val failedStages = ListBuffer[StageInfo]()
val stageIdToData = new HashMap[(StageId, StageAttemptId), StageUIData]
val stageIdToInfo = new HashMap[StageId, StageInfo]
Expand Down Expand Up @@ -106,6 +107,7 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging {
"completedJobs" -> completedJobs.size,
"failedJobs" -> failedJobs.size,
"completedStages" -> completedStages.size,
"skippedStages" -> skippedStages.size,
"failedStages" -> failedStages.size
)
}
Expand Down Expand Up @@ -199,13 +201,10 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging {
stageIdToActiveJobIds.get(stageId).foreach { jobsUsingStage =>
jobsUsingStage.remove(jobEnd.jobId)
stageIdToInfo.get(stageId).foreach { stageInfo =>
// If this is a pending stage and no other job depends on it, then it won't be run.
// To prevent memory leaks, remove this data since it won't be cleaned up as stages
// finish / fail:
val isPendingStage = stageInfo.submissionTime.isEmpty && stageInfo.completionTime.isEmpty
if (isPendingStage && jobsUsingStage.isEmpty) {
stageIdToInfo.remove(stageId)
stageIdToData.remove((stageId, stageInfo.attemptId))
if (stageInfo.submissionTime.isEmpty) {
// if this stage is pending, it won't complete, so mark it as "skipped":
skippedStages += stageInfo
trimStagesIfNecessary(skippedStages)
}
}
}
Expand Down
20 changes: 15 additions & 5 deletions core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,8 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers {
rdd.countAsync()
eventually(timeout(10 seconds), interval(50 milliseconds)) {
go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs/job/?id=0")
find(id("active")).get.text should be ("Active Stages (1)")
find(id("pending")).get.text should be ("Pending Stages (2)")
// Essentially, we want to check that none of the stage rows show
// "No data available for this stage". Checking for the absence of that string is brittle
// because someone could change the error message and cause this test to pass by accident.
Expand All @@ -222,7 +224,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers {
}
}

test("stages that aren't run do not show up in 'pending stages' after a job finishes") {
test("stages that aren't run appear as 'skipped stages' after a job finishes") {
withSpark(newSparkContext()) { sc =>
// Create an RDD that involves multiple stages:
val rdd =
Expand All @@ -233,15 +235,23 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers {
rdd.count()
eventually(timeout(10 seconds), interval(50 milliseconds)) {
go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs/job/?id=1")
find(id("pending")).get.text should be ("Pending Stages (0)")
find(id("active")).get.text should be ("Active Stages (0)")
find(id("pending")) should be (None)
find(id("active")) should be (None)
find(id("failed")) should be (None)
find(id("completed")).get.text should be ("Completed Stages (1)")
find(id("failed")).get.text should be ("Failed Stages (0)")
find(id("skipped")).get.text should be ("Skipped Stages (2)")
// Essentially, we want to check that none of the stage rows show
// "No data available for this stage". Checking for the absence of that string is brittle
// because someone could change the error message and cause this test to pass by accident.
// Instead, it's safer to check that each row contains a link to a stage details page.
findAll(cssSelector("tbody tr")).foreach { row =>
val link = row.underlying.findElement(By.xpath(".//a"))
link.getAttribute("href") should include("stage")
}
}
}
}


test("jobs with stages that are skipped should show correct link descriptions on all jobs page") {
withSpark(newSparkContext()) { sc =>
// Create an RDD that involves multiple stages:
Expand Down