Skip to content

Commit edc3cb7

Browse files
jian-he authored and zjshen14 committed
YARN-3273. Improve scheduler UI to facilitate scheduling analysis and debugging. Contributed by Rohith Sharmaks.
(cherry picked from commit 658097d)
Conflicts:
  hadoop-yarn-project/CHANGES.txt
  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppAttemptBlock.java
  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/CapacitySchedulerLeafQueueInfo.java
1 parent 3f1d1d0 commit edc3cb7

File tree

21 files changed

+348
-60
lines changed

21 files changed

+348
-60
lines changed

hadoop-yarn-project/CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -338,6 +338,9 @@ Release 2.7.0 - UNRELEASED
338338
YARN-2777. Mark the end of individual log in aggregated log.
339339
(Varun Saxena via xgong)
340340

341+
YARN-3273. Improve scheduler UI to facilitate scheduling analysis and
342+
debugging. (Rohith Sharmaks via jianhe)
343+
341344
OPTIMIZATIONS
342345

343346
YARN-2990. FairScheduler's delay-scheduling always waits for node-local and

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppAttemptBlock.java

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -162,8 +162,6 @@ public Collection<ContainerReport> run() throws Exception {
162162
._("Diagnostics Info:", appAttempt.getDiagnosticsInfo() == null ?
163163
"" : appAttempt.getDiagnosticsInfo());
164164

165-
html._(InfoBlock.class);
166-
167165
if (exceptionWhenGetContainerReports) {
168166
html
169167
.p()

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,8 @@ public class RMAppAttemptMetrics {
4141
private ApplicationAttemptId attemptId = null;
4242
// preemption info
4343
private Resource resourcePreempted = Resource.newInstance(0, 0);
44+
// application headroom
45+
private volatile Resource applicationHeadroom = Resource.newInstance(0, 0);
4446
private AtomicInteger numNonAMContainersPreempted = new AtomicInteger(0);
4547
private AtomicBoolean isPreempted = new AtomicBoolean(false);
4648

@@ -145,4 +147,12 @@ public int[][] getLocalityStatistics() {
145147
public int getTotalAllocatedContainers() {
146148
return this.totalAllocatedContainers;
147149
}
150+
151+
public Resource getApplicationAttemptHeadroom() {
152+
return applicationHeadroom;
153+
}
154+
155+
public void setApplicationAttemptHeadRoom(Resource headRoom) {
156+
this.applicationHeadroom = headRoom;
157+
}
148158
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -632,4 +632,14 @@ public void incNumAllocatedContainers(NodeType containerType,
632632
requestType);
633633
}
634634
}
635+
636+
public void setApplicationHeadroomForMetrics(Resource headroom) {
637+
RMAppAttempt attempt =
638+
rmContext.getRMApps().get(attemptId.getApplicationId())
639+
.getCurrentAppAttempt();
640+
if (attempt != null) {
641+
attempt.getRMAppAttemptMetrics().setApplicationAttemptHeadRoom(
642+
Resources.clone(headroom));
643+
}
644+
}
635645
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -420,10 +420,13 @@ public synchronized User getUser(String userName) {
420420
*/
421421
public synchronized ArrayList<UserInfo> getUsers() {
422422
ArrayList<UserInfo> usersToReturn = new ArrayList<UserInfo>();
423-
for (Map.Entry<String, User> entry: users.entrySet()) {
424-
usersToReturn.add(new UserInfo(entry.getKey(), Resources.clone(
425-
entry.getValue().getUsed()), entry.getValue().getActiveApplications(),
426-
entry.getValue().getPendingApplications()));
423+
for (Map.Entry<String, User> entry : users.entrySet()) {
424+
User user = entry.getValue();
425+
usersToReturn.add(new UserInfo(entry.getKey(), Resources.clone(user
426+
.getUsed()), user.getActiveApplications(), user
427+
.getPendingApplications(), Resources.clone(user
428+
.getConsumedAMResources()), Resources.clone(user
429+
.getUserResourceLimit())));
427430
}
428431
return usersToReturn;
429432
}
@@ -1158,7 +1161,7 @@ private Resource computeUserLimit(FiCaSchedulerApp application,
11581161
" clusterCapacity: " + clusterResource
11591162
);
11601163
}
1161-
1164+
user.setUserResourceLimit(limit);
11621165
return limit;
11631166
}
11641167

@@ -1818,6 +1821,7 @@ resourceCalculator, this, getParent(), clusterResource,
18181821
@VisibleForTesting
18191822
public static class User {
18201823
ResourceUsage userResourceUsage = new ResourceUsage();
1824+
volatile Resource userResourceLimit = Resource.newInstance(0, 0);
18211825
int pendingApplications = 0;
18221826
int activeApplications = 0;
18231827

@@ -1887,6 +1891,14 @@ public void releaseContainer(Resource resource, Set<String> nodeLabels) {
18871891
}
18881892
}
18891893
}
1894+
1895+
public Resource getUserResourceLimit() {
1896+
return userResourceLimit;
1897+
}
1898+
1899+
public void setUserResourceLimit(Resource userResourceLimit) {
1900+
this.userResourceLimit = userResourceLimit;
1901+
}
18901902
}
18911903

18921904
@Override

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UserInfo.java

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,14 +32,19 @@ public class UserInfo {
3232
protected ResourceInfo resourcesUsed;
3333
protected int numPendingApplications;
3434
protected int numActiveApplications;
35+
protected ResourceInfo AMResourceUsed;
36+
protected ResourceInfo userResourceLimit;
3537

3638
UserInfo() {}
3739

38-
UserInfo(String username, Resource resUsed, int activeApps, int pendingApps) {
40+
UserInfo(String username, Resource resUsed, int activeApps, int pendingApps,
41+
Resource amResUsed, Resource resourceLimit) {
3942
this.username = username;
4043
this.resourcesUsed = new ResourceInfo(resUsed);
4144
this.numActiveApplications = activeApps;
4245
this.numPendingApplications = pendingApps;
46+
this.AMResourceUsed = new ResourceInfo(amResUsed);
47+
this.userResourceLimit = new ResourceInfo(resourceLimit);
4348
}
4449

4550
public String getUsername() {
@@ -57,4 +62,12 @@ public int getNumPendingApplications() {
5762
public int getNumActiveApplications() {
5863
return numActiveApplications;
5964
}
65+
66+
public ResourceInfo getAMResourcesUsed() {
67+
return AMResourceUsed;
68+
}
69+
70+
public ResourceInfo getUserResourceLimit() {
71+
return userResourceLimit;
72+
}
6073
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -268,7 +268,9 @@ public synchronized Allocation getAllocation(ResourceCalculator rc,
268268
minimumAllocation, numCont);
269269
ContainersAndNMTokensAllocation allocation =
270270
pullNewlyAllocatedContainersAndNMTokens();
271-
return new Allocation(allocation.getContainerList(), getHeadroom(), null,
271+
Resource headroom = getHeadroom();
272+
setApplicationHeadroomForMetrics(headroom);
273+
return new Allocation(allocation.getContainerList(), headroom, null,
272274
currentContPreemption, Collections.singletonList(rr),
273275
allocation.getNMTokenList());
274276
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -938,9 +938,10 @@ clusterResource, minimumAllocation, getMaximumResourceCapability(),
938938
application.updateBlacklist(blacklistAdditions, blacklistRemovals);
939939
ContainersAndNMTokensAllocation allocation =
940940
application.pullNewlyAllocatedContainersAndNMTokens();
941-
return new Allocation(allocation.getContainerList(),
942-
application.getHeadroom(), preemptionContainerIds, null, null,
943-
allocation.getNMTokenList());
941+
Resource headroom = application.getHeadroom();
942+
application.setApplicationHeadroomForMetrics(headroom);
943+
return new Allocation(allocation.getContainerList(), headroom,
944+
preemptionContainerIds, null, null, allocation.getNMTokenList());
944945
}
945946
}
946947

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -343,9 +343,10 @@ public Allocation allocate(
343343
application.updateBlacklist(blacklistAdditions, blacklistRemovals);
344344
ContainersAndNMTokensAllocation allocation =
345345
application.pullNewlyAllocatedContainersAndNMTokens();
346-
return new Allocation(allocation.getContainerList(),
347-
application.getHeadroom(), null, null, null,
348-
allocation.getNMTokenList());
346+
Resource headroom = application.getHeadroom();
347+
application.setApplicationHeadroomForMetrics(headroom);
348+
return new Allocation(allocation.getContainerList(), headroom, null,
349+
null, null, allocation.getNMTokenList());
349350
}
350351
}
351352

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java

Lines changed: 42 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,8 @@
3737
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
3838
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV;
3939
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.LI;
40+
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
41+
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY;
4042
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.UL;
4143
import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
4244
import org.apache.hadoop.yarn.webapp.view.InfoBlock;
@@ -67,41 +69,8 @@ static class LeafQueueInfoBlock extends HtmlBlock {
6769
lqinfo = (CapacitySchedulerLeafQueueInfo) info.qinfo;
6870
}
6971

70-
//Return a string describing one resource as a percentage of another
71-
private String getPercentage(ResourceInfo numerator, ResourceInfo denominator) {
72-
StringBuilder percentString = new StringBuilder("Memory: ");
73-
if (numerator != null) {
74-
percentString.append(numerator.getMemory());
75-
}
76-
if (denominator.getMemory() != 0) {
77-
percentString.append(" (<span title='of used resources in this queue'>")
78-
.append(StringUtils.format("%.2f", numerator.getMemory() * 100.0 /
79-
denominator.getMemory()) + "%</span>)");
80-
}
81-
percentString.append(", vCores: ");
82-
if (numerator != null) {
83-
percentString.append(numerator.getvCores());
84-
}
85-
if (denominator.getvCores() != 0) {
86-
percentString.append(" (<span title='of used resources in this queue'>")
87-
.append(StringUtils.format("%.2f", numerator.getvCores() * 100.0 /
88-
denominator.getvCores()) + "%</span>)");
89-
}
90-
return percentString.toString();
91-
}
92-
9372
@Override
9473
protected void render(Block html) {
95-
StringBuilder activeUserList = new StringBuilder("");
96-
ResourceInfo usedResources = lqinfo.getResourcesUsed();
97-
ArrayList<UserInfo> users = lqinfo.getUsers().getUsersList();
98-
for (UserInfo entry: users) {
99-
activeUserList.append(entry.getUsername()).append(" &lt;")
100-
.append(getPercentage(entry.getResourcesUsed(), usedResources))
101-
.append(", Schedulable Apps: " + entry.getNumActiveApplications())
102-
.append(", Non-Schedulable Apps: " + entry.getNumPendingApplications())
103-
.append("&gt;<br style='display:block'>"); //Force line break
104-
}
10574

10675
ResponseInfo ri = info("\'" + lqinfo.getQueuePath().substring(5) + "\' Queue Status").
10776
_("Queue State:", lqinfo.getQueueState()).
@@ -116,12 +85,12 @@ protected void render(Block html) {
11685
_("Max Applications:", Integer.toString(lqinfo.getMaxApplications())).
11786
_("Max Applications Per User:", Integer.toString(lqinfo.getMaxApplicationsPerUser())).
11887
_("Max Application Master Resources:", lqinfo.getAMResourceLimit().toString()).
88+
_("Used Application Master Resources:", lqinfo.getUsedAMResource().toString()).
11989
_("Max Application Master Resources Per User:", lqinfo.getUserAMResourceLimit().toString()).
12090
_("Configured Capacity:", percent(lqinfo.getCapacity() / 100)).
12191
_("Configured Max Capacity:", percent(lqinfo.getMaxCapacity() / 100)).
12292
_("Configured Minimum User Limit Percent:", Integer.toString(lqinfo.getUserLimit()) + "%").
12393
_("Configured User Limit Factor:", String.format("%.1f", lqinfo.getUserLimitFactor())).
124-
_r("Active Users: ", activeUserList.toString()).
12594
_("Accessible Node Labels:", StringUtils.join(",", lqinfo.getNodeLabels())).
12695
_("Preemption:", lqinfo.getPreemptionDisabled() ? "disabled" : "enabled");
12796

@@ -132,6 +101,44 @@ protected void render(Block html) {
132101
}
133102
}
134103

104+
static class QueueUsersInfoBlock extends HtmlBlock {
105+
final CapacitySchedulerLeafQueueInfo lqinfo;
106+
107+
@Inject
108+
QueueUsersInfoBlock(ViewContext ctx, CSQInfo info) {
109+
super(ctx);
110+
lqinfo = (CapacitySchedulerLeafQueueInfo) info.qinfo;
111+
}
112+
113+
@Override
114+
protected void render(Block html) {
115+
TBODY<TABLE<Hamlet>> tbody =
116+
html.table("#userinfo").thead().$class("ui-widget-header").tr().th()
117+
.$class("ui-state-default")._("User Name")._().th()
118+
.$class("ui-state-default")._("Max Resource")._().th()
119+
.$class("ui-state-default")._("Used Resource")._().th()
120+
.$class("ui-state-default")._("Max AM Resource")._().th()
121+
.$class("ui-state-default")._("Used AM Resource")._().th()
122+
.$class("ui-state-default")._("Schedulable Apps")._().th()
123+
.$class("ui-state-default")._("Non-Schedulable Apps")._()._()._()
124+
.tbody();
125+
126+
ArrayList<UserInfo> users = lqinfo.getUsers().getUsersList();
127+
for (UserInfo userInfo : users) {
128+
tbody.tr().td(userInfo.getUsername())
129+
.td(userInfo.getUserResourceLimit().toString())
130+
.td(userInfo.getResourcesUsed().toString())
131+
.td(lqinfo.getUserAMResourceLimit().toString())
132+
.td(userInfo.getAMResourcesUsed().toString())
133+
.td(Integer.toString(userInfo.getNumActiveApplications()))
134+
.td(Integer.toString(userInfo.getNumPendingApplications()))._();
135+
}
136+
137+
html.div().$class("usersinfo").h5("Active Users Info")._();
138+
tbody._()._();
139+
}
140+
}
141+
135142
public static class QueueBlock extends HtmlBlock {
136143
final CSQInfo csqinfo;
137144

@@ -166,6 +173,7 @@ public void render(Block html) {
166173
csqinfo.qinfo = info;
167174
if (info.getQueues() == null) {
168175
li.ul("#lq").li()._(LeafQueueInfoBlock.class)._()._();
176+
li.ul("#lq").li()._(QueueUsersInfoBlock.class)._()._();
169177
} else {
170178
li._(QueueBlock.class);
171179
}

0 commit comments

Comments
 (0)