@@ -34,45 +34,45 @@ private[mesos] class MesosCoarseGrainedSchedulerSource(
3434 scheduler : MesosCoarseGrainedSchedulerBackend )
3535 extends Source with MesosSchedulerUtils {
3636
37- override val sourceName : String = " mesos_cluster "
37+ override val sourceName : String = " mesos "
3838 override val metricRegistry : MetricRegistry = new MetricRegistry
3939
4040 // EXECUTOR STATE POLLING METRICS:
4141 // These metrics periodically poll the scheduler for its state, including resource allocation and
4242 // task states.
4343
4444 // Number of CPUs used
45- metricRegistry.register(MetricRegistry .name(" executor " , " resource" , " cores" ), new Gauge [Double ] {
45+ metricRegistry.register(MetricRegistry .name(" resource" , " cores" ), new Gauge [Double ] {
4646 override def getValue : Double = scheduler.getCoresUsed
4747 })
4848 // Number of CPUs vs max
4949 if (scheduler.getMaxCores != 0 ) {
50- metricRegistry.register(MetricRegistry .name(" executor " , " resource" , " cores_of_max" ),
50+ metricRegistry.register(MetricRegistry .name(" resource" , " cores_of_max" ),
5151 new Gauge [Double ] {
5252 // Note: See above div0 check before calling register()
5353 override def getValue : Double = scheduler.getCoresUsed / scheduler.getMaxCores
5454 })
5555 }
5656 // Number of CPUs per task
57- metricRegistry.register(MetricRegistry .name(" executor " , " resource" , " mean_cores_per_task" ),
57+ metricRegistry.register(MetricRegistry .name(" resource" , " mean_cores_per_task" ),
5858 new Gauge [Double ] {
5959 override def getValue : Double = scheduler.getMeanCoresPerTask
6060 })
6161
6262 // Number of GPUs used
63- metricRegistry.register(MetricRegistry .name(" executor " , " resource" , " gpus" ), new Gauge [Double ] {
63+ metricRegistry.register(MetricRegistry .name(" resource" , " gpus" ), new Gauge [Double ] {
6464 override def getValue : Double = scheduler.getGpusUsed
6565 })
6666 // Number of GPUs vs max
6767 if (scheduler.getMaxGpus != 0 ) {
68- metricRegistry.register(MetricRegistry .name(" executor " , " resource" , " gpus_of_max" ),
68+ metricRegistry.register(MetricRegistry .name(" resource" , " gpus_of_max" ),
6969 new Gauge [Double ] {
7070 // Note: See above div0 check before calling register()
7171 override def getValue : Double = scheduler.getGpusUsed / scheduler.getMaxGpus
7272 })
7373 }
7474 // Number of GPUs per task
75- metricRegistry.register(MetricRegistry .name(" executor " , " resource" , " mean_gpus_per_task" ),
75+ metricRegistry.register(MetricRegistry .name(" resource" , " mean_gpus_per_task" ),
7676 new Gauge [Double ] {
7777 override def getValue : Double = scheduler.getMeanGpusPerTask
7878 })
@@ -84,7 +84,7 @@ private[mesos] class MesosCoarseGrainedSchedulerSource(
8484 // Number of tasks vs max
8585 if (scheduler.isExecutorLimitEnabled) {
8686 // executorLimit is assigned asynchronously, so it may start off with a zero value.
87- metricRegistry.register(MetricRegistry .name(" executor " , " count_of_max" ), new Gauge [Int ] {
87+ metricRegistry.register(MetricRegistry .name(" count_of_max" ), new Gauge [Int ] {
8888 override def getValue : Int = {
8989 if (scheduler.getExecutorLimit == 0 ) {
9090 0
@@ -95,19 +95,19 @@ private[mesos] class MesosCoarseGrainedSchedulerSource(
9595 })
9696 }
9797 // Number of task failures
98- metricRegistry.register(MetricRegistry .name(" executor " , " failures" ), new Gauge [Int ] {
98+ metricRegistry.register(MetricRegistry .name(" failures" ), new Gauge [Int ] {
9999 override def getValue : Int = scheduler.getTaskFailureCount
100100 })
101101 // Number of tracked agents regardless of whether we're currently present on them
102- metricRegistry.register(MetricRegistry .name(" executor " , " known_agents" ), new Gauge [Int ] {
102+ metricRegistry.register(MetricRegistry .name(" known_agents" ), new Gauge [Int ] {
103103 override def getValue : Int = scheduler.getKnownAgentsCount
104104 })
105105 // Number of tracked agents with tasks on them
106- metricRegistry.register(MetricRegistry .name(" executor " , " occupied_agents" ), new Gauge [Int ] {
106+ metricRegistry.register(MetricRegistry .name(" occupied_agents" ), new Gauge [Int ] {
107107 override def getValue : Int = scheduler.getOccupiedAgentsCount
108108 })
109109 // Number of blacklisted agents (too many failures)
110- metricRegistry.register(MetricRegistry .name(" executor " , " blacklisted_agents" ), new Gauge [Int ] {
110+ metricRegistry.register(MetricRegistry .name(" blacklisted_agents" ), new Gauge [Int ] {
111111 override def getValue : Int = scheduler.getBlacklistedAgentCount
112112 })
113113
@@ -116,63 +116,63 @@ private[mesos] class MesosCoarseGrainedSchedulerSource(
116116
117117 // Count of offers received (individual offers, not offer RPCs)
118118 private val offerCounter =
119- metricRegistry.counter(MetricRegistry .name(" executor " , " mesos " , " offer " ))
119+ metricRegistry.counter(MetricRegistry .name(" offers " , " received " ))
120120 // Count of all offers declined; the sum of the per-reason decline counters below
121121 private val declineCounter =
122- metricRegistry.counter(MetricRegistry .name(" executor " , " mesos " , " decline " ))
122+ metricRegistry.counter(MetricRegistry .name(" offers " , " declined " ))
123123 // Offers declined for unmet requirements (with RejectOfferDurationForUnmetConstraints)
124124 private val declineUnmetCounter =
125- metricRegistry.counter(MetricRegistry .name(" executor " , " mesos " , " decline_unmet " ))
125+ metricRegistry.counter(MetricRegistry .name(" offers " , " declined_unmet " ))
126126 // Offers declined when the deployment is finished (with RejectOfferDurationForReachedMaxCores)
127127 private val declineFinishedCounter =
128- metricRegistry.counter(MetricRegistry .name(" executor " , " mesos " , " decline_finished " ))
128+ metricRegistry.counter(MetricRegistry .name(" offers " , " declined_finished " ))
129129 // Offers declined because they were left unused (no duration in the decline filter)
130130 private val declineUnusedCounter =
131- metricRegistry.counter(MetricRegistry .name(" executor " , " mesos " , " decline_unused " ))
131+ metricRegistry.counter(MetricRegistry .name(" offers " , " declined_unused " ))
132132 // Count of revive operations
133133 private val reviveCounter =
134- metricRegistry.counter(MetricRegistry .name(" executor " , " mesos " , " revive " ))
134+ metricRegistry.counter(MetricRegistry .name(" offers " , " revived " ))
135135 // Count of launch operations
136136 private val launchCounter =
137- metricRegistry.counter(MetricRegistry .name(" executor " , " mesos " , " launch " ))
137+ metricRegistry.counter(MetricRegistry .name(" offers " , " launched " ))
138138
139139 // Counters for Spark states on launched executors (LAUNCHING, RUNNING, ...)
140140 private val sparkStateCounters = TaskState .values
141141 .map(state => (state, metricRegistry.counter(
142- MetricRegistry .name(" executor " , " spark_state" , state.toString.toLowerCase))))
142+ MetricRegistry .name(" spark_state" , state.toString.toLowerCase))))
143143 .toMap
144144 private val sparkUnknownStateCounter =
145- metricRegistry.counter(MetricRegistry .name(" executor " , " spark_state" , " UNKNOWN" ))
145+ metricRegistry.counter(MetricRegistry .name(" spark_state" , " UNKNOWN" ))
146146 // Counters for Mesos states on launched executors (TASK_RUNNING, TASK_LOST, ...),
147147 // more granular than sparkStateCounters
148148 private val mesosStateCounters = MesosTaskState .values
149149 .map(state => (state, metricRegistry.counter(
150- MetricRegistry .name(" executor " , " mesos_state" , state.name.toLowerCase))))
150+ MetricRegistry .name(" mesos_state" , state.name.toLowerCase))))
151151 .toMap
152152 private val mesosUnknownStateCounter =
153- metricRegistry.counter(MetricRegistry .name(" executor " , " mesos_state" , " UNKNOWN" ))
153+ metricRegistry.counter(MetricRegistry .name(" mesos_state" , " UNKNOWN" ))
154154
155155 // TASK TIMER METRICS:
156156 // These metrics measure the duration to launch and run executors
157157
158158 // Duration from driver start to the first task launching.
159159 private val startToFirstLaunched =
160- metricRegistry.timer(MetricRegistry .name(" executor " , " start_to_first_launched" ))
160+ metricRegistry.timer(MetricRegistry .name(" start_to_first_launched" ))
161161 // Duration from driver start to the first task running.
162162 private val startToFirstRunning =
163- metricRegistry.timer(MetricRegistry .name(" executor " , " start_to_first_running" ))
163+ metricRegistry.timer(MetricRegistry .name(" start_to_first_running" ))
164164
165165 // Duration from driver start to maxCores footprint being filled
166166 private val startToAllLaunched =
167- metricRegistry.timer(MetricRegistry .name(" executor " , " start_to_all_launched" ))
167+ metricRegistry.timer(MetricRegistry .name(" start_to_all_launched" ))
168168
169169 // Duration between an executor launch and the executor entering a given Spark state, e.g. RUNNING
170170 private val launchToSparkStateTimers = TaskState .values
171171 .map(state => (state, metricRegistry.timer(
172- MetricRegistry .name(" executor " , " launch_to_spark_state" , state.toString.toLowerCase))))
172+ MetricRegistry .name(" launch_to_spark_state" , state.toString.toLowerCase))))
173173 .toMap
174174 private val launchToUnknownSparkStateTimer = metricRegistry.timer(
175- MetricRegistry .name(" executor " , " launch_to_spark_state" , " UNKNOWN" ))
175+ MetricRegistry .name(" launch_to_spark_state" , " UNKNOWN" ))
176176
177177 // Time that the scheduler was initialized. This is the 'start time'.
178178 private val schedulerInitTime = new Date
0 commit comments