diff --git a/.github/labeler.yml b/.github/labeler.yml
new file mode 100644
index 00000000..1b2a9700
--- /dev/null
+++ b/.github/labeler.yml
@@ -0,0 +1,13 @@
+#
+# Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
+# file except in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#
+
+# Add the label to any change within the folder or any of its subfolders
+xinfra-monitor:
+  - src/main/java/com/linkedin/xinfra/monitor/**/*
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 00000000..8b410884
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,27 @@
+name: CI
+
+on:
+  pull_request:
+    types: ['opened', 'synchronize']
+
+jobs:
+  ci:
+    name: ci
+    strategy:
+      matrix:
+        version: ['11.0.13']
+        dist: ['microsoft']
+
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout code
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: set up JDK ${{matrix.version}} (${{matrix.dist}})
+        uses: actions/setup-java@v3
+        with:
+          java-version: ${{ matrix.version }}
+          distribution: ${{ matrix.dist }}
+      - name: test
+        run: ./gradlew --info test --no-daemon
diff --git a/.github/workflows/greetings.yml b/.github/workflows/greetings.yml
new file mode 100644
index 00000000..aa455d62
--- /dev/null
+++ b/.github/workflows/greetings.yml
@@ -0,0 +1,17 @@
+name: Greetings
+
+on:
+  pull_request:
+    types: ['opened']
+  issues:
+    types: ['opened']
+
+jobs:
+  greeting:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/first-interaction@v1
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+          issue-message: 'This is your first issue in the repository. Thank you for raising this issue.'
+          pr-message: 'This is your first pull request in the repository. Thank you for this patch. Please review the Wiki page in the repository before submitting a PR.'
diff --git a/.github/workflows/label.yml b/.github/workflows/label.yml
new file mode 100644
index 00000000..96317667
--- /dev/null
+++ b/.github/workflows/label.yml
@@ -0,0 +1,25 @@
+# This workflow will triage pull requests and apply a label based on the
+# paths that are modified in the pull request.
+#
+# To use this workflow, you will need to set up a .github/labeler.yml
+# file with configuration.
For more information, see: +# https://github.com/actions/labeler/blob/master/README.md + +name: "Pull Request Labeler" +on: + - pull_request + +jobs: + triage: + runs-on: ubuntu-latest + steps: + - uses: actions/labeler@v3-preview + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" + + + + + + + diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 00000000..34045177 --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,19 @@ +name: Mark stale issues and pull requests + +on: + schedule: + - cron: "30 1 * * *" + +jobs: + stale: + + runs-on: ubuntu-latest + + steps: + - uses: actions/stale@v1 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: 'Stale issue message' + stale-pr-message: 'Stale pull request message' + stale-issue-label: 'no-issue-activity' + stale-pr-label: 'no-pr-activity' diff --git a/.github/workflows/tag.yml b/.github/workflows/tag.yml new file mode 100644 index 00000000..e5356484 --- /dev/null +++ b/.github/workflows/tag.yml @@ -0,0 +1,35 @@ +name: tag (release) flow + +on: + create: + tags: + - '*' + +jobs: + gradle-java8: + name: Java 8 release + runs-on: ubuntu-latest + steps: + - name: checkout code + uses: actions/checkout@v3 + with: + # bring in all history because the gradle versions plugin needs to "walk back" to the closest ancestor tag + # to figure out what version this is. optimizing this is left as a challenge to future committers + fetch-depth: 0 + - name: Set up JDK + uses: actions/setup-java@v3 + with: + java-version: 11 + distribution: microsoft + - name: Build with Gradle + # add --info or --debug below for more details when trying to understand issues + run: ./gradlew clean build javadoc --stacktrace --warning-mode all --no-daemon + - name: Branch tag + id: branch_tag + run: echo "RELEASE_TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT + - name: Publish to Jfrog + env: + JFROG_USER: ${{ secrets.JFROG_USER }} + JFROG_KEY: ${{ secrets.JFROG_KEY }} + RELEASE_TAG: ${{ steps.branch_tag.outputs.RELEASE_TAG }} + run: ./scripts/publishToJfrog.sh diff --git a/.gitignore b/.gitignore index b8ae435e..5b4afd4a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,15 @@ .DS_Store build/ logs/ +.classpath +.idea/ +.project +.settings/ +src/test/java/com/linkedin/xinfra/monitor/RandomTests.java + +config/andrew-choi.properties +config/andrew-multi-cluster-monitor.properties + +kafka-monitor.iml +kafka-monitor.ipr +kafka-monitor.iws diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 0d5889e9..00000000 --- a/.travis.yml +++ /dev/null @@ -1,6 +0,0 @@ -language: java - -jdk: - - oraclejdk7 - - openjdk7 - - oraclejdk8 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..118ac0fd --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,80 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. 
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+  advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+  address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community. Examples of
+representing a project or community include using an official project e-mail
+address, posting via an official social media account, or acting as an appointed
+representative at an online or offline event. Representation of a project may be
+further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at andchoi@linkedin.com. All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
+
+## FAQ
+
+For LinkedIn Code of Conduct (OSS Code of Conduct) issues or inquiries, please
+email the Global Compliance & Integrity inbox at integrity@linkedin.com.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..7d1d7f31
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,27 @@
+Contribution Agreement
+======================
+
+As a contributor, you represent that the code you submit is your
+original work or that of your employer (in which case you represent you
+have the right to bind your employer). By submitting code, you (and, if
+applicable, your employer) are licensing the submitted code to LinkedIn
+and the open source community subject to the Apache 2.0 license.
+
+Responsible Disclosure of Security Vulnerabilities
+==================================================
+
+Please do not file reports on GitHub for security issues.
+Please review the guidelines at
+https://www.linkedin.com/help/linkedin/answer/62924/security-vulnerabilities?lang=en
+
+Tips for Getting Your Pull Request Accepted
+===========================================
+
+1. Make sure all new features are tested and the tests pass.
+2. Bug fixes must include a test case demonstrating the error being fixed.
+
+Reporting Issues
+================
+Please use [this link](https://github.com/linkedin/kafka-monitor/issues/new) to report any issues.
diff --git a/LICENSE b/LICENSE
index 02c5bb4d..0d5476d3 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,3 @@
-
                                  Apache License
                            Version 2.0, January 2004
                         http://www.apache.org/licenses/
@@ -187,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright 2016 LinkedIn Corp. All rights reserved.
+   Copyright 2016, 2017, 2018, 2019, 2020, 2021 LinkedIn Corp. All rights reserved.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
diff --git a/NOTICE b/NOTICE
index ee9a4345..d02fd50c 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,4 +1,4 @@
-Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
+Copyright 2016, 2017, 2018, 2019, 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
 file except in compliance with the License. You may obtain a copy of the License at
 
 http://www.apache.org/licenses/LICENSE-2.0
@@ -34,18 +34,3 @@ License: http://www.json.org/license.html
 This product includes/uses JUnit (http://junit.org/)
 Copyright 2002-2016 JUnit
 License: Eclipse Public License 1.0
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/README.md b/README.md
index 7b5ba295..8313ba42 100644
--- a/README.md
+++ b/README.md
@@ -1,95 +1,186 @@
-# Kafka Monitor
+<img src="docs/images/xinfra_monitor.png">
+# Xinfra Monitor [![Build Status](https://travis-ci.org/linkedin/kafka-monitor.svg?branch=master)](https://travis-ci.org/linkedin/kafka-monitor)
+![Greetings](https://github.com/linkedin/kafka-monitor/workflows/Greetings/badge.svg)
+![Mark stale issues and pull requests](https://github.com/linkedin/kafka-monitor/workflows/Mark%20stale%20issues%20and%20pull%20requests/badge.svg)
+![Pull Request Labeler](https://github.com/linkedin/kafka-monitor/workflows/Pull%20Request%20Labeler/badge.svg)
 
-Kafka Monitor is a framework to implement and execute long-running kafka
+Xinfra Monitor (formerly Kafka Monitor) is a framework to implement and execute long-running Kafka
 system tests in a real cluster. It complements Kafka’s existing system
 tests by capturing potential bugs or regressions that are only likely to occur
 after a prolonged period of time or with low probability. Moreover, it allows you to
 monitor a Kafka cluster using end-to-end pipelines to obtain a number of derived vital stats
-such as end-to-end latency, service availability and message loss rate. You can easily
-deploy Kafka Monitor to test and monitor your Kafka cluster without requiring
+such as
+
+1. End-to-end latency
+2. Service availability
+3. Produce and Consume availability
+4. Consumer offset commit availability
+5. Consumer offset commit latency
+6. Kafka message loss rate
+7. And many, many more.
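Each of these stats is exposed as a JMX metric (the full object names appear in the list further down). With the bundled Jolokia service enabled, they can also be read over plain HTTP. A minimal sketch, reusing the Jolokia query that the pre-rename README documented (8778 is Jolokia's default port; substitute any object name/attribute pair from the metric list):

```
# Read the current average produce availability from a running instance
curl localhost:8778/jolokia/read/kmf.services:type=produce-service,name=*/produce-availability-avg
```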
+
+You can easily
+deploy Xinfra Monitor to test and monitor your Kafka cluster without requiring
 any change to your application.
 
-Kafka Monitor can automatically create the monitor topic with the specified config
+Xinfra Monitor can automatically create the monitor topic with the specified config
 and increase partition count of the monitor topic to ensure partition# >=
 broker#. It can also reassign partitions and trigger preferred leader election
 to ensure that each broker acts as leader of at least one partition of the
-monitor topic. This allows Kafka Monitor to detect performance issue on every
+monitor topic. This allows Xinfra Monitor to detect performance issues on every
 broker without requiring users to manually manage the partition assignment of
 the monitor topic.
 
+Xinfra Monitor is used in conjunction with different middle-layer services such as
+li-apache-kafka-clients in order to monitor single clusters, pipeline destination
+clusters, and other types of clusters, as done in LinkedIn engineering for
+real-time cluster health checks.
+
+These are some of the metrics emitted from a Xinfra Monitor instance.
+
+```
+kmf:type=kafka-monitor:offline-runnable-count
+kmf.services:type=produce-service,name=*:produce-availability-avg
+kmf.services:type=consume-service,name=*:consume-availability-avg
+kmf.services:type=produce-service,name=*:records-produced-total
+kmf.services:type=consume-service,name=*:records-consumed-total
+kmf.services:type=produce-service,name=*:records-produced-rate
+kmf.services:type=produce-service,name=*:produce-error-rate
+kmf.services:type=consume-service,name=*:consume-error-rate
+kmf.services:type=consume-service,name=*:records-lost-total
+kmf.services:type=consume-service,name=*:records-lost-rate
+kmf.services:type=consume-service,name=*:records-duplicated-total
+kmf.services:type=consume-service,name=*:records-delay-ms-avg
+kmf.services:type=commit-availability-service,name=*:offsets-committed-avg
+kmf.services:type=commit-availability-service,name=*:offsets-committed-total
+kmf.services:type=commit-availability-service,name=*:failed-commit-offsets-avg
+kmf.services:type=commit-availability-service,name=*:failed-commit-offsets-total
+kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-avg
+kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-max
+kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-99th
+kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-999th
+kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-9999th
+```
+
 ## Getting Started
 
 ### Prerequisites
 
-Kafka Monitor requires Gradle 2.0 or higher. Java 7 should be used for
+Xinfra Monitor requires Gradle 2.0 or higher. Java 7 should be used for
 building in order to support both Java 7 and Java 8 at runtime.
 
-Kafka Monitor supports Apache Kafka 0.8 and 0.9. Use branch 0.8.2.2 to monitor Apache
-Kafka cluster 0.8. Use branch 0.9.0.1 to compile with Kafka 0.9. Use master
-branch to compile with Kafka 0.10.
+Xinfra Monitor supports Apache Kafka 0.8 to 2.0: +- Use branch 0.8.2.2 to work with Apache Kafka 0.8 +- Use branch 0.9.0.1 to work with Apache Kafka 0.9 +- Use branch 0.10.2.1 to work with Apache Kafka 0.10 +- Use branch 0.11.x to work with Apache Kafka 0.11 +- Use branch 1.0.x to work with Apache Kafka 1.0 +- Use branch 1.1.x to work with Apache Kafka 1.1 +- Use master branch to work with Apache Kafka 2.0 + ### Configuration Tips -- We advise advanced users to run Kafka Monitor with -`./bin/kafka-monitor-start.sh config/kafka-monitor.properties`. The default -kafka-monitor.properties in the repo provides an simple example of how to +
+1. We advise advanced users to run Xinfra Monitor with
+`./bin/xinfra-monitor-start.sh config/xinfra-monitor.properties`. The default
+xinfra-monitor.properties in the repo provides a simple example of how to
 monitor a single cluster. You probably need to change the value of
-`zookeeper.connect` and `bootstrap.servers` to point to your cluster.
-
-- The full list of configs and their documentation can be found in the code of
+`zookeeper.connect` and `bootstrap.servers` to point to your cluster.
+
+2. The full list of configs and their documentation can be found in the code of
 the Config class for the respective service, e.g. ProduceServiceConfig.java and
-ConsumeServiceConfig.java.
-
-- You can specify multiple SingleClusterMonitor in the kafka-monitor.properties to
-monitor multiple Kafka clusters in one Kafka Monitor process. As another
-advanced use-cse, you can point ProduceService and ConsumeService to two
-different Kafka clusters that are connected by MirrorMaker to monitor their
-end-to-end latency.
-
-- Kafka Monitor by default will automatically create the monitor topic based on
-the e.g. `topic-management.replicationFactor` and `topic-management.partitionsToBrokersRatio`
+ConsumeServiceConfig.java.
+
+3. You can specify multiple SingleClusterMonitor in the xinfra-monitor.properties to
+monitor multiple Kafka clusters in one Xinfra Monitor process. As another
+advanced use-case, you can point ProduceService and ConsumeService to two
+different Kafka clusters that are connected by MirrorMaker to monitor their
+end-to-end latency.
+
+4. Xinfra Monitor by default will automatically create the monitor topic based on
+configs such as `topic-management.replicationFactor` and `topic-management.partitionsToBrokersRatio`
 specified in the config. replicationFactor is 1 by default and you probably want
 to change it to the same replication factor as used for your existing
-topics. You can disable auto topic creation by setting `produce.topic.topicCreationEnabled` to false.
-
-- Kafka Monitor can automatically increase partition count of the monitor topic
+topics. You can disable auto topic creation by setting `produce.topic.topicCreationEnabled` to false.
+
+5. Xinfra Monitor can automatically increase the partition count of the monitor topic
 to ensure partition# >= broker#. It can also reassign partitions and trigger
 preferred leader election to ensure that each broker acts as leader of at
 least one partition of the monitor topic. To use this feature, use either
-EndToEndTest or TopicManagementService in the properties file.
-
-
-### Build Kafka Monitor
+EndToEndTest or TopicManagementService in the properties file.
+
+6. When using Secure Sockets Layer (SSL) or any non-plaintext security protocol for AdminClient,
+please configure the following entries in the single-cluster-monitor props,
+`produce.producer.props`, as well as `consume.consumer.props` (a minimal sketch follows this list):
+https://docs.confluent.io/current/installation/configuration/admin-configs.html
+   - `ssl.key.password`
+   - `ssl.keystore.location`
+   - `ssl.keystore.password`
+   - `ssl.truststore.location`
+   - `ssl.truststore.password`
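As a minimal sketch of the SSL tip above, the entries sit next to the other client properties in `config/xinfra-monitor.properties`; the keystore/truststore paths and passwords here are placeholders, and the same keys would be repeated under `consume.consumer.props` and the top-level single-cluster-monitor props:

```
"produce.producer.props": {
  "client.id": "kmf-client-id",
  "security.protocol": "SSL",
  "ssl.key.password": "<key-password>",
  "ssl.keystore.location": "/path/to/client.keystore.jks",
  "ssl.keystore.password": "<keystore-password>",
  "ssl.truststore.location": "/path/to/client.truststore.jks",
  "ssl.truststore.password": "<truststore-password>"
}
```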
+### Build Xinfra Monitor
 ```
 $ git clone https://github.com/linkedin/kafka-monitor.git
 $ cd kafka-monitor
 $ ./gradlew jar
 ```
 
-### Start KafkaMonitor to run tests/services specified in the config file
+### Start XinfraMonitor to run tests/services specified in the config file
+```
+$ ./bin/xinfra-monitor-start.sh config/xinfra-monitor.properties
+```
+
+### Run Xinfra Monitor with arbitrary producer/consumer configuration (e.g. SASL enabled client)
+Edit `config/xinfra-monitor.properties` to specify custom producer configurations in the
+key/value map `produce.producer.props`. Similarly, specify custom consumer
+configurations in `consume.consumer.props`. The documentation for the producer and
+consumer configs in these key/value maps can be found in the Apache Kafka wiki.
+
 ```
-$ ./bin/kafka-monitor-start.sh config/kafka-monitor.properties
+$ ./bin/xinfra-monitor-start.sh config/xinfra-monitor.properties
 ```
 
 ### Run SingleClusterMonitor app to monitor a Kafka cluster
+
+Metrics `produce-availability-avg` and `consume-availability-avg` demonstrate
+whether messages can be properly produced to and consumed from this cluster.
+See the Service Overview wiki for how these metrics are derived.
+
 ```
 $ ./bin/single-cluster-monitor.sh --topic test --broker-list localhost:9092 --zookeeper localhost:2181
 ```
 
-### Get metric values (e.g. service availability, message loss rate) in real-time as time series graphs
-Open ```localhost:8000/index.html``` in your web browser
+### Run MultiClusterMonitor app to monitor a pipeline of Kafka clusters connected by MirrorMaker
+Edit `config/multi-cluster-monitor.properties` to specify the right broker and
+ZooKeeper URLs, as suggested by the comments in the properties file.
 
-You can edit webapp/index.html to easily add new metrics to be displayed.
+Metrics `produce-availability-avg` and `consume-availability-avg` demonstrate
+whether messages can be properly produced to the source cluster and consumed
+from the destination cluster. See config/multi-cluster-monitor.properties for
+the full JMX path for these metrics.
 
-### Query metric value (e.g. service availability) via HTTP request
 ```
-curl localhost:8778/jolokia/read/kmf.services:type=produce-service,name=*/produce-availability-avg
+$ ./bin/xinfra-monitor-start.sh config/multi-cluster-monitor.properties
 ```
 
-You can query other JMX metric value as well by substituting object-name and
-attribute-name of the JMX metric in the query above.
-
 ### Run checkstyle on the Java code
 ```
 ./gradlew checkstyleMain checkstyleTest
@@ -105,8 +196,6 @@
 - [Motivation](https://github.com/linkedin/kafka-monitor/wiki/Motivation)
 - [Design Overview](https://github.com/linkedin/kafka-monitor/wiki/Design-Overview)
-- [Service Design](https://github.com/linkedin/kafka-monitor/wiki/Service-Design)
+- [Service and App Overview](https://github.com/linkedin/kafka-monitor/wiki)
 - [Future Work](https://github.com/linkedin/kafka-monitor/wiki/Future-Work)
-
-
-
+- [Application Configuration](https://github.com/linkedin/kafka-monitor/wiki/App-Configuration)
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 00000000..f645bac2
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,17 @@
+# Security Policy
+
+## Supported Versions
+
+| Version | Supported          |
+| ------- | ------------------ |
+| 1.1.x   | :white_check_mark: |
+
+## Reporting a Vulnerability
+
+Please report vulnerabilities by opening an issue at https://github.com/linkedin/kafka-monitor/issues/new.
diff --git a/bin/kmf-run-class.sh b/bin/kmf-run-class.sh
index d694ee0b..0fb1f8a2 100755
--- a/bin/kmf-run-class.sh
+++ b/bin/kmf-run-class.sh
@@ -50,7 +50,7 @@ fi
 
 # Log4j settings
 if [ -z "$KAFKA_LOG4J_OPTS" ]; then
-  KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:$base_dir/config/log4j.properties"
+  KAFKA_LOG4J_OPTS="-Dlog4j.configurationFile=$base_dir/config/log4j2.properties"
 fi
 
 KAFKA_LOG4J_OPTS="-Dkafka.logs.dir=$LOG_DIR $KAFKA_LOG4J_OPTS"
@@ -74,7 +74,7 @@ fi
 
 # JVM performance options
 if [ -z "$KAFKA_JVM_PERFORMANCE_OPTS" ]; then
-  KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:+CMSScavengeBeforeRemark -XX:+DisableExplicitGC -Djava.awt.headless=true"
+  KAFKA_JVM_PERFORMANCE_OPTS="-server -Djava.awt.headless=true"
 fi
 
diff --git a/bin/single-cluster-monitor.sh b/bin/single-cluster-monitor.sh
index 7c83636f..79f9eacb 100755
--- a/bin/single-cluster-monitor.sh
+++ b/bin/single-cluster-monitor.sh
@@ -9,4 +9,4 @@
 
 base_dir=$(dirname $0)
 
-exec $base_dir/kmf-run-class.sh com/linkedin/kmf/apps/SingleClusterMonitor $@
+exec $base_dir/kmf-run-class.sh com/linkedin/xinfra/monitor/apps/SingleClusterMonitor $@
diff --git a/bin/windows/kafka-monitor-start.bat b/bin/windows/kafka-monitor-start.bat
index abba5640..45eedad7 100644
--- a/bin/windows/kafka-monitor-start.bat
+++ b/bin/windows/kafka-monitor-start.bat
@@ -15,11 +15,11 @@ popd
 
 IF [%1] EQU [] (
-  echo USAGE: %0 config/kafka-monitor.properties
+  echo USAGE: %0 config/xinfra-monitor.properties
   EXIT /B 1
 )
 
-set COMMAND=%BASE_DIR%\kmf-run-class.bat com.linkedin.kmf.KafkaMonitor %*
+set COMMAND=%BASE_DIR%\kmf-run-class.bat com.linkedin.xinfra.monitor.XinfraMonitor %*
 
 rem echo basedir: %BASE_DIR%
 
diff --git a/bin/windows/kmf-run-class.bat b/bin/windows/kmf-run-class.bat
index 559a965d..caddf261 100644
--- a/bin/windows/kmf-run-class.bat
+++ b/bin/windows/kmf-run-class.bat
@@ -10,12 +10,12 @@ REM an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expre
 
 setlocal enabledelayedexpansion
 
 IF [%1] EQU [] (
-  echo USAGE: %0 com.linkedin.kmf.KafkaMonitor config/kafka-monitor.properties
+  echo USAGE: %0 com.linkedin.xinfra.monitor.XinfraMonitor config/xinfra-monitor.properties
   EXIT /B 1
 )
 
 IF [%2] EQU [] (
-  echo USAGE: %0 %1 config/kafka-monitor.properties
+  echo USAGE: %0 %1 config/xinfra-monitor.properties
   EXIT /B 1
 )
 
@@ -60,7 +60,7 @@ IF ["%LOG_DIR%"] EQU [""] (
 
 rem Log4j settings
 IF ["%KAFKA_LOG4J_OPTS%"] EQU [""] (
-  set
KAFKA_LOG4J_OPTS=-Dlog4j.configuration=file:%BASE_DIR%\config\log4j.properties + set KAFKA_LOG4J_OPTS=-Dlog4j.configurationFile=%BASE_DIR%\config\log4j2.properties ) ELSE ( # create logs directory IF not exist %LOG_DIR% ( @@ -170,7 +170,7 @@ REM fi REM Launch mode REM if [ "x$DAEMON_MODE" = "xtrue" ]; then -REM nohup $JAVA $KAFKA_HEAP_OPTS $KAFKA_JVM_PERFORMANCE_OPTS $KAFKA_GC_LOG_OPTS $KAFKA_JMX_OPTS -cp $CLASSPATH REM $KAFKA_OPTS "$@" > "$CONSOLE_OUTPUT_FILE" 2>&1 < /dev/null &REM -REM elseREM +REM nohup $JAVA $KAFKA_HEAP_OPTS $KAFKA_JVM_PERFORMANCE_OPTS $KAFKA_GC_LOG_OPTS $KAFKA_JMX_OPTS -cp $CLASSPATH REM $KAFKA_OPTS "$@" > "$CONSOLE_OUTPUT_FILE" 2>&1 < /dev/null &REM +REM elseREM REM exec $JAVA $KAFKA_HEAP_OPTS $KAFKA_JVM_PERFORMANCE_OPTS $KAFKA_GC_LOG_OPTS $KAFKA_JMX_OPTS REM $KAFKA_LOG4J_OPTS -cp $CLASSPATH $KAFKA_OPTS "$@" REM fi diff --git a/bin/kafka-monitor-start.sh b/bin/xinfra-monitor-start.sh similarity index 86% rename from bin/kafka-monitor-start.sh rename to bin/xinfra-monitor-start.sh index eb4863dc..2a341a16 100755 --- a/bin/kafka-monitor-start.sh +++ b/bin/xinfra-monitor-start.sh @@ -9,4 +9,4 @@ base_dir=$(dirname $0) -exec $base_dir/kmf-run-class.sh com/linkedin/kmf/KafkaMonitor $@ +exec $base_dir/kmf-run-class.sh com/linkedin/xinfra/monitor/XinfraMonitor $@ diff --git a/build.gradle b/build.gradle index 9db41aca..230cba17 100644 --- a/build.gradle +++ b/build.gradle @@ -1,51 +1,134 @@ + +def configDocDir = "${buildDir}/configDocs" + +apply plugin: 'maven-publish' +apply plugin: 'distribution' + + allprojects { apply plugin: 'idea' apply plugin: 'eclipse' apply plugin: 'java' apply plugin: 'checkstyle' - version = "1.0.0" + sourceCompatibility = 8 + targetCompatibility = 8 - sourceCompatibility = 1.7 + group = 'com.linkedin.kmf' repositories { mavenCentral() + maven { + url "https://linkedin.jfrog.io/artifactory/avro-util/" + } } dependencies { compile 'net.sourceforge.argparse4j:argparse4j:0.5.0' - compile 'org.slf4j:slf4j-log4j12:1.7.6' - compile 'org.apache.avro:avro:1.4.0' - compile 'org.apache.kafka:kafka_2.11:0.10.1.1' - compile 'org.apache.kafka:kafka-clients:0.10.1.1' - compile 'org.testng:testng:6.8.8' - compile 'org.eclipse.jetty:jetty-server:8.1.19.v20160209' + compile 'org.apache.logging.log4j:log4j-slf4j-impl:2.17.1' + compile 'org.apache.avro:avro:1.9.2' compile 'org.json:json:20140107' - compile 'com.fasterxml.jackson.core:jackson-databind:2.7.1' - compile 'org.jolokia:jolokia-jvm:1.3.3' + compile 'org.jolokia:jolokia-jvm:1.6.2' compile 'net.savantly:graphite-client:1.1.0-RELEASE' compile 'com.timgroup:java-statsd-client:3.0.1' - + compile 'com.signalfx.public:signalfx-codahale:0.0.47' + compile group: 'org.apache.kafka', name: 'kafka_2.12', version: '2.8.2' + compile group: 'org.apache.kafka', name: 'kafka-clients', version: '2.8.2' + compile 'org.apache.commons:commons-lang3:3.12.0' + compile 'com.linkedin.avroutil1:helper-all:0.2.118' + compile 'org.apache.zookeeper:zookeeper:3.8.0' + testCompile 'org.mockito:mockito-core:2.24.0' testCompile 'org.testng:testng:6.8.8' } tasks.create(name: "copyDependantLibs", type: Copy) { - from (configurations.testRuntime) { + from(configurations.testRuntime) { include('slf4j-log4j12*') } - from (configurations.runtime) { - } + from(configurations.runtime) {} into "build/dependant-libs" duplicatesStrategy 'exclude' } jar { + doFirst { + manifest { + // embed version information into jar manifests + attributes('Name': "${project.name}", + 'Specification-Title': "${project.name}", + 'Specification-Version': 
"${project.version}", + 'Specification-Vendor': "LinkedIn", + 'Implementation-Title': "${project.name}", + 'Implementation-Version': "${project.version}", + 'Implementation-Vendor': "LinkedIn") + } + } + dependsOn 'copyDependantLibs' } + task sourceJar(type: Jar) { + from sourceSets.main.allJava + classifier "sources" + } + + task javadocJar(type: Jar) { + from javadoc + classifier = 'javadoc' + } + task testJar(type: Jar) { - classifier = 'test' - from sourceSets.test.output + from sourceSets.test.allJava + classifier = 'tests' + } + + publishing { + publications { + MyPublication(MavenPublication) { + groupId project.group + artifactId project.name + version project.version + + from components.java + artifact sourceJar + artifact javadocJar + artifact testJar + artifact distZip + artifact distTar + + pom { + name = 'kafka-monitor' + description = 'kafka monitor' + url = 'https://github.com/linkedin/kafka-monitor' + + licenses { + license { + name = 'The Apache Software License, Version 2.0' + url = 'http://www.apache.org/licenses/LICENSE-2.0.txt' + } + } + scm { + connection = 'scm:git:git://github.com:linkedin/kafka-monitor.git' + developerConnection = 'scm:git:ssh://github.com:linkedin/kafka-monitor.git' + url = 'https://github.com/linkedin/kafka-monitor' + } + } + + repositories { + mavenLocal() + maven { + name "LinkedInJfrog" + url "https://linkedin.jfrog.io/artifactory/kafka-monitor" + credentials { + if (System.getenv('JFROG_USER') != null && System.getenv('JFROG_KEY') != null) { + username System.getenv('JFROG_USER') + password System.getenv('JFROG_KEY') + } + } + } + } + } + } } artifacts { @@ -54,8 +137,24 @@ allprojects { checkstyle { configFile = new File(rootDir, "checkstyle/checkstyle.xml") + configProperties = ["suppressionFile": new File(rootDir, "checkstyle/suppressions.xml")] + } + + task createConfigDocs( dependsOn : compileJava, type : JavaExec) { + outputs.dir configDocDir + classpath sourceSets.main.runtimeClasspath + main = 'com.linkedin.xinfra.monitor.common.ConfigDocumentationGenerator' + args = [configDocDir, + 'com.linkedin.xinfra.monitor.services.configs.ConsumeServiceConfig', + 'com.linkedin.xinfra.monitor.services.configs.DefaultMetricsReporterServiceConfig', + 'com.linkedin.xinfra.monitor.services.configs.JettyServiceConfig', + 'com.linkedin.xinfra.monitor.services.configs.ProduceServiceConfig', + 'com.linkedin.xinfra.monitor.services.configs.TopicManagementServiceConfig', + 'com.linkedin.xinfra.monitor.apps.configs.MultiClusterMonitorConfig'] } + build.dependsOn createConfigDocs + test.dependsOn('checkstyleMain', 'checkstyleTest') test { @@ -66,8 +165,43 @@ allprojects { exceptionFormat = 'full' } } + + distributions { + main { + contents { + into('bin') { + from 'bin' + } + into('build/libs') { + from jar + } + into('build/dependant-libs') { + from copyDependantLibs + } + into('config') { + from 'config' + } + into('build/configDocs') { + from createConfigDocs + } + into('webapp') { + from 'webapp' + } + from('.') { + include 'README.md' + } + } + } + } + tasks.withType(Tar){ + compression = Compression.GZIP + extension = 'tar.gz' + } + } -task wrapper(type: Wrapper) { - gradleVersion = '2.11' +wrapper { + gradleVersion = '5.2.1' + distributionType = Wrapper.DistributionType.ALL + } diff --git a/checkstyle/checkstyle.xml b/checkstyle/checkstyle.xml index f7edb531..da75cff1 100644 --- a/checkstyle/checkstyle.xml +++ b/checkstyle/checkstyle.xml @@ -18,11 +18,14 @@ - + - + + + + @@ -32,6 +35,7 @@ + @@ 
-77,4 +81,6 @@ + + diff --git a/checkstyle/suppressions.xml b/checkstyle/suppressions.xml new file mode 100644 index 00000000..b5062103 --- /dev/null +++ b/checkstyle/suppressions.xml @@ -0,0 +1,9 @@ + + + + + + + \ No newline at end of file diff --git a/config/kafka-monitor.properties b/config/kafka-monitor.properties deleted file mode 100644 index 91779669..00000000 --- a/config/kafka-monitor.properties +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -# file except in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - -# This properties file specifies the tests/services that KafkaMonitor -# should instantiate and run, together with the key/value pairs used to -# configure these tests/services. It should have the following format: -# -# { -# "name1" : { -# "type": TestClassName -# "key1": value1, -# "key2": value2, -# ... -# }, -# "name2" : { -# "type": ServiceClassName -# "key1": value1, -# "key2": value2, -# ... -# }, -# ... -# } -# -# TestClassName can be canonical name or simple name of any class that implements -# interface com.linkedin.kmf.services.Test. These classes should be under -# package com.linkedin.kmf.tests. -# -# ServiceClassName can be canonical name or simple name of any class that implements -# interface com.linkedin.kmf.services.Service. These classes should be under -# package com.linkedin.kmf.services. -# -# Each test/service should be configured with class.name which can be either TestClassName -# or ServiceClassName. The key for the test/service in the json map is used as name to -# identify the test/service in the log or JMX metrics, which is useful if multiple -# test/service with the same class.name are run in the same Kafka Monitor process. 
-# - -{ - "single-cluster-monitor": { - "class.name": "com.linkedin.kmf.apps.SingleClusterMonitor", - "topic": "kafka-monitor-topic", - "zookeeper.connect": "localhost:2181", - "bootstrap.servers": "localhost:9092", - "produce.record.delay.ms": 100, - "topic-management.topicCreationEnabled": true, - "topic-management.replicationFactor" : 1, - "topic-management.partitionsToBrokersRatio" : 2.0, - "topic-management.rebalance.interval.ms" : 600000, - "topic-management.topicFactory.props": { - }, - "produce.producer.props": { - "client.id": "kmf-client-id" - }, - - "consume.latency.sla.ms": "20000", - "consume.consumer.props": { - - } - - }, - - "reporter-service": { - "class.name": "com.linkedin.kmf.services.DefaultMetricsReporterService", - "report.interval.sec": 1, - "report.metrics.list": [ - "kmf:type=kafka-monitor:offline-runnable-count", - "kmf.services:type=produce-service,name=*:produce-availability-avg", - "kmf.services:type=consume-service,name=*:consume-availability-avg", - "kmf.services:type=produce-service,name=*:records-produced-total", - "kmf.services:type=consume-service,name=*:records-consumed-total", - "kmf.services:type=consume-service,name=*:records-lost-total", - "kmf.services:type=consume-service,name=*:records-duplicated-total", - "kmf.services:type=consume-service,name=*:records-delay-ms-avg", - "kmf.services:type=produce-service,name=*:records-produced-rate", - "kmf.services:type=produce-service,name=*:produce-error-rate", - "kmf.services:type=consume-service,name=*:consume-error-rate" - ] - }, - - "jetty-service": { - "class.name": "com.linkedin.kmf.services.JettyService", - "jetty.port": 8000 - }, - - "jolokia-service": { - "class.name": "com.linkedin.kmf.services.JolokiaService" - } -} - - diff --git a/config/log4j.properties b/config/log4j.properties deleted file mode 100644 index 8d4b79a9..00000000 --- a/config/log4j.properties +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this -# file except in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - -log4j.rootLogger=INFO, stdout - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.kafkaClientAppender=org.apache.log4j.DailyRollingFileAppender -log4j.appender.kafkaClientAppender.DatePattern='.'yyyy-MM-dd-HH -log4j.appender.kafkaClientAppender.File=${kafka.logs.dir}/kafka-client.log -log4j.appender.kafkaClientAppender.layout=org.apache.log4j.PatternLayout -log4j.appender.kafkaClientAppender.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.logger.com.linkedin.kmf.core.KafkaMonitor=INFO, stdout -log4j.additivity.com.linkedin.kmf.core.KafkaMonitor=false - -log4j.logger.org.apache.kafka=WARN, kafkaClientAppender -log4j.additivity.org.apache.kafka=false - -log4j.logger.kafka=WARN, kafkaClientAppender -log4j.additivity.kafka=false - diff --git a/config/log4j2.properties b/config/log4j2.properties new file mode 100644 index 00000000..4896697d --- /dev/null +++ b/config/log4j2.properties @@ -0,0 +1,47 @@ +# Copyright 2020 LinkedIn Corp. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
+# file except in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+
+status = error
+dest = err
+name = PropertiesConfig
+
+filter.threshold.type = ThresholdFilter
+filter.threshold.level = debug
+
+appender.console.type = Console
+appender.console.name = STDOUT
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = [%d] %p %m (%c)%n
+
+appender.kafka.type = RollingFile
+appender.kafka.name = KAFKA
+appender.kafka.filename = ${sys:kafka.logs.dir}/kafka-client.log
+appender.kafka.filePattern = ${sys:kafka.logs.dir}/kafka-client.log.%d{yyyy-MM-dd-HH}
+appender.kafka.layout.type = PatternLayout
+appender.kafka.layout.pattern = [%d] %p %m (%c)%n
+appender.kafka.policies.type = Policies
+appender.kafka.policies.time.type = TimeBasedTriggeringPolicy
+
+# Modify this as needed when working on a dev box. Trace -> Debug -> Info -> Warn -> Error -> Fatal
+rootLogger.level = info
+rootLogger.appenderRef.console.ref = STDOUT
+
+logger.kmf.name = com.linkedin.kmf.core.KafkaMonitor
+logger.kmf.level = info
+logger.kmf.additivity = false
+logger.kmf.appenderRef.console.ref = STDOUT
+
+logger.kafkaClient.name = org.apache.kafka
+logger.kafkaClient.level = warn
+logger.kafkaClient.additivity = false
+logger.kafkaClient.appenderRef.kafka.ref = KAFKA
+
+logger.kafka.name = kafka
+logger.kafka.level = warn
+logger.kafka.additivity = false
+logger.kafka.appenderRef.kafka.ref = KAFKA
diff --git a/config/multi-cluster-monitor.properties b/config/multi-cluster-monitor.properties
index 6c35ec19..dd40b035 100644
--- a/config/multi-cluster-monitor.properties
+++ b/config/multi-cluster-monitor.properties
@@ -12,22 +12,23 @@
 # each cluster in the pipeline. The "produce.service.props" should use the first cluster and
 # the "consume.service.props" should use the last cluster in the pipeline.
 
+# Produce service: Configure Produce Service to produce to the first cluster of the pipeline
+# Consume service: Configure Consume Service to consume from the last cluster of the pipeline
+# Additional clusters: If there are more than two clusters in the pipeline, add one property map for each of them.
 {
   "multi-cluster-monitor": {
     "class.name": "com.linkedin.kmf.apps.MultiClusterMonitor",
     "topic": "kafka-monitor-topic",
-
     "produce.service.props": {
-      "zookeeper.connect": "localhost:2181/cluster1",
+      "zookeeper.connect": "localhost:2181/first_cluster",
       "bootstrap.servers": "localhost:9092",
       "produce.record.delay.ms": 100,
       "produce.producer.props": {
         "client.id": "kafka-monitor-client-id"
       }
     },
-
     "consume.service.props": {
-      "zookeeper.connect": "localhost:2181/cluster2",
+      "zookeeper.connect": "localhost:2181/last_cluster",
       "bootstrap.servers": "localhost:9095",
       "consume.latency.sla.ms": "20000",
       "consume.consumer.props": {
@@ -37,7 +38,8 @@
 
     "topic.management.props.per.cluster" : {
       "first-cluster" : {
-        "zookeeper.connect": "localhost:2181/cluster1",
+        "bootstrap.servers": "localhost:9092",
+        "zookeeper.connect": "localhost:2181/first_cluster",
         "topic-management.topicCreationEnabled": true,
         "topic-management.replicationFactor" : 1,
         "topic-management.partitionsToBrokersRatio" : 2.0,
@@ -47,7 +49,8 @@
       },
 
       "last-cluster" : {
-        "zookeeper.connect": "localhost:2181/cluster2",
+        "bootstrap.servers": "localhost:9095",
+        "zookeeper.connect": "localhost:2181/last_cluster",
         "topic-management.topicCreationEnabled": true,
         "topic-management.replicationFactor" : 1,
         "topic-management.partitionsToBrokersRatio" : 2.0,
@@ -68,22 +71,22 @@
         "kmf.services:type=produce-service,name=*:records-produced-total",
         "kmf.services:type=consume-service,name=*:records-consumed-total",
         "kmf.services:type=consume-service,name=*:records-lost-total",
+        "kmf.services:type=consume-service,name=*:records-lost-rate",
         "kmf.services:type=consume-service,name=*:records-duplicated-total",
         "kmf.services:type=consume-service,name=*:records-delay-ms-avg",
         "kmf.services:type=produce-service,name=*:records-produced-rate",
         "kmf.services:type=produce-service,name=*:produce-error-rate",
-        "kmf.services:type=consume-service,name=*:consume-error-rate"
+        "kmf.services:type=consume-service,name=*:consume-error-rate",
+        "kmf.services:type=commit-availability-service,name=*:offsets-committed-avg",
+        "kmf.services:type=commit-availability-service,name=*:commit-latency-avg",
+        "kmf.services:type=commit-availability-service,name=*:commit-availability-avg",
+        "kmf.services:type=commit-availability-service,name=*:failed-commit-offsets-avg",
+        "kmf.services:type=commit-availability-service,name=*:offsets-committed-total",
+        "kmf.services:type=commit-availability-service,name=*:failed-commit-offsets-total"
       ]
     },
 
-  "jetty-service": {
-    "class.name": "com.linkedin.kmf.services.JettyService",
-    "jetty.port": 8000
-  },
-
   "jolokia-service": {
     "class.name": "com.linkedin.kmf.services.JolokiaService"
   }
 }
-
-
diff --git a/config/prometheus-exporter.yaml b/config/prometheus-exporter.yaml
new file mode 100644
index 00000000..7401e3a4
--- /dev/null
+++ b/config/prometheus-exporter.yaml
@@ -0,0 +1,7 @@
+---
+lowercaseOutputName: true
+rules:
+- pattern : kmf<type=(.+)><>([\w\d-]+)
+  name: kmf_$1_$2
+- pattern : kmf.services<type=(.+), name=(.+)><>([\w\d-]+)
+  name: kmf_services_$1_$2_$3
diff --git a/config/xinfra-monitor.properties b/config/xinfra-monitor.properties
new file mode 100644
index 00000000..6993bf47
--- /dev/null
+++ b/config/xinfra-monitor.properties
@@ -0,0 +1,197 @@
+# Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
+# file except in compliance with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +# This properties file specifies the tests/services that XinfraMonitor +# should instantiate and run, together with the key/value pairs used to +# configure these tests/services. It should have the following format: +# +# { +# "name1" : { +# "type": TestClassName +# "key1": value1, +# "key2": value2, +# ... +# }, +# "name2" : { +# "type": ServiceClassName +# "key1": value1, +# "key2": value2, +# ... +# }, +# ... +# } +# +# TestClassName can be canonical name or simple name of any class that implements +# interface com.linkedin.kmf.services.Test. These classes should be under +# package com.linkedin.kmf.tests. +# +# ServiceClassName can be canonical name or simple name of any class that implements +# interface com.linkedin.kmf.services.Service. These classes should be under +# package com.linkedin.kmf.services. +# +# Each test/service should be configured with class.name which can be either TestClassName +# or ServiceClassName. The key for the test/service in the json map is used as name to +# identify the test/service in the log or JMX metrics, which is useful if multiple +# test/service with the same class.name are run in the same Kafka Monitor process. +# +# If using Secure Socket Layer for security protocol, SSL properties must be defined under +# produce.producer.props, consume.consumer.props, as well as single-cluster-monitor props + +{ + "single-cluster-monitor": { + "class.name": "com.linkedin.xinfra.monitor.apps.SingleClusterMonitor", + "topic": "xinfra-monitor-topic", + "zookeeper.connect": "localhost:2181", + "bootstrap.servers": "localhost:9092,localhost:9093", + "request.timeout.ms": 9000, + "produce.record.delay.ms": 100, + "topic-management.topicManagementEnabled": true, + "topic-management.topicCreationEnabled": true, + "topic-management.replicationFactor" : 1, + "topic-management.partitionsToBrokersRatio" : 2.0, + "topic-management.rebalance.interval.ms" : 600000, + "topic-management.preferred.leader.election.check.interval.ms" : 300000, + "topic-management.topicFactory.props": { + }, + "topic-management.topic.props": { + "retention.ms": "3600000" + }, + "produce.producer.props": { + "client.id": "kmf-client-id" + }, + + "consume.latency.sla.ms": "20000", + "consume.consumer.props": { + } + }, + + "offset-commit-service": { + "class.name": "com.linkedin.xinfra.monitor.services.OffsetCommitService", + "zookeeper.connect": "localhost:2181", + "bootstrap.servers": "localhost:9092,localhost:9093", + "consumer.props": { + "group.id": "target-consumer-group" + } + }, + + "jolokia-service": { + "class.name": "com.linkedin.xinfra.monitor.services.JolokiaService" + }, + + "reporter-service": { + "class.name": "com.linkedin.xinfra.monitor.services.DefaultMetricsReporterService", + "report.interval.sec": 1, + "report.metrics.list": [ + "kmf:type=kafka-monitor:offline-runnable-count", + "kmf.services:type=produce-service,name=*:produce-availability-avg", + "kmf.services:type=consume-service,name=*:consume-availability-avg", + "kmf.services:type=produce-service,name=*:records-produced-total", + "kmf.services:type=consume-service,name=*:records-consumed-total", + "kmf.services:type=produce-service,name=*:records-produced-rate", + 
"kmf.services:type=produce-service,name=*:produce-error-rate", + "kmf.services:type=consume-service,name=*:consume-error-rate", + "kmf.services:type=consume-service,name=*:records-lost-total", + "kmf.services:type=consume-service,name=*:records-lost-rate", + "kmf.services:type=consume-service,name=*:records-duplicated-total", + "kmf.services:type=consume-service,name=*:records-delay-ms-avg", + "kmf.services:type=commit-availability-service,name=*:offsets-committed-avg", + "kmf.services:type=commit-availability-service,name=*:offsets-committed-total", + "kmf.services:type=commit-availability-service,name=*:failed-commit-offsets-avg", + "kmf.services:type=commit-availability-service,name=*:failed-commit-offsets-total", + "kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-avg", + "kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-max", + "kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-99th", + "kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-999th", + "kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-9999th", + "kmf.services:type=cluster-topic-manipulation-service,name=*:topic-creation-metadata-propagation-ms-avg", + "kmf.services:type=cluster-topic-manipulation-service,name=*:topic-creation-metadata-propagation-ms-max", + "kmf.services:type=cluster-topic-manipulation-service,name=*:topic-deletion-metadata-propagation-ms-avg", + "kmf.services:type=cluster-topic-manipulation-service,name=*:topic-deletion-metadata-propagation-ms-max", + "kmf.services:type=offset-commit-service,name=*:offset-commit-availability-avg", + "kmf.services:type=offset-commit-service,name=*:offset-commit-service-success-rate", + "kmf.services:type=offset-commit-service,name=*:offset-commit-service-success-total", + "kmf.services:type=offset-commit-service,name=*:offset-commit-service-failure-rate", + "kmf.services:type=offset-commit-service,name=*:offset-commit-service-failure-total" + ] + }, + + "cluster-topic-manipulation-service":{ + "class.name":"com.linkedin.xinfra.monitor.services.ClusterTopicManipulationService", + "zookeeper.connect": "localhost:2181", + "bootstrap.servers":"localhost:9092,localhost:9093", + "topic": "xinfra-monitor-topic" + }, + +# Example produce-service to produce messages to cluster +# "produce-service": { +# "class.name": "com.linkedin.kmf.services.ProduceService", +# "topic": "xinfra-monitor-topic", +# "zookeeper.connect": "localhost:2181", +# "bootstrap.servers": "localhost:9092", +# "consume.latency.sla.ms": "20000", +# "consume.consumer.props": { +# } +# }, + +# Example consume-service to consume messages +# "consume-service": { +# "class.name": "com.linkedin.kmf.services.ConsumeService", +# "topic": "xinfra-monitor-topic", +# "zookeeper.connect": "localhost:2181", +# "bootstrap.servers": "localhost:9092", +# "consume.latency.sla.ms": "20000", +# "consume.consumer.props": { +# } +# }, + +# Example statsd-service to report metrics +# "statsd-service": { +# "class.name": "com.linkedin.xinfra.monitor.services.StatsdMetricsReporterService", +# "report.statsd.host": "localhost", +# "report.statsd.port": "8125", +# "report.statsd.prefix": "xinfra-monitor", +# "report.interval.sec": 1, +# "report.metrics.list": [ +# "kmf.services:type=produce-service,name=*:produce-availability-avg", +# "kmf.services:type=consume-service,name=*:consume-availability-avg" +# ] +# }, + +# Example kafka-service to report metrics + "reporter-kafka-service": { + "class.name": 
"com.linkedin.xinfra.monitor.services.KafkaMetricsReporterService", + "report.interval.sec": 3, + "zookeeper.connect": "localhost:2181", + "bootstrap.servers": "localhost:9092", + "topic": "xinfra-monitor-topic-metrics", + "report.kafka.topic.replication.factor": 1, + "report.metrics.list": [ + "kmf.services:type=produce-service,name=*:produce-availability-avg", + "kmf.services:type=consume-service,name=*:consume-availability-avg", + "kmf.services:type=produce-service,name=*:records-produced-total", + "kmf.services:type=consume-service,name=*:records-consumed-total", + "kmf.services:type=consume-service,name=*:records-lost-total", + "kmf.services:type=consume-service,name=*:records-duplicated-total", + "kmf.services:type=consume-service,name=*:records-delay-ms-avg", + "kmf.services:type=produce-service,name=*:records-produced-rate", + "kmf.services:type=produce-service,name=*:produce-error-rate", + "kmf.services:type=consume-service,name=*:consume-error-rate" + ] + } + +# Example signalfx-service to report metrics +# "signalfx-service": { +# "class.name": "com.linkedin.kmf.services.SignalFxMetricsReporterService", +# "report.interval.sec": 1, +# "report.metric.dimensions": { +# }, +# "report.signalfx.url": "", +# "report.signalfx.token" : "" +# } + +} diff --git a/docker/Dockerfile b/docker/Dockerfile index fcd49e39..baff1023 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -17,14 +17,11 @@ MAINTAINER coffeepac@gmail.com WORKDIR /opt/kafka-monitor ADD build/ build/ -ADD core/build/ core/build/ -ADD services/build/ services/build/ -ADD tests/build/ tests/build/ -ADD bin/kafka-monitor-start.sh bin/kafka-monitor-start.sh +ADD bin/xinfra-monitor-start.sh bin/xinfra-monitor-start.sh ADD bin/kmf-run-class.sh bin/kmf-run-class.sh -ADD config/kafka-monitor.properties config/kafka-monitor.properties -ADD config/log4j.properties config/log4j.properties +ADD config/xinfra-monitor.properties config/xinfra-monitor.properties +ADD config/log4j2.properties config/log4j2.properties ADD docker/kafka-monitor-docker-entry.sh kafka-monitor-docker-entry.sh ADD webapp/ webapp/ -CMD ["/opt/kafka-monitor/kafka-monitor-docker-entry.sh"] \ No newline at end of file +CMD ["/opt/kafka-monitor/kafka-monitor-docker-entry.sh"] diff --git a/docker/kafka-monitor-docker-entry.sh b/docker/kafka-monitor-docker-entry.sh index 3abc003b..97554bb0 100755 --- a/docker/kafka-monitor-docker-entry.sh +++ b/docker/kafka-monitor-docker-entry.sh @@ -15,7 +15,13 @@ set -x +# SIGTERM-handler +trap 'pkill java; exit 130' SIGINT +trap 'pkill java; exit 143' SIGTERM + # wait for DNS services to be available sleep 10 -bin/kafka-monitor-start.sh config/kafka-monitor.properties +bin/xinfra-monitor-start.sh config/xinfra-monitor.properties & + +wait $! 
\ No newline at end of file diff --git a/docs/images/xinfra_monitor.png b/docs/images/xinfra_monitor.png new file mode 100644 index 00000000..d0dfce61 Binary files /dev/null and b/docs/images/xinfra_monitor.png differ diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 05ef575b..51288f9c 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index c0abcf1d..842c8c5a 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ -#Tue Dec 06 22:38:25 EST 2016 +#Mon Apr 01 18:19:43 PDT 2019 distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-5.2.1-all.zip diff --git a/gradlew b/gradlew index 9d82f789..2477741a 100755 --- a/gradlew +++ b/gradlew @@ -1,4 +1,4 @@ -#!/usr/bin/env bash +#!/usr/bin/env sh ############################################################################## ## @@ -6,12 +6,30 @@ ## ############################################################################## -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS="" +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null APP_NAME="Gradle" APP_BASE_NAME=`basename "$0"` +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS="" + # Use the maximum available, or set MAX_FD != -1 to use that value. MAX_FD="maximum" @@ -30,6 +48,7 @@ die ( ) { cygwin=false msys=false darwin=false +nonstop=false case "`uname`" in CYGWIN* ) cygwin=true @@ -40,26 +59,11 @@ case "`uname`" in MINGW* ) msys=true ;; + NONSTOP* ) + nonstop=true + ;; esac -# Attempt to set APP_HOME -# Resolve links: $0 may be a link -PRG="$0" -# Need this for relative symlinks. -while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG=`dirname "$PRG"`"/$link" - fi -done -SAVED="`pwd`" -cd "`dirname \"$PRG\"`/" >/dev/null -APP_HOME="`pwd -P`" -cd "$SAVED" >/dev/null - CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar # Determine the Java command to use to start the JVM. @@ -85,7 +89,7 @@ location of your Java installation." fi # Increase the maximum file descriptors if we can. -if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then MAX_FD_LIMIT=`ulimit -H -n` if [ $? -eq 0 ] ; then if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then @@ -100,18 +104,18 @@ if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then fi fi -# For Darwin, add options to specify how the application appears in the dock +# For Darwin, add options to specify how the application appears in the dock. 
if $darwin; then GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" fi -# For Cygwin, switch paths to Windows format before running java +# For Cygwin, switch paths to Windows format before running java. if $cygwin ; then APP_HOME=`cygpath --path --mixed "$APP_HOME"` CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` JAVACMD=`cygpath --unix "$JAVACMD"` - # We build the pattern for arguments to be converted via cygpath + # We build the pattern for arguments to be converted via cygpath. ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` SEP="" for dir in $ROOTDIRSRAW ; do @@ -119,7 +123,7 @@ if $cygwin ; then SEP="|" done OURCYGPATTERN="(^($ROOTDIRS))" - # Add a user-defined pattern to the cygpath arguments + # Add a user-defined pattern to the cygpath arguments. if [ "$GRADLE_CYGPATTERN" != "" ] ; then OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" fi @@ -150,11 +154,19 @@ if $cygwin ; then esac fi -# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules -function splitJvmOpts() { - JVM_OPTS=("$@") +# Escape application args +save ( ) { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " } -eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS -JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" +APP_ARGS=$(save "$@") + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong +if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then + cd "$(dirname "$0")" +fi -exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat index aec99730..e95643d6 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -8,14 +8,14 @@ @rem Set local scope for the variables with windows NT shell if "%OS%"=="Windows_NT" setlocal -@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS= - set DIRNAME=%~dp0 if "%DIRNAME%" == "" set DIRNAME=. set APP_BASE_NAME=%~n0 set APP_HOME=%DIRNAME% +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS= + @rem Find java.exe if defined JAVA_HOME goto findJavaFromJavaHome @@ -46,10 +46,9 @@ echo location of your Java installation. goto fail :init -@rem Get command-line arguments, handling Windowz variants +@rem Get command-line arguments, handling Windows variants if not "%OS%" == "Windows_NT" goto win9xME_args -if "%@eval[2+2]" == "4" goto 4NT_args :win9xME_args @rem Slurp the command line arguments. 
@@ -60,11 +59,6 @@ set _SKIP=2 if "x%~1" == "x" goto execute set CMD_LINE_ARGS=%* -goto execute - -:4NT_args -@rem Get arguments from the 4NT Shell from JP Software -set CMD_LINE_ARGS=%$ :execute @rem Setup the command line diff --git a/scripts/publishToJfrog.sh b/scripts/publishToJfrog.sh new file mode 100755 index 00000000..cc12364d --- /dev/null +++ b/scripts/publishToJfrog.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +result=${PWD##*/} +if [[ "$result" = "scripts" ]] +then + echo "script must be run from root project folder, not $PWD" + exit 1 +else + echo "we are in $PWD and tag is $RELEASE_TAG" + + if [[ $RELEASE_TAG =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] + then + echo "publishing: tag $RELEASE_TAG looks like a semver" + git status + git describe --tags + ./gradlew printVersion + ./gradlew publishMyPublicationPublicationToLinkedInJfrogRepository + else + echo "not publishing: tag $RELEASE_TAG is NOT a valid semantic version (x.y.z)" + fi +fi diff --git a/semantic-build-versioning.gradle b/semantic-build-versioning.gradle new file mode 100644 index 00000000..bee379f7 --- /dev/null +++ b/semantic-build-versioning.gradle @@ -0,0 +1,2 @@ + +/* This is used by vivin:gradle-semantic-build-versioning plugin to generate versioned jar files. */ diff --git a/settings.gradle b/settings.gradle index e69de29b..f37e0228 100644 --- a/settings.gradle +++ b/settings.gradle @@ -0,0 +1,15 @@ +buildscript { + repositories { + maven { + url 'https://plugins.gradle.org/m2/' + } + } + dependencies { + classpath 'gradle.plugin.net.vivin:gradle-semantic-build-versioning:4.0.0' + } +} + +apply plugin: 'net.vivin.gradle-semantic-build-versioning' + +// otherwise it defaults to the folder name +rootProject.name = 'kafka-monitor' diff --git a/src/main/java/com/linkedin/kmf/common/Utils.java b/src/main/java/com/linkedin/kmf/common/Utils.java deleted file mode 100644 index e1c8494b..00000000 --- a/src/main/java/com/linkedin/kmf/common/Utils.java +++ /dev/null @@ -1,199 +0,0 @@ -/** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this - * file except in compliance with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
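The release gate in scripts/publishToJfrog.sh above publishes only tags that are bare x.y.z semantic versions. A minimal Java sketch of the same check, for illustration only (SemverGate and isReleaseTag are invented names, not part of the source):

import java.util.regex.Pattern;

public class SemverGate {
  // Same pattern as the shell test: three dot-separated numeric components,
  // with no "v" prefix and no pre-release or build suffix.
  private static final Pattern SEMVER = Pattern.compile("^[0-9]+\\.[0-9]+\\.[0-9]+$");

  public static boolean isReleaseTag(String tag) {
    return SEMVER.matcher(tag).matches();
  }

  public static void main(String[] args) {
    System.out.println(isReleaseTag("2.5.1"));  // true
    System.out.println(isReleaseTag("v2.5.1")); // false: prefixed tag
    System.out.println(isReleaseTag("2.5"));    // false: only two components
  }
}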
- */ -package com.linkedin.kmf.common; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.lang.management.ManagementFactory; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.Properties; -import java.util.Set; - -import kafka.admin.AdminUtils; -import kafka.admin.RackAwareMode; -import kafka.server.KafkaConfig; -import kafka.utils.ZkUtils; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.Encoder; -import org.apache.avro.io.JsonEncoder; -import org.apache.kafka.common.errors.TopicExistsException; -import org.apache.kafka.common.security.JaasUtils; -import org.json.JSONObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import scala.collection.Seq; - -import javax.management.MBeanAttributeInfo; -import javax.management.MBeanInfo; -import javax.management.MBeanServer; -import javax.management.ObjectName; - - -/** - * Kafka monitoring utilities. - */ -public class Utils { - private static final Logger LOG = LoggerFactory.getLogger(Utils.class); - - public static final int ZK_CONNECTION_TIMEOUT_MS = 30_000; - public static final int ZK_SESSION_TIMEOUT_MS = 30_000; - - /** - * Read number of partitions for the given topic on the specified zookeeper - * @param zkUrl zookeeper connection url - * @param topic topic name - * - * @return the number of partitions of the given topic - */ - public static int getPartitionNumForTopic(String zkUrl, String topic) { - ZkUtils zkUtils = ZkUtils.apply(zkUrl, ZK_SESSION_TIMEOUT_MS, ZK_CONNECTION_TIMEOUT_MS, JaasUtils.isZkSecurityEnabled()); - try { - Seq topics = scala.collection.JavaConversions.asScalaBuffer(Arrays.asList(topic)); - return zkUtils.getPartitionsForTopics(topics).apply(topic).size(); - } catch (NoSuchElementException e) { - return 0; - } finally { - zkUtils.close(); - } - } - - /** - * Create the topic that the monitor uses to monitor the cluster. This method attempts to create a topic so that all - * the brokers in the cluster will have partitionToBrokerRatio partitions. If the topic exists, but has different parameters - * then this does nothing to update the parameters. - * - * TODO: Do we care about rack aware mode? I would think no because we want to spread the topic over all brokers. - * @param zkUrl zookeeper connection url - * @param topic topic name - * @param replicationFactor the replication factor for the topic - * @param partitionToBrokerRatio This is multiplied by the number brokers to compute the number of partitions in the topic. 
- * @param topicConfig additional parameters for the topic for example min.insync.replicas - * @return the number of partitions created - */ - public static int createMonitoringTopicIfNotExists(String zkUrl, String topic, int replicationFactor, - double partitionToBrokerRatio, Properties topicConfig) { - ZkUtils zkUtils = ZkUtils.apply(zkUrl, ZK_SESSION_TIMEOUT_MS, ZK_CONNECTION_TIMEOUT_MS, JaasUtils.isZkSecurityEnabled()); - try { - if (AdminUtils.topicExists(zkUtils, topic)) { - return getPartitionNumForTopic(zkUrl, topic); - } - - int brokerCount = zkUtils.getAllBrokersInCluster().size(); - - int partitionCount = (int) Math.ceil(brokerCount * partitionToBrokerRatio); - - int defaultMinIsr = Math.max(replicationFactor - 1, 1); - if (!topicConfig.containsKey(KafkaConfig.MinInSyncReplicasProp())) { - topicConfig.setProperty(KafkaConfig.MinInSyncReplicasProp(), Integer.toString(defaultMinIsr)); - } - - try { - AdminUtils.createTopic(zkUtils, topic, partitionCount, replicationFactor, topicConfig, RackAwareMode.Enforced$.MODULE$); - } catch (TopicExistsException e) { - //There is a race condition with the consumer. - LOG.debug("Monitoring topic " + topic + " already exists in cluster " + zkUrl, e); - return getPartitionNumForTopic(zkUrl, topic); - } - LOG.info("Created monitoring topic " + topic + " in cluster " + zkUrl + " with " + partitionCount + " partitions, min ISR of " - + topicConfig.get(KafkaConfig.MinInSyncReplicasProp()) + " and replication factor of " + replicationFactor + "."); - - return partitionCount; - } finally { - zkUtils.close(); - } - } - - /** - * @param zkUrl zookeeper connection url - * @return number of brokers in this cluster - */ - public static int getBrokerCount(String zkUrl) { - ZkUtils zkUtils = ZkUtils.apply(zkUrl, ZK_SESSION_TIMEOUT_MS, ZK_CONNECTION_TIMEOUT_MS, JaasUtils.isZkSecurityEnabled()); - try { - return zkUtils.getAllBrokersInCluster().size(); - } finally { - zkUtils.close(); - } - } - - /** - * @param timestamp time in Ms when this message is generated - * @param topic topic this message is sent to - * @param idx index is consecutive numbers used by KafkaMonitor to determine duplicate or lost messages - * @param msgSize size of the message - * @return string that encodes the above fields - */ - public static String jsonFromFields(String topic, long idx, long timestamp, String producerId, int msgSize) { - GenericRecord record = new GenericData.Record(DefaultTopicSchema.MESSAGE_V0); - record.put(DefaultTopicSchema.TOPIC_FIELD.name(), topic); - record.put(DefaultTopicSchema.INDEX_FIELD.name(), idx); - record.put(DefaultTopicSchema.TIME_FIELD.name(), timestamp); - record.put(DefaultTopicSchema.PRODUCER_ID_FIELD.name(), producerId); - // CONTENT_FIELD is composed of #msgSize number of character 'x', e.g. xxxxxxxxxx - record.put(DefaultTopicSchema.CONTENT_FIELD.name(), String.format("%1$-" + msgSize + "s", "").replace(' ', 'x')); - return jsonFromGenericRecord(record); - } - - /** - * @param message kafka message in the string format - * @return GenericRecord that is deserialized from kafka message w.r.t. 
expected schema - */ - public static GenericRecord genericRecordFromJson(String message) { - GenericRecord record = new GenericData.Record(DefaultTopicSchema.MESSAGE_V0); - JSONObject jsonObject = new JSONObject(message); - record.put(DefaultTopicSchema.TOPIC_FIELD.name(), jsonObject.getString(DefaultTopicSchema.TOPIC_FIELD.name())); - record.put(DefaultTopicSchema.INDEX_FIELD.name(), jsonObject.getLong(DefaultTopicSchema.INDEX_FIELD.name())); - record.put(DefaultTopicSchema.TIME_FIELD.name(), jsonObject.getLong(DefaultTopicSchema.TIME_FIELD.name())); - record.put(DefaultTopicSchema.PRODUCER_ID_FIELD.name(), jsonObject.getString(DefaultTopicSchema.PRODUCER_ID_FIELD.name())); - record.put(DefaultTopicSchema.CONTENT_FIELD.name(), jsonObject.getString(DefaultTopicSchema.CONTENT_FIELD.name())); - return record; - } - - public static String jsonFromGenericRecord(GenericRecord record) { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - GenericDatumWriter writer = new GenericDatumWriter<>(DefaultTopicSchema.MESSAGE_V0); - - try { - Encoder encoder = new JsonEncoder(DefaultTopicSchema.MESSAGE_V0, out); - writer.write(record, encoder); - encoder.flush(); - } catch (IOException e) { - LOG.error("Unable to serialize avro record due to error " + e); - } - return out.toString(); - } - - public static List getMBeanAttributeValues(String mbeanExpr, String attributeExpr) { - List values = new ArrayList<>(); - MBeanServer server = ManagementFactory.getPlatformMBeanServer(); - try { - Set mbeanNames = server.queryNames(new ObjectName(mbeanExpr), null); - for (ObjectName mbeanName: mbeanNames) { - MBeanInfo mBeanInfo = server.getMBeanInfo(mbeanName); - MBeanAttributeInfo[] attributeInfos = mBeanInfo.getAttributes(); - for (MBeanAttributeInfo attributeInfo: attributeInfos) { - if (attributeInfo.getName().equals(attributeExpr) || attributeExpr.length() == 0 || attributeExpr.equals("*")) { - double value = (Double) server.getAttribute(mbeanName, attributeInfo.getName()); - values.add(new MbeanAttributeValue(mbeanName.getCanonicalName(), attributeInfo.getName(), value)); - } - } - } - } catch (Exception e) { - LOG.error("fail to retrieve value for " + mbeanExpr + ":" + attributeExpr, e); - } - return values; - } - -} diff --git a/src/main/java/com/linkedin/kmf/consumer/NewConsumer.java b/src/main/java/com/linkedin/kmf/consumer/NewConsumer.java deleted file mode 100644 index 0d827606..00000000 --- a/src/main/java/com/linkedin/kmf/consumer/NewConsumer.java +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this - * file except in compliance with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
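The deleted Utils class above is the JMX access point behind the metric patterns listed in the reporter config (for example kmf.services:type=produce-service,name=*:produce-availability-avg). A hypothetical usage sketch, assuming it runs inside the same JVM where the services have registered their sensors; the class name MBeanQueryExample is illustrative:

import com.linkedin.kmf.common.MbeanAttributeValue;
import com.linkedin.kmf.common.Utils;
import java.util.List;

public class MBeanQueryExample {
  public static void main(String[] args) {
    // Matches every produce-service instance and reads one attribute from each.
    List<MbeanAttributeValue> values = Utils.getMBeanAttributeValues(
        "kmf.services:type=produce-service,name=*", "produce-availability-avg");
    for (MbeanAttributeValue value : values) {
      System.out.println(value); // relies on MbeanAttributeValue.toString()
    }
  }
}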
- */ -package com.linkedin.kmf.consumer; - -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.clients.consumer.KafkaConsumer; -import java.util.Arrays; -import java.util.Iterator; -import java.util.Properties; - -/* - * Wrap around the new consumer from Apache Kafka and implement the #KMBaseConsumer interface - */ -public class NewConsumer implements KMBaseConsumer { - - private final KafkaConsumer _consumer; - private Iterator> _recordIter; - - public NewConsumer(String topic, Properties consumerProperties) { - _consumer = new KafkaConsumer<>(consumerProperties); - _consumer.subscribe(Arrays.asList(topic)); - } - - @Override - public BaseConsumerRecord receive() { - if (_recordIter == null || !_recordIter.hasNext()) - _recordIter = _consumer.poll(Long.MAX_VALUE).iterator(); - - ConsumerRecord record = _recordIter.next(); - return new BaseConsumerRecord(record.topic(), record.partition(), record.offset(), record.key(), record.value()); - } - - @Override - public void close() { - _consumer.close(); - } - -} diff --git a/src/main/java/com/linkedin/kmf/consumer/OldConsumer.java b/src/main/java/com/linkedin/kmf/consumer/OldConsumer.java deleted file mode 100644 index fdd842cc..00000000 --- a/src/main/java/com/linkedin/kmf/consumer/OldConsumer.java +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this - * file except in compliance with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
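NewConsumer above buffers one polled batch and hands records out one at a time. A self-contained sketch of the same pattern with the type parameters written out, using the newer Duration-based poll API: the original blocks in poll(Long.MAX_VALUE) and so never sees an empty batch, and the loop below is the equivalent guard for a bounded poll.

import java.time.Duration;
import java.util.Collections;
import java.util.Iterator;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

public class PollingConsumerSketch implements AutoCloseable {
  private final KafkaConsumer<String, String> consumer;
  private Iterator<ConsumerRecord<String, String>> recordIter;

  public PollingConsumerSketch(String topic, Properties props) {
    consumer = new KafkaConsumer<>(props);
    consumer.subscribe(Collections.singletonList(topic));
  }

  public ConsumerRecord<String, String> receive() {
    // Only go back to the broker when the previously fetched batch is exhausted.
    while (recordIter == null || !recordIter.hasNext()) {
      ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
      recordIter = records.iterator();
    }
    return recordIter.next();
  }

  @Override
  public void close() {
    consumer.close();
  }
}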
- */ -package com.linkedin.kmf.consumer; - -import kafka.consumer.Consumer; -import kafka.consumer.ConsumerConfig; -import kafka.javaapi.consumer.ConsumerConnector; -import kafka.consumer.ConsumerIterator; -import kafka.consumer.KafkaStream; -import kafka.message.MessageAndMetadata; -import kafka.serializer.StringDecoder; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -/* - * Wrap around the old consumer from Apache Kafka and implement the #KMBaseConsumer interface - */ -public class OldConsumer implements KMBaseConsumer { - - private final ConsumerConnector _connector; - private final ConsumerIterator _iter; - - public OldConsumer(String topic, Properties consumerProperties) { - _connector = Consumer.createJavaConsumerConnector(new ConsumerConfig(consumerProperties)); - Map topicCountMap = new HashMap<>(); - topicCountMap.put(topic, 1); - Map>> kafkaStreams = _connector.createMessageStreams(topicCountMap, new StringDecoder(null), new StringDecoder(null)); - _iter = kafkaStreams.get(topic).get(0).iterator(); - } - - @Override - public BaseConsumerRecord receive() { - if (!_iter.hasNext()) - return null; - MessageAndMetadata record = _iter.next(); - return new BaseConsumerRecord(record.topic(), record.partition(), record.offset(), record.key(), record.message()); - } - - @Override - public void close() { - _connector.shutdown(); - } - -} diff --git a/src/main/java/com/linkedin/kmf/partitioner/OldKMPartitioner.java b/src/main/java/com/linkedin/kmf/partitioner/OldKMPartitioner.java deleted file mode 100644 index fa356eff..00000000 --- a/src/main/java/com/linkedin/kmf/partitioner/OldKMPartitioner.java +++ /dev/null @@ -1,17 +0,0 @@ -/** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this - * file except in compliance with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - */ -package com.linkedin.kmf.partitioner; - -public class OldKMPartitioner implements KMPartitioner { - - public int partition(String key, int partitionNum) { - return Math.abs(key.hashCode()) % partitionNum; - } -} diff --git a/src/main/java/com/linkedin/kmf/services/ConsumeService.java b/src/main/java/com/linkedin/kmf/services/ConsumeService.java deleted file mode 100644 index e2d07f77..00000000 --- a/src/main/java/com/linkedin/kmf/services/ConsumeService.java +++ /dev/null @@ -1,288 +0,0 @@ -/** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this - * file except in compliance with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
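One subtlety in OldKMPartitioner above: Math.abs(Integer.MIN_VALUE) is still negative, so a key whose hashCode() happens to be Integer.MIN_VALUE would map to a negative partition. A sketch of the usual sign-safe variant, masking the sign bit as Kafka's own Utils.toPositive does; the class name is illustrative:

public class SafeModuloPartitioner {
  // Clearing the sign bit keeps the result in [0, partitionNum) for every
  // possible hashCode(), including Integer.MIN_VALUE.
  public int partition(String key, int partitionNum) {
    return (key.hashCode() & 0x7fffffff) % partitionNum;
  }
}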
- */ -package com.linkedin.kmf.services; - -import com.linkedin.kmf.common.DefaultTopicSchema; -import com.linkedin.kmf.common.Utils; -import com.linkedin.kmf.consumer.BaseConsumerRecord; -import com.linkedin.kmf.consumer.KMBaseConsumer; -import com.linkedin.kmf.consumer.NewConsumer; -import com.linkedin.kmf.consumer.OldConsumer; -import com.linkedin.kmf.services.configs.ConsumeServiceConfig; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Random; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.avro.generic.GenericRecord; -import org.apache.kafka.clients.consumer.ConsumerConfig; -import org.apache.kafka.common.MetricName; -import org.apache.kafka.common.config.ConfigException; -import org.apache.kafka.common.metrics.JmxReporter; -import org.apache.kafka.common.metrics.Measurable; -import org.apache.kafka.common.metrics.MetricConfig; -import org.apache.kafka.common.metrics.Metrics; -import org.apache.kafka.common.metrics.MetricsReporter; -import org.apache.kafka.common.metrics.Sensor; -import org.apache.kafka.common.metrics.stats.Avg; -import org.apache.kafka.common.metrics.stats.Max; -import org.apache.kafka.common.metrics.stats.Percentile; -import org.apache.kafka.common.metrics.stats.Percentiles; -import org.apache.kafka.common.metrics.stats.Rate; -import org.apache.kafka.common.metrics.stats.Total; -import org.apache.kafka.common.serialization.StringDeserializer; -import org.apache.kafka.common.utils.SystemTime; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class ConsumeService implements Service { - private static final Logger LOG = LoggerFactory.getLogger(ConsumeService.class); - private static final String METRIC_GROUP_NAME = "consume-service"; - private static final String[] NONOVERRIDABLE_PROPERTIES = - new String[] {ConsumeServiceConfig.BOOTSTRAP_SERVERS_CONFIG, - ConsumeServiceConfig.ZOOKEEPER_CONNECT_CONFIG}; - - private final String _name; - private final ConsumeMetrics _sensors; - private final KMBaseConsumer _consumer; - private final Thread _thread; - private final int _latencyPercentileMaxMs; - private final int _latencyPercentileGranularityMs; - private final AtomicBoolean _running; - private final int _latencySlaMs; - - public ConsumeService(Map props, String name) throws Exception { - _name = name; - Map consumerPropsOverride = props.containsKey(ConsumeServiceConfig.CONSUMER_PROPS_CONFIG) - ? 
(Map) props.get(ConsumeServiceConfig.CONSUMER_PROPS_CONFIG) : new HashMap<>(); - ConsumeServiceConfig config = new ConsumeServiceConfig(props); - String topic = config.getString(ConsumeServiceConfig.TOPIC_CONFIG); - String zkConnect = config.getString(ConsumeServiceConfig.ZOOKEEPER_CONNECT_CONFIG); - String brokerList = config.getString(ConsumeServiceConfig.BOOTSTRAP_SERVERS_CONFIG); - String consumerClassName = config.getString(ConsumeServiceConfig.CONSUMER_CLASS_CONFIG); - _latencySlaMs = config.getInt(ConsumeServiceConfig.LATENCY_SLA_MS_CONFIG); - _latencyPercentileMaxMs = config.getInt(ConsumeServiceConfig.LATENCY_PERCENTILE_MAX_MS_CONFIG); - _latencyPercentileGranularityMs = config.getInt(ConsumeServiceConfig.LATENCY_PERCENTILE_GRANULARITY_MS_CONFIG); - _running = new AtomicBoolean(false); - - for (String property: NONOVERRIDABLE_PROPERTIES) { - if (consumerPropsOverride.containsKey(property)) { - throw new ConfigException("Override must not contain " + property + " config."); - } - } - - Properties consumerProps = new Properties(); - - // Assign default config. This has the lowest priority. - consumerProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); - consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest"); - consumerProps.put(ConsumerConfig.CLIENT_ID_CONFIG, "kmf-consumer"); - consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "kmf-consumer-group-" + new Random().nextInt()); - consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); - consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); - - if (consumerClassName.equals(NewConsumer.class.getCanonicalName()) || consumerClassName.equals(NewConsumer.class.getSimpleName())) { - consumerClassName = NewConsumer.class.getCanonicalName(); - } else if (consumerClassName.equals(OldConsumer.class.getCanonicalName()) || consumerClassName.equals(OldConsumer.class.getSimpleName())) { - consumerClassName = OldConsumer.class.getCanonicalName(); - // The name/value of these configs are changed in the new consumer. - consumerProps.put("auto.commit.enable", "false"); - consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "largest"); - } - - // Assign config specified for ConsumeService. - consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList); - consumerProps.put("zookeeper.connect", zkConnect); - - // Assign config specified for consumer. This has the highest priority. 
- consumerProps.putAll(consumerPropsOverride); - - _consumer = (KMBaseConsumer) Class.forName(consumerClassName).getConstructor(String.class, Properties.class).newInstance(topic, consumerProps); - - _thread = new Thread(new Runnable() { - @Override - public void run() { - try { - consume(); - } catch (Exception e) { - LOG.error(_name + "/ConsumeService failed", e); - } - } - }, _name + " consume-service"); - _thread.setDaemon(true); - - MetricConfig metricConfig = new MetricConfig().samples(60).timeWindow(1000, TimeUnit.MILLISECONDS); - List reporters = new ArrayList<>(); - reporters.add(new JmxReporter(JMX_PREFIX)); - Metrics metrics = new Metrics(metricConfig, reporters, new SystemTime()); - Map tags = new HashMap<>(); - tags.put("name", _name); - _sensors = new ConsumeMetrics(metrics, tags); - } - - private void consume() throws Exception { - // Delay 1 second to reduce the chance that consumer creates topic before TopicManagementService - Thread.sleep(1000); - - Map nextIndexes = new HashMap<>(); - - while (_running.get()) { - BaseConsumerRecord record; - try { - record = _consumer.receive(); - } catch (Exception e) { - _sensors._consumeError.record(); - LOG.warn(_name + "/ConsumeService failed to receive record", e); - // Avoid busy while loop - Thread.sleep(100); - continue; - } - - if (record == null) - continue; - - GenericRecord avroRecord = Utils.genericRecordFromJson(record.value()); - if (avroRecord == null) { - _sensors._consumeError.record(); - continue; - } - int partition = record.partition(); - long index = (Long) avroRecord.get(DefaultTopicSchema.INDEX_FIELD.name()); - long currMs = System.currentTimeMillis(); - long prevMs = (Long) avroRecord.get(DefaultTopicSchema.TIME_FIELD.name()); - _sensors._recordsConsumed.record(); - _sensors._bytesConsumed.record(record.value().length()); - _sensors._recordsDelay.record(currMs - prevMs); - - if (currMs - prevMs > _latencySlaMs) - _sensors._recordsDelayed.record(); - - if (index == -1L || !nextIndexes.containsKey(partition)) { - nextIndexes.put(partition, -1L); - continue; - } - - long nextIndex = nextIndexes.get(partition); - if (nextIndex == -1 || index == nextIndex) { - nextIndexes.put(partition, index + 1); - } else if (index < nextIndex) { - _sensors._recordsDuplicated.record(); - } else if (index > nextIndex) { - nextIndexes.put(partition, index + 1); - _sensors._recordsLost.record(index - nextIndex); - } - } - } - - @Override - public synchronized void start() { - if (_running.compareAndSet(false, true)) { - _thread.start(); - LOG.info("{}/ConsumeService started", _name); - } - } - - @Override - public synchronized void stop() { - if (_running.compareAndSet(true, false)) { - try { - _consumer.close(); - } catch (Exception e) { - LOG.warn(_name + "/ConsumeService while trying to close consumer.", e); - } - LOG.info("{}/ConsumeService stopped", _name); - } - } - - @Override - public void awaitShutdown() { - LOG.info("{}/ConsumeService shutdown completed", _name); - } - - @Override - public boolean isRunning() { - return _running.get() && _thread.isAlive(); - } - - private class ConsumeMetrics { - public final Metrics metrics; - private final Sensor _bytesConsumed; - private final Sensor _consumeError; - private final Sensor _recordsConsumed; - private final Sensor _recordsDuplicated; - private final Sensor _recordsLost; - private final Sensor _recordsDelay; - private final Sensor _recordsDelayed; - - public ConsumeMetrics(Metrics metrics, final Map tags) { - this.metrics = metrics; - - _bytesConsumed = 
metrics.sensor("bytes-consumed"); - _bytesConsumed.add(new MetricName("bytes-consumed-rate", METRIC_GROUP_NAME, "The average number of bytes per second that are consumed", tags), new Rate()); - - _consumeError = metrics.sensor("consume-error"); - _consumeError.add(new MetricName("consume-error-rate", METRIC_GROUP_NAME, "The average number of errors per second", tags), new Rate()); - _consumeError.add(new MetricName("consume-error-total", METRIC_GROUP_NAME, "The total number of errors", tags), new Total()); - - _recordsConsumed = metrics.sensor("records-consumed"); - _recordsConsumed.add(new MetricName("records-consumed-rate", METRIC_GROUP_NAME, "The average number of records per second that are consumed", tags), new Rate()); - _recordsConsumed.add(new MetricName("records-consumed-total", METRIC_GROUP_NAME, "The total number of records that are consumed", tags), new Total()); - - _recordsDuplicated = metrics.sensor("records-duplicated"); - _recordsDuplicated.add(new MetricName("records-duplicated-rate", METRIC_GROUP_NAME, "The average number of records per second that are duplicated", tags), new Rate()); - _recordsDuplicated.add(new MetricName("records-duplicated-total", METRIC_GROUP_NAME, "The total number of records that are duplicated", tags), new Total()); - - _recordsLost = metrics.sensor("records-lost"); - _recordsLost.add(new MetricName("records-lost-rate", METRIC_GROUP_NAME, "The average number of records per second that are lost", tags), new Rate()); - _recordsLost.add(new MetricName("records-lost-total", METRIC_GROUP_NAME, "The total number of records that are lost", tags), new Total()); - - _recordsDelayed = metrics.sensor("records-delayed"); - _recordsDelayed.add(new MetricName("records-delayed-rate", METRIC_GROUP_NAME, "The average number of records per second that are either lost or arrive after maximum allowed latency under SLA", tags), new Rate()); - _recordsDelayed.add(new MetricName("records-delayed-total", METRIC_GROUP_NAME, "The total number of records that are either lost or arrive after maximum allowed latency under SLA", tags), new Total()); - - _recordsDelay = metrics.sensor("records-delay"); - _recordsDelay.add(new MetricName("records-delay-ms-avg", METRIC_GROUP_NAME, "The average latency of records from producer to consumer", tags), new Avg()); - _recordsDelay.add(new MetricName("records-delay-ms-max", METRIC_GROUP_NAME, "The maximum latency of records from producer to consumer", tags), new Max()); - - // There are 2 extra buckets use for values smaller than 0.0 or larger than max, respectively. 
- int bucketNum = _latencyPercentileMaxMs / _latencyPercentileGranularityMs + 2; - int sizeInBytes = 4 * bucketNum; - _recordsDelay.add(new Percentiles(sizeInBytes, _latencyPercentileMaxMs, Percentiles.BucketSizing.CONSTANT, - new Percentile(new MetricName("records-delay-ms-99th", METRIC_GROUP_NAME, "The 99th percentile latency of records from producer to consumer", tags), 99.0), - new Percentile(new MetricName("records-delay-ms-999th", METRIC_GROUP_NAME, "The 999th percentile latency of records from producer to consumer", tags), 99.9))); - - metrics.addMetric(new MetricName("consume-availability-avg", METRIC_GROUP_NAME, "The average consume availability", tags), - new Measurable() { - @Override - public double measure(MetricConfig config, long now) { - double recordsConsumedRate = _sensors.metrics.metrics().get(new MetricName("records-consumed-rate", METRIC_GROUP_NAME, tags)).value(); - double recordsLostRate = _sensors.metrics.metrics().get(new MetricName("records-lost-rate", METRIC_GROUP_NAME, tags)).value(); - double recordsDelayedRate = _sensors.metrics.metrics().get(new MetricName("records-delayed-rate", METRIC_GROUP_NAME, tags)).value(); - - if (new Double(recordsLostRate).isNaN()) - recordsLostRate = 0; - if (new Double(recordsDelayedRate).isNaN()) - recordsDelayedRate = 0; - - double consumeAvailability = recordsConsumedRate + recordsLostRate > 0 - ? (recordsConsumedRate - recordsDelayedRate) / (recordsConsumedRate + recordsLostRate) : 0; - - return consumeAvailability; - } - } - ); - } - - } - -} \ No newline at end of file diff --git a/src/main/java/com/linkedin/kmf/services/JettyService.java b/src/main/java/com/linkedin/kmf/services/JettyService.java deleted file mode 100644 index 42b74ae5..00000000 --- a/src/main/java/com/linkedin/kmf/services/JettyService.java +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this - * file except in compliance with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - */ -package com.linkedin.kmf.services; - -import com.linkedin.kmf.services.configs.JettyServiceConfig; -import org.eclipse.jetty.server.Server; -import org.eclipse.jetty.server.handler.ResourceHandler; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Map; - -// Jetty server that serves html files. 
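The consume-availability gauge defined above combines three rates. A worked sketch of the same arithmetic (ConsumeAvailability is an invented name; in the service the rates come from the registered sensors): with 100 records/s consumed, 1/s lost and 2/s delayed, availability is (100 - 2) / (100 + 1) ≈ 0.970.

public final class ConsumeAvailability {
  private ConsumeAvailability() { }

  // Mirrors the Measurable above: NaN rates are treated as zero, and the
  // result is 0 when nothing has been consumed or lost yet.
  public static double of(double consumedRate, double lostRate, double delayedRate) {
    if (Double.isNaN(lostRate)) {
      lostRate = 0;
    }
    if (Double.isNaN(delayedRate)) {
      delayedRate = 0;
    }
    return consumedRate + lostRate > 0
        ? (consumedRate - delayedRate) / (consumedRate + lostRate)
        : 0;
  }

  public static void main(String[] args) {
    System.out.println(of(100, 1, 2)); // ~0.9703
  }
}

The Percentiles histogram above is sized by the same bucket formula: with, say, a 5000 ms latency ceiling and 1 ms granularity, bucketNum is 5002 and the histogram costs about 20 KB (4 bytes per bucket) per consumer.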
-public class JettyService implements Service { - private static final Logger LOG = LoggerFactory.getLogger(JettyService.class); - - private final String _name; - private final Server _jettyServer; - private final int _port; - - public JettyService(Map props, String name) { - _name = name; - JettyServiceConfig config = new JettyServiceConfig(props); - _port = config.getInt(JettyServiceConfig.PORT_CONFIG); - _jettyServer = new Server(_port); - ResourceHandler resourceHandler = new ResourceHandler(); - resourceHandler.setDirectoriesListed(true); - resourceHandler.setWelcomeFiles(new String[]{"index.html"}); - resourceHandler.setResourceBase("webapp"); - _jettyServer.setHandler(resourceHandler); - } - - public synchronized void start() { - try { - _jettyServer.start(); - LOG.info("{}/JettyService started at port {}", _name, _port); - } catch (Exception e) { - LOG.error(_name + "/JettyService failed to start", e); - } - } - - public synchronized void stop() { - try { - _jettyServer.stop(); - LOG.info("{}/JettyService stopped", _name); - } catch (Exception e) { - LOG.error(_name + "/JettyService failed to stop", e); - } - } - - public boolean isRunning() { - return _jettyServer.isRunning(); - } - - public void awaitShutdown() { - - } - -} diff --git a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java deleted file mode 100644 index d869d41c..00000000 --- a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java +++ /dev/null @@ -1,380 +0,0 @@ -/** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this - * file except in compliance with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- */ - -package com.linkedin.kmf.services; - -import com.linkedin.kmf.common.Utils; -import com.linkedin.kmf.services.configs.CommonServiceConfig; -import com.linkedin.kmf.services.configs.MultiClusterTopicManagementServiceConfig; -import com.linkedin.kmf.services.configs.TopicManagementServiceConfig; -import com.linkedin.kmf.topicfactory.TopicFactory; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashSet; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Set; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.TimeUnit; -import kafka.admin.AdminOperationException; -import java.util.concurrent.atomic.AtomicBoolean; -import kafka.admin.AdminUtils; -import kafka.admin.BrokerMetadata; -import kafka.admin.PreferredReplicaLeaderElectionCommand; -import kafka.admin.RackAwareMode; -import kafka.cluster.Broker; -import kafka.common.TopicAndPartition; -import kafka.utils.ZkUtils; -import org.I0Itec.zkclient.exception.ZkNodeExistsException; -import org.apache.kafka.common.Node; -import org.apache.kafka.common.PartitionInfo; -import org.apache.kafka.common.config.ConfigException; -import org.apache.kafka.common.security.JaasUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import scala.collection.Seq; - -import static com.linkedin.kmf.common.Utils.ZK_CONNECTION_TIMEOUT_MS; -import static com.linkedin.kmf.common.Utils.ZK_SESSION_TIMEOUT_MS; - -/** - * This service periodically checks and rebalances the monitor topics across a pipeline of Kafka clusters so that - * leadership of the partitions of the monitor topic in each cluster is distributed evenly across brokers in the cluster. - * - * More specifically, this service may do some or all of the following tasks depending on the config: - * - * - Create the monitor topic using the user-specified replication factor and partition number - * - Increase partition number of the monitor topic if either partitionsToBrokersRatio or minPartitionNum is not satisfied - * - Increase replication factor of the monitor topic if the user-specified replicationFactor is not satisfied - * - Reassign partition across brokers to make sure each broker acts as preferred leader of at least one partition of the monitor topic - * - Trigger preferred leader election to make sure each broker acts as leader of at least one partition of the monitor topic. - * - Make sure the number of partitions of the monitor topic is same across all monitored custers. - * - */ -public class MultiClusterTopicManagementService implements Service { - private static final Logger LOG = LoggerFactory.getLogger(MultiClusterTopicManagementService.class); - - private final AtomicBoolean _isRunning = new AtomicBoolean(false); - private final String _serviceName; - private final Map _topicManagementByCluster; - private final int _scheduleIntervalMs; - private final ScheduledExecutorService _executor; - - public MultiClusterTopicManagementService(Map props, String serviceName) throws Exception { - _serviceName = serviceName; - MultiClusterTopicManagementServiceConfig config = new MultiClusterTopicManagementServiceConfig(props); - String topic = config.getString(CommonServiceConfig.TOPIC_CONFIG); - Map propsByCluster = props.containsKey(MultiClusterTopicManagementServiceConfig.PROPS_PER_CLUSTER_CONFIG) - ? 
(Map) props.get(MultiClusterTopicManagementServiceConfig.PROPS_PER_CLUSTER_CONFIG) : new HashMap<>(); - _topicManagementByCluster = initializeTopicManagementHelper(propsByCluster, topic); - _scheduleIntervalMs = config.getInt(MultiClusterTopicManagementServiceConfig.REBALANCE_INTERVAL_MS_CONFIG); - _executor = Executors.newSingleThreadScheduledExecutor(new ThreadFactory() { - @Override - public Thread newThread(Runnable r) { - return new Thread(r, _serviceName + "-multi-cluster-topic-management-service"); - } - }); - } - - private Map initializeTopicManagementHelper(Map propsByCluster, String topic) throws Exception { - Map topicManagementByCluster = new HashMap<>(); - for (Map.Entry entry: propsByCluster.entrySet()) { - String clusterName = entry.getKey(); - Map serviceProps = entry.getValue(); - if (serviceProps.containsKey(MultiClusterTopicManagementServiceConfig.TOPIC_CONFIG)) - throw new ConfigException("The raw per-cluster config for MultiClusterTopicManagementService must not contain " + - MultiClusterTopicManagementServiceConfig.TOPIC_CONFIG); - serviceProps.put(MultiClusterTopicManagementServiceConfig.TOPIC_CONFIG, topic); - topicManagementByCluster.put(clusterName, new TopicManagementHelper(serviceProps)); - } - return topicManagementByCluster; - } - - @Override - public synchronized void start() { - if (_isRunning.compareAndSet(false, true)) { - Runnable r = new TopicManagementRunnable(); - _executor.scheduleWithFixedDelay(r, 0, _scheduleIntervalMs, TimeUnit.MILLISECONDS); - LOG.info("{}/MultiClusterTopicManagementService started.", _serviceName); - } - } - - @Override - public synchronized void stop() { - if (_isRunning.compareAndSet(true, false)) { - _executor.shutdown(); - LOG.info("{}/MultiClusterTopicManagementService stopped.", _serviceName); - } - } - - @Override - public boolean isRunning() { - return _isRunning.get() && !_executor.isShutdown(); - } - - @Override - public void awaitShutdown() { - try { - _executor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); - } catch (InterruptedException e) { - LOG.info("Thread interrupted when waiting for {}/MultiClusterTopicManagementService to shutdown", _serviceName); - } - LOG.info("{}/MultiClusterTopicManagementService shutdown completed", _serviceName); - } - - private class TopicManagementRunnable implements Runnable { - @Override - public void run() { - try { - for (TopicManagementHelper helper : _topicManagementByCluster.values()) { - helper.maybeCreateTopic(); - } - - /* - * The partition number of the monitor topics should be the minimum partition number that satisifies the following conditions: - * - partition number of the monitor topics across all monitored clusters should be the same - * - partitionNum / brokerNum >= user-configured partitionsToBrokersRatio. 
- * - partitionNum >= user-configured minPartitionNum - */ - - int minPartitionNum = 0; - for (TopicManagementHelper helper : _topicManagementByCluster.values()) { - minPartitionNum = Math.max(minPartitionNum, helper.minPartitionNum()); - } - for (TopicManagementHelper helper : _topicManagementByCluster.values()) { - helper.maybeAddPartitions(minPartitionNum); - } - - for (Map.Entry entry : _topicManagementByCluster.entrySet()) { - String clusterName = entry.getKey(); - TopicManagementHelper helper = entry.getValue(); - try { - helper.maybeReassignPartitionAndElectLeader(); - } catch (IOException | ZkNodeExistsException | AdminOperationException e) { - LOG.warn(_serviceName + "/MultiClusterTopicManagementService will retry later in cluster " + clusterName, e); - } - } - } catch (Exception e) { - LOG.error(_serviceName + "/MultiClusterTopicManagementService will stop due to error.", e); - stop(); - } - } - } - - static class TopicManagementHelper { - - private final boolean _topicCreationEnabled; - private final String _topic; - private final String _zkConnect; - private final int _replicationFactor; - private final double _minPartitionsToBrokersRatio; - private final int _minPartitionNum; - private final TopicFactory _topicFactory; - private final Properties _topicProperties; - - TopicManagementHelper(Map props) throws Exception { - TopicManagementServiceConfig config = new TopicManagementServiceConfig(props); - _topicCreationEnabled = config.getBoolean(TopicManagementServiceConfig.TOPIC_CREATION_ENABLED_CONFIG); - _topic = config.getString(TopicManagementServiceConfig.TOPIC_CONFIG); - _zkConnect = config.getString(TopicManagementServiceConfig.ZOOKEEPER_CONNECT_CONFIG); - _replicationFactor = config.getInt(TopicManagementServiceConfig.TOPIC_REPLICATION_FACTOR_CONFIG); - _minPartitionsToBrokersRatio = config.getDouble(TopicManagementServiceConfig.PARTITIONS_TO_BROKERS_RATIO_CONFIG); - _minPartitionNum = config.getInt(TopicManagementServiceConfig.MIN_PARTITION_NUM_CONFIG); - String topicFactoryClassName = config.getString(TopicManagementServiceConfig.TOPIC_FACTORY_CLASS_CONFIG); - _topicProperties = new Properties(); - if (props.containsKey(TopicManagementServiceConfig.TOPIC_PROPS_CONFIG)) - _topicProperties.putAll((Map) props.get(TopicManagementServiceConfig.TOPIC_PROPS_CONFIG)); - - Map topicFactoryConfig = props.containsKey(TopicManagementServiceConfig.TOPIC_FACTORY_PROPS_CONFIG) ? 
- (Map) props.get(TopicManagementServiceConfig.TOPIC_FACTORY_PROPS_CONFIG) : new HashMap(); - _topicFactory = (TopicFactory) Class.forName(topicFactoryClassName).getConstructor(Map.class).newInstance(topicFactoryConfig); - } - - void maybeCreateTopic() throws Exception { - if (_topicCreationEnabled) { - _topicFactory.createTopicIfNotExist(_zkConnect, _topic, _replicationFactor, _minPartitionsToBrokersRatio, _topicProperties); - } - } - - int minPartitionNum() { - int brokerCount = Utils.getBrokerCount(_zkConnect); - return Math.max((int) Math.ceil(_minPartitionsToBrokersRatio * brokerCount), _minPartitionNum); - } - - void maybeAddPartitions(int minPartitionNum) { - ZkUtils zkUtils = ZkUtils.apply(_zkConnect, ZK_SESSION_TIMEOUT_MS, ZK_CONNECTION_TIMEOUT_MS, JaasUtils.isZkSecurityEnabled()); - try { - int partitionNum = getPartitionInfo(zkUtils, _topic).size(); - if (partitionNum < minPartitionNum) { - LOG.info("MultiClusterTopicManagementService will increase partition of the topic {} " - + "in cluster {} from {} to {}.", _topic, _zkConnect, partitionNum, minPartitionNum); - AdminUtils.addPartitions(zkUtils, _topic, minPartitionNum, null, false, RackAwareMode.Enforced$.MODULE$); - } - } finally { - zkUtils.close(); - } - } - - void maybeReassignPartitionAndElectLeader() throws Exception { - ZkUtils zkUtils = ZkUtils.apply(_zkConnect, ZK_SESSION_TIMEOUT_MS, ZK_CONNECTION_TIMEOUT_MS, JaasUtils.isZkSecurityEnabled()); - - try { - List partitionInfoList = getPartitionInfo(zkUtils, _topic); - Collection brokers = scala.collection.JavaConversions.asJavaCollection(zkUtils.getAllBrokersInCluster()); - - if (partitionInfoList.size() == 0) - throw new IllegalStateException("Topic " + _topic + " does not exist in cluster " + _zkConnect); - - int currentReplicationFactor = getReplicationFactor(partitionInfoList); - - if (_replicationFactor < currentReplicationFactor) - throw new RuntimeException(String.format("Configured replication factor %d " - + "is smaller than the current replication factor %d of the topic %s in cluster %s", - _replicationFactor, currentReplicationFactor, _topic, _zkConnect)); - - if (_replicationFactor > currentReplicationFactor && zkUtils.getPartitionsBeingReassigned().isEmpty()) { - LOG.info("MultiClusterTopicManagementService will increase the replication factor of the topic {} in cluster {}", _topic, _zkConnect); - reassignPartitions(zkUtils, brokers, _topic, partitionInfoList.size(), _replicationFactor); - } - - if (partitionInfoList.size() >= brokers.size() && - someBrokerNotPreferredLeader(partitionInfoList, brokers) && - zkUtils.getPartitionsBeingReassigned().isEmpty()) { - LOG.info("MultiClusterTopicManagementService will reassign partitions of the topic {} in cluster {}", _topic, _zkConnect); - reassignPartitions(zkUtils, brokers, _topic, partitionInfoList.size(), _replicationFactor); - } - - if (partitionInfoList.size() >= brokers.size() && - someBrokerNotElectedLeader(partitionInfoList, brokers)) { - LOG.info("MultiClusterTopicManagementService will trigger preferred leader election for the topic {} in cluster {}", _topic, _zkConnect); - triggerPreferredLeaderElection(zkUtils, partitionInfoList); - } - } finally { - zkUtils.close(); - } - } - - private static void triggerPreferredLeaderElection(ZkUtils zkUtils, List partitionInfoList) { - scala.collection.mutable.HashSet scalaPartitionInfoSet = new scala.collection.mutable.HashSet<>(); - for (PartitionInfo javaPartitionInfo : partitionInfoList) { - scalaPartitionInfoSet.add(new 
TopicAndPartition(javaPartitionInfo.topic(), javaPartitionInfo.partition())); - } - PreferredReplicaLeaderElectionCommand.writePreferredReplicaElectionData(zkUtils, scalaPartitionInfoSet); - } - - private static void reassignPartitions(ZkUtils zkUtils, Collection brokers, String topic, int partitionCount, int replicationFactor) { - scala.collection.mutable.ArrayBuffer brokersMetadata = new scala.collection.mutable.ArrayBuffer<>(brokers.size()); - for (Broker broker : brokers) { - brokersMetadata.$plus$eq(new BrokerMetadata(broker.id(), broker.rack())); - } - scala.collection.Map> partitionToReplicas = - AdminUtils.assignReplicasToBrokers(brokersMetadata, partitionCount, replicationFactor, 0, 0); - String jsonReassignmentData = formatAsReassignmentJson(topic, partitionToReplicas); - zkUtils.createPersistentPath(ZkUtils.ReassignPartitionsPath(), jsonReassignmentData, zkUtils.DefaultAcls()); - } - - private static List getPartitionInfo(ZkUtils zkUtils, String topic) { - scala.collection.mutable.ArrayBuffer topicList = new scala.collection.mutable.ArrayBuffer<>(); - topicList.$plus$eq(topic); - scala.collection.Map> partitionAssignments = - zkUtils.getPartitionAssignmentForTopics(topicList).apply(topic); - List partitionInfoList = new ArrayList<>(); - scala.collection.Iterator>> it = partitionAssignments.iterator(); - while (it.hasNext()) { - scala.Tuple2> scalaTuple = it.next(); - Integer partition = (Integer) scalaTuple._1(); - scala.Option leaderOption = zkUtils.getLeaderForPartition(topic, partition); - Node leader = leaderOption.isEmpty() ? null : new Node((Integer) leaderOption.get(), "", -1); - Node[] replicas = new Node[scalaTuple._2().size()]; - for (int i = 0; i < replicas.length; i++) { - Integer brokerId = (Integer) scalaTuple._2().apply(i); - replicas[i] = new Node(brokerId, "", -1); - } - partitionInfoList.add(new PartitionInfo(topic, partition, leader, replicas, null)); - } - - return partitionInfoList; - } - - static int getReplicationFactor(List partitionInfoList) { - if (partitionInfoList.isEmpty()) - throw new RuntimeException("Partition list is empty"); - - int replicationFactor = partitionInfoList.get(0).replicas().length; - for (PartitionInfo partitionInfo : partitionInfoList) { - if (replicationFactor != partitionInfo.replicas().length) { - String topic = partitionInfoList.get(0).topic(); - throw new RuntimeException("Partitions of the topic " + topic + " have different replication factor"); - } - } - return replicationFactor; - } - - static boolean someBrokerNotPreferredLeader(List partitionInfoList, Collection brokers) { - Set brokersNotPreferredLeader = new HashSet<>(brokers.size()); - for (Broker broker: brokers) - brokersNotPreferredLeader.add(broker.id()); - for (PartitionInfo partitionInfo : partitionInfoList) - brokersNotPreferredLeader.remove(partitionInfo.replicas()[0].id()); - - return !brokersNotPreferredLeader.isEmpty(); - } - - static boolean someBrokerNotElectedLeader(List partitionInfoList, Collection brokers) { - Set brokersNotElectedLeader = new HashSet<>(brokers.size()); - for (Broker broker: brokers) - brokersNotElectedLeader.add(broker.id()); - for (PartitionInfo partitionInfo : partitionInfoList) { - if (partitionInfo.leader() != null) - brokersNotElectedLeader.remove(partitionInfo.leader().id()); - } - return !brokersNotElectedLeader.isEmpty(); - } - - /** - * @param topic topic - * @param partitionsToBeReassigned a map from partition (int) to replica list (int seq) - * - * @return a json string with the same format as output of 
kafka.utils.ZkUtils.formatAsReassignmentJson - * - * Example: - *
-     *   {"version":1,"partitions":[
-     *     {"topic":"kmf-topic","partition":1,"replicas":[0,1]},
-     *     {"topic":"kmf-topic","partition":2,"replicas":[1,2]},
-     *     {"topic":"kmf-topic","partition":0,"replicas":[2,0]}]}
-     * </pre>
- */ - private static String formatAsReassignmentJson(String topic, scala.collection.Map> partitionsToBeReassigned) { - StringBuilder bldr = new StringBuilder(); - bldr.append("{\"version\":1,\"partitions\":[\n"); - for (int partition = 0; partition < partitionsToBeReassigned.size(); partition++) { - bldr.append(" {\"topic\":\"").append(topic).append("\",\"partition\":").append(partition).append(",\"replicas\":["); - scala.collection.Seq replicas = partitionsToBeReassigned.apply(partition); - for (int replicaIndex = 0; replicaIndex < replicas.size(); replicaIndex++) { - Object replica = replicas.apply(replicaIndex); - bldr.append(replica).append(","); - } - bldr.setLength(bldr.length() - 1); - bldr.append("]},\n"); - } - bldr.setLength(bldr.length() - 2); - bldr.append("]}"); - return bldr.toString(); - } - - } -} - diff --git a/src/main/java/com/linkedin/kmf/topicfactory/DefaultTopicFactory.java b/src/main/java/com/linkedin/kmf/topicfactory/DefaultTopicFactory.java deleted file mode 100644 index 5158d24d..00000000 --- a/src/main/java/com/linkedin/kmf/topicfactory/DefaultTopicFactory.java +++ /dev/null @@ -1,28 +0,0 @@ -/** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this - * file except in compliance with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - */ -package com.linkedin.kmf.topicfactory; - -import com.linkedin.kmf.common.Utils; - -import java.util.Map; -import java.util.Properties; - - -public class DefaultTopicFactory implements TopicFactory { - - /** This constructor is required by TopicFactory but does nothing. */ - public DefaultTopicFactory(Map config) { - } - - @Override - public int createTopicIfNotExist(String zkUrl, String topic, int replicationFactor, double partitionToBrokerRatio, Properties topicConfig) { - return Utils.createMonitoringTopicIfNotExists(zkUrl, topic, replicationFactor, partitionToBrokerRatio, topicConfig); - } -} diff --git a/src/main/java/com/linkedin/kmf/KafkaMonitor.java b/src/main/java/com/linkedin/xinfra/monitor/XinfraMonitor.java similarity index 51% rename from src/main/java/com/linkedin/kmf/KafkaMonitor.java rename to src/main/java/com/linkedin/xinfra/monitor/XinfraMonitor.java index daa623c4..d516b076 100644 --- a/src/main/java/com/linkedin/kmf/KafkaMonitor.java +++ b/src/main/java/com/linkedin/xinfra/monitor/XinfraMonitor.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,42 +7,39 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
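DefaultTopicFactory above is the whole default implementation: it simply forwards to Utils.createMonitoringTopicIfNotExists. A hypothetical custom factory, to illustrate the extension point; MinRatioTopicFactory and its config key are invented for this sketch, and the package names follow the pre-rename code above:

import com.linkedin.kmf.common.Utils;
import com.linkedin.kmf.topicfactory.TopicFactory;
import java.util.Map;
import java.util.Properties;

public class MinRatioTopicFactory implements TopicFactory {
  private final double minRatio;

  public MinRatioTopicFactory(Map config) {
    // "min.partition.broker.ratio" is an illustrative key, not from the source.
    Object ratio = config.get("min.partition.broker.ratio");
    this.minRatio = ratio == null ? 1.0 : Double.parseDouble(ratio.toString());
  }

  @Override
  public int createTopicIfNotExist(String zkUrl, String topic, int replicationFactor,
      double partitionToBrokerRatio, Properties topicConfig) {
    // Enforce a floor on the ratio, then delegate to the default creation logic.
    return Utils.createMonitoringTopicIfNotExists(zkUrl, topic, replicationFactor,
        Math.max(partitionToBrokerRatio, minRatio), topicConfig);
  }
}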
 */
-package com.linkedin.kmf;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.linkedin.kmf.services.Service;
-import com.linkedin.kmf.apps.App;
+package com.linkedin.xinfra.monitor;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.linkedin.xinfra.monitor.apps.App;
+import com.linkedin.xinfra.monitor.services.Service;
+import com.linkedin.xinfra.monitor.services.ServiceFactory;
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.lang.reflect.Constructor;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.kafka.common.metrics.JmxReporter;
-import org.apache.kafka.common.metrics.Measurable;
 import org.apache.kafka.common.metrics.MetricConfig;
 import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.metrics.MetricsReporter;
 import org.apache.kafka.common.utils.SystemTime;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import java.io.BufferedReader;
-import java.io.FileReader;
-import java.util.Map;
-import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.TimeUnit;
-
 /**
  * This is the main entry point of the monitor. It reads the configuration and manages the life cycle of the monitoring
  * applications.
 */
-public class KafkaMonitor {
-  private static final Logger LOG = LoggerFactory.getLogger(KafkaMonitor.class);
-  public static final String CLASS_NAME_CONFIG = "class.name";
-  private static final String METRIC_GROUP_NAME = "kafka-monitor";
-  private static final String JMX_PREFIX = "kmf";
+public class XinfraMonitor {
+  private static final Logger LOG = LoggerFactory.getLogger(XinfraMonitor.class);

  /** This is concurrent because healthCheck() can modify this map, but awaitShutdown() can be called at any time by
   * a different thread. */
@@ -51,27 +48,39 @@ public class KafkaMonitor {
   private final ConcurrentMap<String, Service> _services;
   private final ConcurrentMap<String, Object> _offlineRunnables;
   private final ScheduledExecutorService _executor;
-  /** When true start has been called on this instance of Kafka monitor. */
+  /** When true, start has been called on this instance of Xinfra Monitor. */
   private final AtomicBoolean _isRunning = new AtomicBoolean(false);

-  public KafkaMonitor(Map<String, Map> testProps) throws Exception {
+  /**
+   * The XinfraMonitor constructor creates apps and services for each of the individual clusters (properties) that are passed in.
+   * For example, if 10 clusters are to be monitored, this constructor creates 10 * num_apps_per_cluster apps
+   * and 10 * num_services_per_cluster services.
+   * @param allClusterProps the properties of ALL Kafka clusters for which apps and services need to be created
+   * @throws Exception if an exception occurs while creating the apps and services
+   */
+
+  @SuppressWarnings({"rawtypes"})
+  public XinfraMonitor(Map<String, Map> allClusterProps) throws Exception {
    _apps = new ConcurrentHashMap<>();
    _services = new ConcurrentHashMap<>();
-    for (Map.Entry<String, Map> entry : testProps.entrySet()) {
-      String name = entry.getKey();
-      Map props = entry.getValue();
-      if (!props.containsKey(CLASS_NAME_CONFIG))
-        throw new IllegalArgumentException(name + " is not configured with " + CLASS_NAME_CONFIG);
-      String className = (String) props.get(CLASS_NAME_CONFIG);
-
-      Class cls = Class.forName(className);
-      if (App.class.isAssignableFrom(cls)) {
-        App test = (App) Class.forName(className).getConstructor(Map.class, String.class).newInstance(props, name);
-        _apps.put(name, test);
-      } else if (Service.class.isAssignableFrom(cls)) {
-        Service service = (Service) Class.forName(className).getConstructor(Map.class, String.class).newInstance(props, name);
-        _services.put(name, service);
+    for (Map.Entry<String, Map> clusterProperty : allClusterProps.entrySet()) {
+      String clusterName = clusterProperty.getKey();
+      Map props = clusterProperty.getValue();
+      if (!props.containsKey(XinfraMonitorConstants.CLASS_NAME_CONFIG))
+        throw new IllegalArgumentException(clusterName + " is not configured with " + XinfraMonitorConstants.CLASS_NAME_CONFIG);
+      String className = (String) props.get(XinfraMonitorConstants.CLASS_NAME_CONFIG);
+
+      Class<?> aClass = Class.forName(className);
+      if (App.class.isAssignableFrom(aClass)) {
+        App clusterApp = (App) Class.forName(className).getConstructor(Map.class, String.class).newInstance(props, clusterName);
+        _apps.put(clusterName, clusterApp);
+      } else if (Service.class.isAssignableFrom(aClass)) {
+        ServiceFactory serviceFactory = (ServiceFactory) Class.forName(className + XinfraMonitorConstants.FACTORY)
+            .getConstructor(Map.class, String.class)
+            .newInstance(props, clusterName);
+        Service service = serviceFactory.createService();
+        _services.put(clusterName, service);
       } else {
         throw new IllegalArgumentException(className + " should implement either " + App.class.getSimpleName() + " or " + Service.class.getSimpleName());
       }
@@ -79,19 +88,22 @@ public KafkaMonitor(Map testProps) throws Exception {
     _executor = Executors.newSingleThreadScheduledExecutor();
     _offlineRunnables = new ConcurrentHashMap<>();
     List<MetricsReporter> reporters = new ArrayList<>();
-    reporters.add(new JmxReporter(JMX_PREFIX));
+    reporters.add(new JmxReporter(XinfraMonitorConstants.JMX_PREFIX));
     Metrics metrics = new Metrics(new MetricConfig(), reporters, new SystemTime());
-    metrics.addMetric(metrics.metricName("offline-runnable-count", METRIC_GROUP_NAME, "The number of Service/App that are not fully running"),
-      new Measurable() {
-        @Override
-        public double measure(MetricConfig config, long now) {
-          return _offlineRunnables.size();
-        }
-      }
-    );
+    metrics.addMetric(metrics.metricName("offline-runnable-count", XinfraMonitorConstants.METRIC_GROUP_NAME, "The number of Service/App that are not fully running"),
+        (config, now) -> _offlineRunnables.size());
+  }
+
+  private boolean constructorContainsClass(Constructor<?>[] constructors, Class<?> classObject) {
+    for (int n = 0; n < constructors[0].getParameterTypes().length; ++n) {
+      if (constructors[0].getParameterTypes()[n].equals(classObject)) {
+        return true;
+      }
+    }
+    return false;
   }

-  public synchronized void start() {
+  public synchronized void start() throws Exception {
     if (!_isRunning.compareAndSet(false, true)) {
       return;
     }
@@ -102,34 +114,37 @@ public synchronized void start() {
entry.getValue().start(); } - _executor.scheduleAtFixedRate( - new Runnable() { - @Override - public void run() { - try { - checkHealth(); - } catch (Exception e) { - LOG.error("Failed to check health of tests and services", e); - } - } - }, 5, 5, TimeUnit.SECONDS + long initialDelaySecond = 5; + long periodSecond = 5; + + _executor.scheduleAtFixedRate(() -> { + try { + checkHealth(); + } catch (Exception e) { + LOG.error("Failed to check health of apps and services", e); + } + }, initialDelaySecond, periodSecond, TimeUnit.SECONDS ); } private void checkHealth() { for (Map.Entry entry: _apps.entrySet()) { - if (!entry.getValue().isRunning()) { + if (!entry.getValue().isRunning()) _offlineRunnables.putIfAbsent(entry.getKey(), entry.getValue()); - LOG.error("App " + entry.getKey() + " is not fully running."); - } } for (Map.Entry entry: _services.entrySet()) { - if (!entry.getValue().isRunning()) { + if (!entry.getValue().isRunning()) _offlineRunnables.putIfAbsent(entry.getKey(), entry.getValue()); + } + + for (Map.Entry entry: _offlineRunnables.entrySet()) { + if (entry.getValue() instanceof App) + LOG.error("App " + entry.getKey() + " is not fully running."); + else LOG.error("Service " + entry.getKey() + " is not fully running."); - } } + } public synchronized void stop() { @@ -137,26 +152,26 @@ public synchronized void stop() { return; } _executor.shutdownNow(); - for (App test: _apps.values()) - test.stop(); + for (App app: _apps.values()) + app.stop(); for (Service service: _services.values()) service.stop(); } public void awaitShutdown() { - for (App test: _apps.values()) - test.awaitShutdown(); + for (App app: _apps.values()) + app.awaitShutdown(); for (Service service: _services.values()) - service.awaitShutdown(); + service.awaitShutdown(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); } + @SuppressWarnings("rawtypes") public static void main(String[] args) throws Exception { if (args.length <= 0) { - LOG.info("USAGE: java [options] " + KafkaMonitor.class.getName() + " config/kafka-monitor.properties"); + LOG.info("USAGE: java [options] " + XinfraMonitor.class.getName() + " config/xinfra-monitor.properties"); return; } - StringBuilder buffer = new StringBuilder(); try (BufferedReader br = new BufferedReader(new FileReader(args[0].trim()))) { String line; @@ -168,11 +183,11 @@ public static void main(String[] args) throws Exception { @SuppressWarnings("unchecked") Map props = new ObjectMapper().readValue(buffer.toString(), Map.class); - KafkaMonitor kafkaMonitor = new KafkaMonitor(props); - kafkaMonitor.start(); - LOG.info("KafkaMonitor started"); + XinfraMonitor xinfraMonitor = new XinfraMonitor(props); + xinfraMonitor.start(); + LOG.info("Xinfra Monitor has started."); - kafkaMonitor.awaitShutdown(); + xinfraMonitor.awaitShutdown(); } } diff --git a/src/main/java/com/linkedin/xinfra/monitor/XinfraMonitorConstants.java b/src/main/java/com/linkedin/xinfra/monitor/XinfraMonitorConstants.java new file mode 100644 index 00000000..f22c63c9 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/XinfraMonitorConstants.java @@ -0,0 +1,45 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. 
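(Illustrative sketch, not part of the patch: main() above parses the properties file as a single JSON object keyed by app/service name, and each entry must carry the "class.name" key that the constructor checks. A minimal, self-contained example of that shape; the "topic" value is illustrative, not a complete configuration.)

    import com.fasterxml.jackson.databind.ObjectMapper;
    import java.util.Map;

    final class ConfigShapeSketch {
      @SuppressWarnings({"unchecked", "rawtypes"})
      public static void main(String[] args) throws Exception {
        String exampleJson = "{\n"
            + "  \"single-cluster-monitor\": {\n"
            + "    \"class.name\": \"com.linkedin.xinfra.monitor.apps.SingleClusterMonitor\",\n"
            + "    \"topic\": \"xinfra-monitor-topic\"\n"
            + "  }\n"
            + "}";
        // Same parse main() performs on the file contents.
        Map<String, Map> props = new ObjectMapper().readValue(exampleJson, Map.class);
        System.out.println(props.keySet()); // -> [single-cluster-monitor]
        // new XinfraMonitor(props).start() would then build the app against a live cluster.
      }
    }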
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor; + +/** + * Constant variables in Xinfra Monitor repo. + */ +public class XinfraMonitorConstants { + + public XinfraMonitorConstants() { + + } + + public static final String TAGS_NAME = "name"; + + public static final String FALSE = "false"; + + public static final String XINFRA_MONITOR_PREFIX = "xinfra-monitor-"; + + public static final String TOPIC_MANIPULATION_SERVICE_TOPIC = + "xinfra-monitor-cluster-topic-manipulation-service-topic-"; + + public static final String KAFKA_LOG_DIRECTORY = "/tmp/kafka-logs"; + + public static final int TOPIC_MANIPULATION_TOPIC_NUM_PARTITIONS = 3; + + static final String FACTORY = "Factory"; + + static final String CLASS_NAME_CONFIG = "class.name"; + + public static final String METRIC_GROUP_NAME = "kafka-monitor"; + + public static final String JMX_PREFIX = "kmf"; + + public static final String METRIC_GROUP_NAME_PRODUCE_SERVICE = "produce-service"; + +} diff --git a/src/main/java/com/linkedin/kmf/apps/App.java b/src/main/java/com/linkedin/xinfra/monitor/apps/App.java similarity index 78% rename from src/main/java/com/linkedin/kmf/apps/App.java rename to src/main/java/com/linkedin/xinfra/monitor/apps/App.java index c1ac8ee0..cdc44be0 100644 --- a/src/main/java/com/linkedin/kmf/apps/App.java +++ b/src/main/java/com/linkedin/xinfra/monitor/apps/App.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,11 +7,12 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.apps; + +package com.linkedin.xinfra.monitor.apps; public interface App { - void start(); + void start() throws Exception; void stop(); diff --git a/src/main/java/com/linkedin/kmf/apps/MultiClusterMonitor.java b/src/main/java/com/linkedin/xinfra/monitor/apps/MultiClusterMonitor.java similarity index 65% rename from src/main/java/com/linkedin/kmf/apps/MultiClusterMonitor.java rename to src/main/java/com/linkedin/xinfra/monitor/apps/MultiClusterMonitor.java index 0ae0332c..f4aa8c1e 100644 --- a/src/main/java/com/linkedin/kmf/apps/MultiClusterMonitor.java +++ b/src/main/java/com/linkedin/xinfra/monitor/apps/MultiClusterMonitor.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. 
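(Illustrative sketch, not part of the patch: the XinfraMonitor constructor resolves a Service implementation to its factory by appending the FACTORY suffix from XinfraMonitorConstants to the configured class name, then invokes the factory reflectively. A minimal sketch of that convention; the service class name here is hypothetical.)

    import java.lang.reflect.Constructor;
    import java.util.Map;

    final class FactoryLookupSketch {
      // "com.example.FooService" resolves to "com.example.FooServiceFactory",
      // which must expose a (Map, String) constructor and a createService() method.
      static Object createServiceViaFactory(String className, Map<String, Object> props, String clusterName)
          throws Exception {
        Class<?> factoryClass = Class.forName(className + "Factory");
        Constructor<?> ctor = factoryClass.getConstructor(Map.class, String.class);
        Object factory = ctor.newInstance(props, clusterName);
        return factoryClass.getMethod("createService").invoke(factory);
      }
    }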
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,14 +7,18 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.apps; -import com.linkedin.kmf.apps.configs.MultiClusterMonitorConfig; -import com.linkedin.kmf.services.ConsumeService; -import com.linkedin.kmf.services.MultiClusterTopicManagementService; -import com.linkedin.kmf.services.ProduceService; +package com.linkedin.xinfra.monitor.apps; + +import com.linkedin.xinfra.monitor.apps.configs.MultiClusterMonitorConfig; +import com.linkedin.xinfra.monitor.services.ConsumeService; +import com.linkedin.xinfra.monitor.services.ConsumerFactoryImpl; +import com.linkedin.xinfra.monitor.services.MultiClusterTopicManagementService; +import com.linkedin.xinfra.monitor.services.ProduceService; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -26,10 +30,11 @@ * across Kafka clusters and make sure they have the same number of partitions. */ +@SuppressWarnings("rawtypes") public class MultiClusterMonitor implements App { private static final Logger LOG = LoggerFactory.getLogger(MultiClusterMonitor.class); - private final MultiClusterTopicManagementService _topicManagementService; + private final MultiClusterTopicManagementService _multiClusterTopicManagementService; private final ProduceService _produceService; private final ConsumeService _consumeService; private final String _name; @@ -37,9 +42,11 @@ public class MultiClusterMonitor implements App { public MultiClusterMonitor(Map props, String name) throws Exception { _name = name; MultiClusterMonitorConfig config = new MultiClusterMonitorConfig(props); - _topicManagementService = new MultiClusterTopicManagementService(createMultiClusterTopicManagementServiceProps(props, config), name); + _multiClusterTopicManagementService = new MultiClusterTopicManagementService(createMultiClusterTopicManagementServiceProps(props, config), name); + CompletableFuture topicPartitionReady = _multiClusterTopicManagementService.topicPartitionResult(); _produceService = new ProduceService(createProduceServiceProps(props, config), name); - _consumeService = new ConsumeService(createConsumeServiceProps(props, config), name); + ConsumerFactoryImpl consumerFactory = new ConsumerFactoryImpl(createConsumeServiceProps(props, config)); + _consumeService = new ConsumeService(name, topicPartitionReady, consumerFactory); } @SuppressWarnings("unchecked") @@ -60,7 +67,6 @@ private Map createConsumeServiceProps(Map props, return serviceProps; } - @SuppressWarnings("unchecked") private Map createMultiClusterTopicManagementServiceProps(Map props, MultiClusterMonitorConfig config) { Map serviceProps = new HashMap<>(); serviceProps.put(MultiClusterMonitorConfig.TOPIC_MANAGEMENT_SERVICE_CONFIG, props.get(MultiClusterMonitorConfig.TOPIC_MANAGEMENT_SERVICE_CONFIG)); @@ -70,15 +76,18 @@ private Map createMultiClusterTopicManagementServiceProps(Map topicPartitionResult = _multiClusterTopicManagementService.topicPartitionResult(); + topicPartitionResult.thenRun(() -> { + _produceService.start(); + _consumeService.start(); + }); + LOG.info(_name + "/MultiClusterMonitor started."); } @Override public void stop() { - _topicManagementService.stop(); + 
_multiClusterTopicManagementService.stop(); _produceService.stop(); _consumeService.stop(); LOG.info(_name + "/MultiClusterMonitor stopped"); @@ -86,13 +95,13 @@ public void stop() { @Override public boolean isRunning() { - return _topicManagementService.isRunning() && _produceService.isRunning() && _consumeService.isRunning(); + return _multiClusterTopicManagementService.isRunning() && _produceService.isRunning() && _consumeService.isRunning(); } @Override public void awaitShutdown() { - _topicManagementService.awaitShutdown(); - _produceService.awaitShutdown(); - _consumeService.awaitShutdown(); + _multiClusterTopicManagementService.awaitShutdown(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); + _produceService.awaitShutdown(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); + _consumeService.awaitShutdown(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); } } diff --git a/src/main/java/com/linkedin/kmf/apps/SingleClusterMonitor.java b/src/main/java/com/linkedin/xinfra/monitor/apps/SingleClusterMonitor.java similarity index 51% rename from src/main/java/com/linkedin/kmf/apps/SingleClusterMonitor.java rename to src/main/java/com/linkedin/xinfra/monitor/apps/SingleClusterMonitor.java index d118930f..a44b0827 100644 --- a/src/main/java/com/linkedin/kmf/apps/SingleClusterMonitor.java +++ b/src/main/java/com/linkedin/xinfra/monitor/apps/SingleClusterMonitor.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,32 +7,39 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
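(Illustrative sketch, not part of the patch: MultiClusterMonitor.start() above chains the produce/consume start-up onto the topic-management future via thenRun. A minimal example of that gating pattern, with a println standing in for the real Service objects.)

    import java.util.concurrent.CompletableFuture;

    final class StartupGateSketch {
      public static void main(String[] args) {
        CompletableFuture<Void> topicPartitionResult = new CompletableFuture<>();
        // Dependent services register their start-up against the future...
        topicPartitionResult.thenRun(() -> System.out.println("starting produce and consume services"));
        // ...and run only after topic management completes it, i.e. once the monitor topic is ready.
        topicPartitionResult.complete(null);
      }
    }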
*/ -package com.linkedin.kmf.apps; - -import com.linkedin.kmf.services.TopicManagementService; -import com.linkedin.kmf.services.configs.ConsumeServiceConfig; -import com.linkedin.kmf.services.configs.DefaultMetricsReporterServiceConfig; -import com.linkedin.kmf.services.configs.MultiClusterTopicManagementServiceConfig; -import com.linkedin.kmf.services.configs.ProduceServiceConfig; -import com.linkedin.kmf.services.ConsumeService; -import com.linkedin.kmf.services.JettyService; -import com.linkedin.kmf.services.JolokiaService; -import com.linkedin.kmf.services.DefaultMetricsReporterService; -import com.linkedin.kmf.services.ProduceService; -import com.linkedin.kmf.services.configs.TopicManagementServiceConfig; + +package com.linkedin.xinfra.monitor.apps; + +import com.linkedin.xinfra.monitor.services.ConsumeService; +import com.linkedin.xinfra.monitor.services.ConsumerFactory; +import com.linkedin.xinfra.monitor.services.ConsumerFactoryImpl; +import com.linkedin.xinfra.monitor.services.DefaultMetricsReporterService; +import com.linkedin.xinfra.monitor.services.JolokiaService; +import com.linkedin.xinfra.monitor.services.ProduceService; +import com.linkedin.xinfra.monitor.services.Service; +import com.linkedin.xinfra.monitor.services.TopicManagementService; +import com.linkedin.xinfra.monitor.services.configs.ConsumeServiceConfig; +import com.linkedin.xinfra.monitor.services.configs.DefaultMetricsReporterServiceConfig; +import com.linkedin.xinfra.monitor.services.configs.MultiClusterTopicManagementServiceConfig; +import com.linkedin.xinfra.monitor.services.configs.ProduceServiceConfig; +import com.linkedin.xinfra.monitor.services.configs.TopicManagementServiceConfig; +import com.linkedin.xinfra.monitor.services.metrics.ClusterTopicManipulationMetrics; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; import net.sourceforge.argparse4j.ArgumentParsers; import net.sourceforge.argparse4j.inf.ArgumentParser; import net.sourceforge.argparse4j.inf.Namespace; +import org.apache.kafka.common.utils.Utils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.kafka.common.utils.Utils; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static net.sourceforge.argparse4j.impl.Arguments.store; +import static com.linkedin.xinfra.monitor.common.Utils.prettyPrint; /* * The SingleClusterMonitor app is intended to monitor the performance and availability of a given Kafka cluster. 
It creates @@ -46,44 +53,109 @@ public class SingleClusterMonitor implements App { private static final Logger LOG = LoggerFactory.getLogger(SingleClusterMonitor.class); + private static final int SERVICES_INITIAL_CAPACITY = 4; private final TopicManagementService _topicManagementService; - private final ProduceService _produceService; - private final ConsumeService _consumeService; - private final String _name; - - public SingleClusterMonitor(Map props, String name) throws Exception { - _name = name; - _topicManagementService = new TopicManagementService(props, name); - _produceService = new ProduceService(props, name); - _consumeService = new ConsumeService(props, name); + private final String _clusterName; + private final List _allServices; + private final boolean _isTopicManagementServiceEnabled; + + public SingleClusterMonitor(Map props, String clusterName) throws Exception { + ConsumerFactory consumerFactory = new ConsumerFactoryImpl(props); + _clusterName = clusterName; + LOG.info("SingleClusterMonitor properties: {}", prettyPrint(props)); + TopicManagementServiceConfig config = new TopicManagementServiceConfig(props); + _isTopicManagementServiceEnabled = + config.getBoolean(TopicManagementServiceConfig.TOPIC_MANAGEMENT_ENABLED_CONFIG); + _allServices = new ArrayList<>(SERVICES_INITIAL_CAPACITY); + CompletableFuture topicPartitionResult; + if (_isTopicManagementServiceEnabled) { + String topicManagementServiceName = String.format("Topic-management-service-for-%s", clusterName); + _topicManagementService = new TopicManagementService(props, topicManagementServiceName); + topicPartitionResult = _topicManagementService.topicPartitionResult(); + + // block on the MultiClusterTopicManagementService to complete. + topicPartitionResult.get(); + + _allServices.add(_topicManagementService); + } else { + _topicManagementService = null; + topicPartitionResult = new CompletableFuture<>(); + topicPartitionResult.complete(null); + } + ProduceService produceService = new ProduceService(props, clusterName); + ConsumeService consumeService = new ConsumeService(clusterName, topicPartitionResult, consumerFactory); + _allServices.add(produceService); + _allServices.add(consumeService); } @Override - public void start() { - _topicManagementService.start(); - _produceService.start(); - _consumeService.start(); - LOG.info(_name + "/SingleClusterMonitor started"); + public void start() throws Exception { + if (_isTopicManagementServiceEnabled) { + _topicManagementService.start(); + CompletableFuture topicPartitionResult = _topicManagementService.topicPartitionResult(); + + try { + /* Delay 2 second to reduce the chance that produce and consumer thread has race condition + with TopicManagementService and MultiClusterTopicManagementService */ + long threadSleepMs = TimeUnit.SECONDS.toMillis(2); + Thread.sleep(threadSleepMs); + } catch (InterruptedException e) { + throw new Exception("Interrupted while sleeping the thread", e); + } + CompletableFuture topicPartitionFuture = topicPartitionResult.thenRun(() -> { + for (Service service : _allServices) { + if (!service.isRunning()) { + LOG.debug("Now starting {}", service.getServiceName()); + service.start(); + } + } + }); + + try { + topicPartitionFuture.get(); + } catch (InterruptedException | ExecutionException e) { + throw new Exception("Exception occurred while getting the TopicPartitionFuture", e); + } + + } else { + for (Service service : _allServices) { + if (!service.isRunning()) { + LOG.debug("Now starting {}", service.getServiceName()); + 
service.start(); + } + } + } + + LOG.info(_clusterName + "/SingleClusterMonitor started!"); } @Override public void stop() { - _topicManagementService.stop(); - _produceService.stop(); - _consumeService.stop(); - LOG.info(_name + "/SingleClusterMonitor stopped"); + for (Service service : _allServices) { + service.stop(); + } + LOG.info(_clusterName + "/SingleClusterMonitor stopped."); } @Override public boolean isRunning() { - return _topicManagementService.isRunning() && _produceService.isRunning() && _consumeService.isRunning(); + boolean isRunning = true; + + for (Service service : _allServices) { + if (!service.isRunning()) { + isRunning = false; + LOG.info("{} is not running.", service.getServiceName()); + } + } + + return isRunning; } @Override public void awaitShutdown() { - _topicManagementService.awaitShutdown(); - _produceService.awaitShutdown(); - _consumeService.awaitShutdown(); + for (Service service : _allServices) { + service.awaitShutdown(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); + } } /** Get the command-line argument parser. */ @@ -94,7 +166,7 @@ private static ArgumentParser argParser() { .description(""); parser.addArgument("--topic") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(String.class) .metavar("TOPIC") @@ -102,14 +174,14 @@ private static ArgumentParser argParser() { .help("Produce messages to this topic and consume message from this topic"); parser.addArgument("--producer-id") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(String.class) .dest("producerId") .help("The producerId will be used by producer client and encoded in the messages to the topic"); parser.addArgument("--broker-list") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(true) .type(String.class) .metavar("HOST1:PORT1[,HOST2:PORT2[...]]") @@ -117,7 +189,7 @@ private static ArgumentParser argParser() { .help("Comma-separated list of Kafka brokers in the form HOST1:PORT1,HOST2:PORT2,..."); parser.addArgument("--zookeeper") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(true) .type(String.class) .metavar("HOST:PORT") @@ -125,7 +197,7 @@ private static ArgumentParser argParser() { .help("The connection string for the zookeeper connection in the form host:port"); parser.addArgument("--record-size") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(String.class) .metavar("RECORD_SIZE") @@ -133,7 +205,7 @@ private static ArgumentParser argParser() { .help("The size of each record."); parser.addArgument("--producer-class") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(String.class) .metavar("PRODUCER_CLASS_NAME") @@ -141,7 +213,7 @@ private static ArgumentParser argParser() { .help("Specify the class of producer. Available choices include newProducer or class name"); parser.addArgument("--consumer-class") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(String.class) .metavar("CONSUMER_CLASS_NAME") @@ -149,7 +221,7 @@ private static ArgumentParser argParser() { .help("Specify the class of consumer. 
Available choices include oldConsumer, newConsumer, or class name"); parser.addArgument("--producer.config") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(String.class) .metavar("PRODUCER_CONFIG") @@ -157,7 +229,7 @@ private static ArgumentParser argParser() { .help("Producer config properties file."); parser.addArgument("--consumer.config") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(String.class) .metavar("CONSUMER_CONFIG") @@ -165,7 +237,7 @@ private static ArgumentParser argParser() { .help("Consumer config properties file."); parser.addArgument("--report-interval-sec") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(String.class) .metavar("REPORT_INTERVAL_SEC") @@ -173,7 +245,7 @@ private static ArgumentParser argParser() { .help("Interval in sec with which to export stats"); parser.addArgument("--record-delay-ms") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(String.class) .metavar("RECORD_DELAY_MS") @@ -181,7 +253,7 @@ private static ArgumentParser argParser() { .help("The delay in ms before sending next record to the same partition"); parser.addArgument("--latency-percentile-max-ms") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(String.class) .metavar("LATENCY_PERCENTILE_MAX_MS") @@ -190,7 +262,7 @@ private static ArgumentParser argParser() { "The percentile will be reported as Double.POSITIVE_INFINITY if its value exceeds the max value."); parser.addArgument("--latency-percentile-granularity-ms") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(String.class) .metavar("LATENCY_PERCENTILE_GRANULARITY_MS") @@ -198,15 +270,31 @@ private static ArgumentParser argParser() { .help("The granularity in ms of latency percentile metric. 
This is the width of the bucket used in percentile calculation."); parser.addArgument("--topic-creation-enabled") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(Boolean.class) .metavar("AUTO_TOPIC_CREATION_ENABLED") .dest("autoTopicCreationEnabled") .help(TopicManagementServiceConfig.TOPIC_CREATION_ENABLED_DOC); + parser.addArgument("--topic-add-partition-enabled") + .action(net.sourceforge.argparse4j.impl.Arguments.store()) + .required(false) + .type(Boolean.class) + .metavar("TOPIC_ADD_PARTITION_ENABLED") + .dest("topicAddPartitionEnabled") + .help(TopicManagementServiceConfig.TOPIC_ADD_PARTITION_ENABLED_DOC); + + parser.addArgument("--topic-reassign-partition-and-elect-leader-enabled") + .action(net.sourceforge.argparse4j.impl.Arguments.store()) + .required(false) + .type(Boolean.class) + .metavar("TOPIC_REASSIGN_PARTITION_AND_ELECT_LEADER_ENABLED") + .dest("topicReassignPartitionAndElectLeaderEnabled") + .help(TopicManagementServiceConfig.TOPIC_REASSIGN_PARTITION_AND_ELECT_LEADER_ENABLED_DOC); + parser.addArgument("--replication-factor") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(Integer.class) .metavar("REPLICATION_FACTOR") @@ -214,13 +302,21 @@ private static ArgumentParser argParser() { .help(TopicManagementServiceConfig.TOPIC_REPLICATION_FACTOR_DOC); parser.addArgument("--topic-rebalance-interval-ms") - .action(store()) + .action(net.sourceforge.argparse4j.impl.Arguments.store()) .required(false) .type(Integer.class) .metavar("REBALANCE_MS") .dest("rebalanceMs") .help(MultiClusterTopicManagementServiceConfig.REBALANCE_INTERVAL_MS_DOC); + parser.addArgument("--topic-preferred-leader-election-interval-ms") + .action(net.sourceforge.argparse4j.impl.Arguments.store()) + .required(false) + .type(Integer.class) + .metavar("PREFERED_LEADER_ELECTION_INTERVAL_MS") + .dest("preferredLeaderElectionIntervalMs") + .help(MultiClusterTopicManagementServiceConfig.PREFERRED_LEADER_ELECTION_CHECK_INTERVAL_MS_DOC); + return parser; } @@ -232,9 +328,7 @@ public static void main(String[] args) throws Exception { } Namespace res = parser.parseArgs(args); - Map props = new HashMap<>(); - // produce service config props.put(ProduceServiceConfig.ZOOKEEPER_CONNECT_CONFIG, res.getString("zkConnect")); props.put(ProduceServiceConfig.BOOTSTRAP_SERVERS_CONFIG, res.getString("brokerList")); @@ -266,11 +360,16 @@ public static void main(String[] args) throws Exception { // topic management service config if (res.getBoolean("autoTopicCreationEnabled") != null) props.put(TopicManagementServiceConfig.TOPIC_CREATION_ENABLED_CONFIG, res.getBoolean("autoTopicCreationEnabled")); + if (res.getBoolean("topicAddPartitionEnabled") != null) + props.put(TopicManagementServiceConfig.TOPIC_ADD_PARTITION_ENABLED_CONFIG, res.getBoolean("topicAddPartitionEnabled")); + if (res.getBoolean("topicReassignPartitionAndElectLeaderEnabled") != null) + props.put(TopicManagementServiceConfig.TOPIC_REASSIGN_PARTITION_AND_ELECT_LEADER_ENABLED_CONFIG, res.getBoolean("topicReassignPartitionAndElectLeaderEnabled")); if (res.getInt("replicationFactor") != null) props.put(TopicManagementServiceConfig.TOPIC_REPLICATION_FACTOR_CONFIG, res.getInt("replicationFactor")); if (res.getInt("rebalanceMs") != null) props.put(MultiClusterTopicManagementServiceConfig.REBALANCE_INTERVAL_MS_CONFIG, res.getInt("rebalanceMs")); - + if (res.getLong("preferredLeaderElectionIntervalMs") != null) + 
props.put(MultiClusterTopicManagementServiceConfig.PREFERRED_LEADER_ELECTION_CHECK_INTERVAL_MS_CONFIG, res.getLong("preferredLeaderElectionIntervalMs")); SingleClusterMonitor app = new SingleClusterMonitor(props, "single-cluster-monitor"); app.start(); @@ -279,31 +378,49 @@ public static void main(String[] args) throws Exception { if (res.getString("reportIntervalSec") != null) props.put(DefaultMetricsReporterServiceConfig.REPORT_INTERVAL_SEC_CONFIG, res.getString("reportIntervalSec")); List metrics = Arrays.asList( + "kmf.services:type=consume-service,name=*:topic-partitions-count", "kmf.services:type=produce-service,name=*:produce-availability-avg", "kmf.services:type=consume-service,name=*:consume-availability-avg", "kmf.services:type=produce-service,name=*:records-produced-total", "kmf.services:type=consume-service,name=*:records-consumed-total", "kmf.services:type=consume-service,name=*:records-lost-total", + "kmf.services:type=consume-service,name=*:records-lost-rate", "kmf.services:type=consume-service,name=*:records-duplicated-total", "kmf.services:type=consume-service,name=*:records-delay-ms-avg", "kmf.services:type=produce-service,name=*:records-produced-rate", "kmf.services:type=produce-service,name=*:produce-error-rate", - "kmf.services:type=consume-service,name=*:consume-error-rate"); + "kmf.services:type=consume-service,name=*:consume-error-rate", + "kmf.services:type=commit-availability-service,name=*:offsets-committed-total", + "kmf.services:type=commit-availability-service,name=*:offsets-committed-avg", + "kmf.services:type=commit-availability-service,name=*:failed-commit-offsets-total", + "kmf.services:type=commit-availability-service,name=*:failed-commit-offsets-avg", + "kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-avg", + "kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-max", + "kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-99th", + "kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-999th", + "kmf.services:type=commit-latency-service,name=*:commit-offset-latency-ms-9999th", + "kmf.services:type=offset-commit-service,name=*:offset-commit-availability-avg", + "kmf.services:type=offset-commit-service,name=*:offset-commit-service-success-rate", + "kmf.services:type=offset-commit-service,name=*:offset-commit-service-success-total", + "kmf.services:type=offset-commit-service,name=*:offset-commit-service-failure-rate", + "kmf.services:type=offset-commit-service,name=*:offset-commit-service-failure-total", + + "kmf.services:type=" + ClusterTopicManipulationMetrics.METRIC_GROUP_NAME + + ",name=*:topic-creation-metadata-propagation-ms-avg", + "kmf.services:type=" + ClusterTopicManipulationMetrics.METRIC_GROUP_NAME + + ",name=*:topic-creation-metadata-propagation-ms-max", + "kmf.services:type=" + ClusterTopicManipulationMetrics.METRIC_GROUP_NAME + + ",name=*:topic-deletion-metadata-propagation-ms-avg", + "kmf.services:type=" + ClusterTopicManipulationMetrics.METRIC_GROUP_NAME + + ",name=*:topic-deletion-metadata-propagation-ms-max" + ); + props.put(DefaultMetricsReporterServiceConfig.REPORT_METRICS_CONFIG, metrics); DefaultMetricsReporterService metricsReporterService = new DefaultMetricsReporterService(props, "end-to-end"); metricsReporterService.start(); - JolokiaService jolokiaService = new JolokiaService(new HashMap(), "end-to-end"); + JolokiaService jolokiaService = new JolokiaService(new HashMap<>(), "end-to-end"); jolokiaService.start(); - - JettyService jettyService = new 
JettyService(new HashMap(), "end-to-end"); - jettyService.start(); - - if (!app.isRunning()) { - LOG.error("Some services have stopped"); - System.exit(-1); - } - app.awaitShutdown(); } } diff --git a/src/main/java/com/linkedin/kmf/apps/configs/MultiClusterMonitorConfig.java b/src/main/java/com/linkedin/xinfra/monitor/apps/configs/MultiClusterMonitorConfig.java similarity index 86% rename from src/main/java/com/linkedin/kmf/apps/configs/MultiClusterMonitorConfig.java rename to src/main/java/com/linkedin/xinfra/monitor/apps/configs/MultiClusterMonitorConfig.java index 9fe128c7..6abefd48 100644 --- a/src/main/java/com/linkedin/kmf/apps/configs/MultiClusterMonitorConfig.java +++ b/src/main/java/com/linkedin/xinfra/monitor/apps/configs/MultiClusterMonitorConfig.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -8,10 +8,10 @@ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.apps.configs; +package com.linkedin.xinfra.monitor.apps.configs; -import com.linkedin.kmf.services.configs.CommonServiceConfig; -import com.linkedin.kmf.services.configs.MultiClusterTopicManagementServiceConfig; +import com.linkedin.xinfra.monitor.services.configs.CommonServiceConfig; +import com.linkedin.xinfra.monitor.services.configs.MultiClusterTopicManagementServiceConfig; import java.util.Map; import org.apache.kafka.common.config.AbstractConfig; import org.apache.kafka.common.config.ConfigDef; @@ -48,4 +48,5 @@ public Double getDouble(String key) { public MultiClusterMonitorConfig(Map props) { super(CONFIG, props); } + } diff --git a/src/main/java/com/linkedin/xinfra/monitor/common/ConfigDocumentationGenerator.java b/src/main/java/com/linkedin/xinfra/monitor/common/ConfigDocumentationGenerator.java new file mode 100644 index 00000000..a82afd9b --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/common/ConfigDocumentationGenerator.java @@ -0,0 +1,69 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.common; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; +import java.lang.reflect.Field; +import org.apache.kafka.common.config.AbstractConfig; +import org.apache.kafka.common.config.ConfigDef; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Generates the table of configuration parameters, their documentation strings and default values. 
+ */
+public class ConfigDocumentationGenerator {
+  private static final Logger LOG = LoggerFactory.getLogger(ConfigDocumentationGenerator.class);
+
+  private static void printHelp() {
+    System.out.println("ConfigDocumentationGenerator outputDirectory configClassNames...");
+  }
+
+  private static void printHtmlHeader(Writer out, String docClass) throws IOException {
+    out.write("<html><head><title>Kafka Monitoring Automatically Generated Documentation.</title></head><body>\n");
+    out.write("<h1>");
+    out.write(docClass);
+    out.write("</h1>\n");
+  }
+
+  private static void printHtmlFooter(Writer out) throws IOException {
+    out.write("</body>\n</html>\n");
+  }
+
+  public static void main(String[] argv) throws Exception {
+    if (argv.length < 2) {
+      printHelp();
+      System.exit(1);
+    }
+
+    File outputDir = new File(argv[0]);
+    if (!outputDir.exists()) {
+      outputDir.mkdirs();
+    }
+
+    for (int i = 1; i < argv.length; i++) {
+      Class<? extends AbstractConfig> configClass = (Class<? extends AbstractConfig>) Class.forName(argv[i]);
+      Field configDefField = configClass.getDeclaredField("CONFIG");
+      configDefField.setAccessible(true);
+      ConfigDef configDef = (ConfigDef) configDefField.get(null);
+      String docClass = configClass.getSimpleName();
+      File outputFile = new File(outputDir, docClass + ".html");
+      try (FileWriter fout = new FileWriter(outputFile)) {
+        printHtmlHeader(fout, docClass);
+        fout.write(configDef.toHtmlTable());
+        printHtmlFooter(fout);
+      }
+    }
+  }
+}
diff --git a/src/main/java/com/linkedin/xinfra/monitor/common/ConsumerGroupCoordinatorUtils.java b/src/main/java/com/linkedin/xinfra/monitor/common/ConsumerGroupCoordinatorUtils.java
new file mode 100644
index 00000000..897b4a70
--- /dev/null
+++ b/src/main/java/com/linkedin/xinfra/monitor/common/ConsumerGroupCoordinatorUtils.java
@@ -0,0 +1,85 @@
+/**
+ * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
+ * file except in compliance with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+package com.linkedin.xinfra.monitor.common;
+
+import java.util.Collections;
+import java.util.concurrent.ExecutionException;
+import org.apache.kafka.clients.admin.AdminClient;
+import org.apache.kafka.common.internals.Topic;
+import org.apache.kafka.common.utils.Utils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class ConsumerGroupCoordinatorUtils {
+  private static final Logger LOGGER = LoggerFactory.getLogger(ConsumerGroupCoordinatorUtils.class);
+  private static final String CONSUMER_GROUP_PREFIX_CANDIDATE = "__shadow_consumer_group-";
+
+  /**
+   * https://github.com/apache/kafka/blob/trunk/core/src/main/scala/kafka/coordinator/group/GroupMetadataManager.scala#L189
+   * The consumer group id's hash code is used for this modulo operation.
+   * @param groupId kafka consumer group ID
+   * @param consumerOffsetsTopicPartitions number of partitions in the __consumer_offsets topic.
+   * @return the __consumer_offsets partition (computed with Kafka's Utils.abs()) that the group maps to;
+   * the leader of that partition is the broker acting as the group coordinator.
+   */
+  public static int partitionFor(String groupId, int consumerOffsetsTopicPartitions) {
+    LOGGER.debug("Group id hash before modulo: {}", groupId.hashCode());
+    return Utils.abs(groupId.hashCode()) % consumerOffsetsTopicPartitions;
+  }
+
+  /**
+   * Finds a consumer group id that maps to the same __consumer_offsets partition as the target group,
+   * i.e. one whose hash(group.id) % (number of __consumer_offsets topic partitions) matches the target's.
+   * The leader of that partition is the group coordinator, so the two groups share a coordinator:
+   * choose B s.t. hash(A) % (number of __consumer_offsets topic partitions) == hash(B) % (number of __consumer_offsets topic partitions).
+   * @param targetGroupId the identifier of the target consumer group
+   * @param adminClient the AdminClient used to look up the __consumer_offsets partition count
+   */
+  public static String findCollision(String targetGroupId, AdminClient adminClient)
+      throws ExecutionException, InterruptedException {
+    if (targetGroupId.equals("")) {
+      throw new IllegalArgumentException("The target consumer group identifier cannot be empty: " + targetGroupId);
+    }
+
+    int numOffsetsTopicPartitions = adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME))
+        .values()
+        .get(Topic.GROUP_METADATA_TOPIC_NAME)
+        .get()
+        .partitions()
+        .size();
+
+    // The target partition is loop-invariant, so compute it once outside the loop.
+    int targetConsumerOffsetsPartition = partitionFor(targetGroupId, numOffsetsTopicPartitions);
+
+    // A local counter suffices; the value is discarded at the end of the computation.
+    int groupSuffix = 0;
+
+    // Hold the result in a local so it is not computed twice; this reduces the possibility of bugs.
+    String newConsumerGroup;
+
+    // Use while (true); an explicit halting condition would be harder to read.
+    while (true) {
+      // TODO: could play fancy StringBuilder games here to make this generate less garbage
+      newConsumerGroup = CONSUMER_GROUP_PREFIX_CANDIDATE + groupSuffix++;
+      int newGroupNamePartition = ConsumerGroupCoordinatorUtils.partitionFor(newConsumerGroup, numOffsetsTopicPartitions);
+      if (newGroupNamePartition == targetConsumerOffsetsPartition) {
+        break;
+      }
+    }
+
+    return newConsumerGroup;
+  }
+}
+
diff --git a/src/main/java/com/linkedin/kmf/common/DefaultTopicSchema.java b/src/main/java/com/linkedin/xinfra/monitor/common/DefaultTopicSchema.java
similarity index 70%
rename from src/main/java/com/linkedin/kmf/common/DefaultTopicSchema.java
rename to src/main/java/com/linkedin/xinfra/monitor/common/DefaultTopicSchema.java
index e248807a..cb1dc34c 100644
--- a/src/main/java/com/linkedin/kmf/common/DefaultTopicSchema.java
+++ b/src/main/java/com/linkedin/xinfra/monitor/common/DefaultTopicSchema.java
@@ -1,5 +1,5 @@
 /**
- * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
+ * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
  * file except in compliance with the License. You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
@@ -7,7 +7,8 @@
  * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
  * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
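(Illustrative sketch, not part of the patch: usage of ConsumerGroupCoordinatorUtils.findCollision above. The bootstrap address and target group id are hypothetical.)

    import com.linkedin.xinfra.monitor.common.ConsumerGroupCoordinatorUtils;
    import java.util.Properties;
    import org.apache.kafka.clients.admin.AdminClient;
    import org.apache.kafka.clients.admin.AdminClientConfig;

    final class FindCollisionSketch {
      public static void main(String[] args) throws Exception {
        Properties adminProps = new Properties();
        adminProps.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        try (AdminClient adminClient = AdminClient.create(adminProps)) {
          // The returned "shadow" group hashes to the same __consumer_offsets
          // partition as the target group, so both share a group coordinator.
          String shadowGroup = ConsumerGroupCoordinatorUtils.findCollision("xm-target-group", adminClient);
          System.out.println(shadowGroup);
        }
      }
    }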
*/ -package com.linkedin.kmf.common; + +package com.linkedin.xinfra.monitor.common; import java.util.Arrays; import org.apache.avro.Schema; @@ -15,17 +16,17 @@ public class DefaultTopicSchema { - public static final Field TOPIC_FIELD = new Field("topic", Schema.create(Schema.Type.STRING), null, null); + static final Field TOPIC_FIELD = new Field("topic", Schema.create(Schema.Type.STRING), null, null); public static final Field TIME_FIELD = new Field("time", Schema.create(Schema.Type.LONG), null, null); public static final Field INDEX_FIELD = new Field("index", Schema.create(Schema.Type.LONG), null, null); - public static final Field PRODUCER_ID_FIELD = new Field("producerId", Schema.create(Schema.Type.STRING), null, null); + static final Field PRODUCER_ID_FIELD = new Field("producerId", Schema.create(Schema.Type.STRING), null, null); - public static final Field CONTENT_FIELD = new Field("content", Schema.create(Schema.Type.STRING), null, null); + static final Field CONTENT_FIELD = new Field("content", Schema.create(Schema.Type.STRING), null, null); - public static final Schema MESSAGE_V0; + static final Schema MESSAGE_V0; static { MESSAGE_V0 = Schema.createRecord("KafkaMonitorSchema", null, "kafka.monitor", false); diff --git a/src/main/java/com/linkedin/kmf/common/MbeanAttributeValue.java b/src/main/java/com/linkedin/xinfra/monitor/common/MbeanAttributeValue.java similarity index 89% rename from src/main/java/com/linkedin/kmf/common/MbeanAttributeValue.java rename to src/main/java/com/linkedin/xinfra/monitor/common/MbeanAttributeValue.java index 3dff36cf..35c6f511 100644 --- a/src/main/java/com/linkedin/kmf/common/MbeanAttributeValue.java +++ b/src/main/java/com/linkedin/xinfra/monitor/common/MbeanAttributeValue.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,7 +7,8 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.common; + +package com.linkedin.xinfra.monitor.common; public class MbeanAttributeValue { private final String _mbean; diff --git a/src/main/java/com/linkedin/xinfra/monitor/common/Utils.java b/src/main/java/com/linkedin/xinfra/monitor/common/Utils.java new file mode 100644 index 00000000..d920437d --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/common/Utils.java @@ -0,0 +1,273 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +package com.linkedin.xinfra.monitor.common; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectWriter; +import com.linkedin.avroutil1.compatibility.AvroCodecUtil; +import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; +import com.linkedin.avroutil1.compatibility.AvroVersion; +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Properties; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import javax.management.MBeanAttributeInfo; +import javax.management.MBeanInfo; +import javax.management.MBeanServer; +import javax.management.ObjectName; +import kafka.admin.BrokerMetadata; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.Decoder; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.CreateTopicsResult; +import org.apache.kafka.clients.admin.ListPartitionReassignmentsResult; +import org.apache.kafka.clients.admin.NewTopic; +import org.apache.kafka.clients.admin.PartitionReassignment; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.TopicExistsException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Xinfra Monitor utilities. + */ +public class Utils { + private static final Logger LOG = LoggerFactory.getLogger(Utils.class); + public static final int ZK_CONNECTION_TIMEOUT_MS = 30_000; + public static final int ZK_SESSION_TIMEOUT_MS = 30_000; + private static final long LIST_PARTITION_REASSIGNMENTS_TIMEOUT_MS = 60000L; + private static final int LIST_PARTITION_REASSIGNMENTS_MAX_ATTEMPTS = 3; + private static final String LIST_PARTITION_REASSIGNMENTS_TIMEOUT_MS_CONFIG = "list.partition.reassignment.timeout.ms"; + private static final int DEFAULT_RETRY_BACKOFF_BASE = 2; + + public static String prettyPrint(Object value) throws JsonProcessingException { + ObjectMapper objectMapper = new ObjectMapper(); + ObjectWriter objectWriter = objectMapper.writerWithDefaultPrettyPrinter(); + String written = objectWriter.writeValueAsString(value); + LOG.trace("pretty printed: {}", written); + + return written; + } + + /** + * Retrieve the map of {@link PartitionReassignment reassignment} by {@link TopicPartition partitions}. + * + * If the response times out, the method retries up to {@link #LIST_PARTITION_REASSIGNMENTS_MAX_ATTEMPTS} times. + * The max time to wait for the {@link AdminClient adminClient} response is computed. 
+ *
+ * @param adminClient The {@link AdminClient adminClient} to ask for ongoing partition reassignments
+ * @return The map of {@link PartitionReassignment reassignments} by {@link TopicPartition partition}
+ */
+  public static Map<TopicPartition, PartitionReassignment> ongoingPartitionReassignments(AdminClient adminClient)
+      throws InterruptedException, ExecutionException, TimeoutException {
+    Map<TopicPartition, PartitionReassignment> partitionReassignments = null;
+    int attempts = 0;
+    long timeoutMs = LIST_PARTITION_REASSIGNMENTS_TIMEOUT_MS;
+    do {
+      ListPartitionReassignmentsResult responseResult = adminClient.listPartitionReassignments();
+      try {
+        // A successful response is expected to be non-null.
+        partitionReassignments = responseResult.reassignments().get(timeoutMs, TimeUnit.MILLISECONDS);
+      } catch (TimeoutException timeoutException) {
+        LOG.info(
+            "Xinfra Monitor has failed to list partition reassignments in {}ms (attempt={}). "
+                + "Please consider increasing the value of {} config.",
+            timeoutMs, 1 + attempts, LIST_PARTITION_REASSIGNMENTS_TIMEOUT_MS_CONFIG);
+        attempts++;
+        if (attempts == LIST_PARTITION_REASSIGNMENTS_MAX_ATTEMPTS) {
+          throw timeoutException;
+        }
+        timeoutMs *= DEFAULT_RETRY_BACKOFF_BASE;
+      }
+    } while (partitionReassignments == null);
+
+    return partitionReassignments;
+  }
+
+  public static List<Integer> replicaIdentifiers(Set<BrokerMetadata> brokers) {
+    if (brokers == null || brokers.size() == 0) {
+      throw new IllegalArgumentException("brokers are either null or empty.");
+    }
+
+    List<BrokerMetadata> brokerMetadataList = new ArrayList<>(brokers);
+
+    // Shuffle to get a random order in the replica list
+    Collections.shuffle(brokerMetadataList);
+
+    // Get broker ids for replica list
+    List<Integer> replicaList = brokerMetadataList.stream().map(m -> m.id()).collect(Collectors.toList());
+
+    return replicaList;
+  }
+
+  /**
+   * Read the number of partitions for the given topic via the given AdminClient.
+   * @param adminClient AdminClient object initialized.
+   * @param topic topic name.
+   * @return the number of partitions of the given topic
+   * @throws ExecutionException thrown when the describeTopics(topics) get(topic) execution fails.
+   * @throws InterruptedException thrown when the adminClient's describeTopics call is interrupted.
+   */
+  private static int getPartitionNumForTopic(AdminClient adminClient, String topic)
+      throws ExecutionException, InterruptedException {
+    try {
+      return adminClient.describeTopics(Collections.singleton(topic)).values().get(topic).get().partitions().size();
+    } catch (NoSuchElementException e) {
+      return 0;
+    } finally {
+      LOG.info("Finished getPartitionNumForTopic.");
+    }
+  }
+
+  /**
+   * Create the topic. This method attempts to create a topic so that all
+   * the brokers in the cluster will have partitionToBrokerRatio partitions. If the topic exists, but has different parameters,
+   * then this does nothing to update the parameters.
+   *
+   * TODO: Do we care about rack aware mode? I would think no because we want to spread the topic over all brokers.
+   * @param topic topic name
+   * @param replicationFactor the replication factor for the topic
+   * @param partitionToBrokerRatio This is multiplied by the number of brokers to compute the number of partitions in the topic.
+   * @param minPartitionNum the minimum number of partitions to create
+   * @param topicConfig additional parameters for the topic, for example min.insync.replicas
+   * @param adminClient AdminClient object initialized.
+   * @return the number of partitions created
+   * @throws ExecutionException exception thrown when executing the topic creation fails.
+ * @throws InterruptedException exception that's thrown when interrupt occurs. + */ + @SuppressWarnings("unchecked") + public static int createTopicIfNotExists(String topic, short replicationFactor, double partitionToBrokerRatio, + int minPartitionNum, Properties topicConfig, AdminClient adminClient) + throws ExecutionException, InterruptedException { + try { + if (adminClient.listTopics().names().get().contains(topic)) { + LOG.info("AdminClient indicates that topic {} already exists in the cluster. Topic config: {}", topic, topicConfig); + return getPartitionNumForTopic(adminClient, topic); + } + int brokerCount = Utils.getBrokerCount(adminClient); + int partitionCount = Math.max((int) Math.ceil(brokerCount * partitionToBrokerRatio), minPartitionNum); + try { + NewTopic newTopic = new NewTopic(topic, partitionCount, replicationFactor); + //noinspection rawtypes + newTopic.configs((Map) topicConfig); + + List topics = new ArrayList<>(); + topics.add(newTopic); + CreateTopicsResult result = adminClient.createTopics(topics); + + // waits for this topic creation future to complete, and then returns its result. + result.values().get(topic).get(); + LOG.info("CreateTopicsResult: {}.", result.values()); + } catch (TopicExistsException e) { + /* There is a race condition with the consumer. */ + LOG.info("Monitoring topic " + topic + " already exists in the cluster.", e); + return getPartitionNumForTopic(adminClient, topic); + } + LOG.info("Created monitoring topic {} in cluster with {} partitions and replication factor of {}.", topic, + partitionCount, replicationFactor); + + return partitionCount; + } finally { + LOG.info("Completed the topic creation if it doesn't exist for {}.", topic); + } + } + + /** + * @return the number of brokers in this cluster + */ + private static int getBrokerCount(AdminClient adminClient) throws ExecutionException, InterruptedException { + return adminClient.describeCluster().nodes().get().size(); + } + + /** + * @param timestamp time in Ms when this message is generated + * @param topic topic this message is sent to + * @param idx index is consecutive numbers used by XinfraMonitor to determine duplicate or lost messages + * @param msgSize size of the message + * @return string that encodes the above fields + */ + public static String jsonFromFields(String topic, long idx, long timestamp, String producerId, int msgSize) { + GenericRecord record = new GenericData.Record(DefaultTopicSchema.MESSAGE_V0); + record.put(DefaultTopicSchema.TOPIC_FIELD.name(), topic); + record.put(DefaultTopicSchema.INDEX_FIELD.name(), idx); + record.put(DefaultTopicSchema.TIME_FIELD.name(), timestamp); + record.put(DefaultTopicSchema.PRODUCER_ID_FIELD.name(), producerId); + // CONTENT_FIELD is composed of #msgSize number of character 'x', e.g. xxxxxxxxxx + record.put(DefaultTopicSchema.CONTENT_FIELD.name(), String.format("%1$-" + msgSize + "s", "").replace(' ', 'x')); + return jsonFromGenericRecord(record); + } + + /** + * @param message kafka message in the string format + * @return GenericRecord that is de-serialized from kafka message w.r.t. expected schema. 
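(Illustrative sketch, not part of the patch: a round trip through the payload helpers above; the topic, producer id, index, and message size are illustrative values.)

    import com.linkedin.xinfra.monitor.common.Utils;
    import org.apache.avro.generic.GenericRecord;

    final class PayloadSketch {
      public static void main(String[] args) {
        // Encode topic/index/timestamp/producerId plus 100 characters of 'x' padding...
        String payload = Utils.jsonFromFields("xinfra-monitor-topic", 42L, System.currentTimeMillis(), "producer-0", 100);
        // ...and decode it back into a GenericRecord with the expected schema.
        GenericRecord parsed = Utils.genericRecordFromJson(payload);
        System.out.println(parsed.get("index") + " produced at " + parsed.get("time"));
      }
    }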
+   */
+  public static GenericRecord genericRecordFromJson(String message) {
+    try {
+      Decoder jsonDecoder = AvroCompatibilityHelper.newCompatibleJsonDecoder(DefaultTopicSchema.MESSAGE_V0, message);
+      GenericDatumReader<GenericRecord> reader =
+          new GenericDatumReader<>(DefaultTopicSchema.MESSAGE_V0, DefaultTopicSchema.MESSAGE_V0);
+      return reader.read(null, jsonDecoder);
+    } catch (Exception e) {
+      throw new IllegalStateException("Unable to deserialize " + message, e);
+    }
+  }
+
+  public static String jsonFromGenericRecord(GenericRecord record) {
+    try {
+      return AvroCodecUtil.serializeJson(record, AvroVersion.AVRO_1_4);
+    } catch (IOException e) {
+      throw new IllegalStateException("Unable to serialize avro record due to error: " + record, e);
+    }
+  }
+
+  public static List<MbeanAttributeValue> getMBeanAttributeValues(String mbeanExpr, String attributeExpr) {
+    List<MbeanAttributeValue> values = new ArrayList<>();
+    MBeanServer server = ManagementFactory.getPlatformMBeanServer();
+    try {
+      Set<ObjectName> mbeanNames = server.queryNames(new ObjectName(mbeanExpr), null);
+      for (ObjectName mbeanName : mbeanNames) {
+        MBeanInfo mBeanInfo = server.getMBeanInfo(mbeanName);
+        MBeanAttributeInfo[] attributeInfos = mBeanInfo.getAttributes();
+        for (MBeanAttributeInfo attributeInfo : attributeInfos) {
+          if (attributeInfo.getName().equals(attributeExpr) || attributeExpr.length() == 0 || attributeExpr.equals(
+              "*")) {
+            double value = (Double) server.getAttribute(mbeanName, attributeInfo.getName());
+            values.add(new MbeanAttributeValue(mbeanName.getCanonicalName(), attributeInfo.getName(), value));
+          }
+        }
+      }
+    } catch (Exception e) {
+      LOG.error("Failed to retrieve value for " + mbeanExpr + ":" + attributeExpr, e);
+    }
+    return values;
+  }
+
+  public static void delay(Duration duration) {
+    try {
+      Thread.sleep(duration.toMillis());
+    } catch (InterruptedException e) {
+      LOG.warn("Interrupted while trying to sleep for {} ms.", duration.toMillis(), e);
+    }
+  }
+}
diff --git a/src/main/java/com/linkedin/kmf/consumer/BaseConsumerRecord.java b/src/main/java/com/linkedin/xinfra/monitor/consumer/BaseConsumerRecord.java
similarity index 91%
rename from src/main/java/com/linkedin/kmf/consumer/BaseConsumerRecord.java
rename to src/main/java/com/linkedin/xinfra/monitor/consumer/BaseConsumerRecord.java
index 17ce0c75..7b8f514b 100644
--- a/src/main/java/com/linkedin/kmf/consumer/BaseConsumerRecord.java
+++ b/src/main/java/com/linkedin/xinfra/monitor/consumer/BaseConsumerRecord.java
@@ -1,5 +1,5 @@
 /**
- * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
+ * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
  * file except in compliance with the License. You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
@@ -7,7 +7,8 @@
  * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
  * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/ -package com.linkedin.kmf.consumer; + +package com.linkedin.xinfra.monitor.consumer; public class BaseConsumerRecord { diff --git a/src/main/java/com/linkedin/kmf/consumer/KMBaseConsumer.java b/src/main/java/com/linkedin/xinfra/monitor/consumer/KMBaseConsumer.java similarity index 53% rename from src/main/java/com/linkedin/kmf/consumer/KMBaseConsumer.java rename to src/main/java/com/linkedin/xinfra/monitor/consumer/KMBaseConsumer.java index 84a011eb..a67a65dc 100644 --- a/src/main/java/com/linkedin/kmf/consumer/KMBaseConsumer.java +++ b/src/main/java/com/linkedin/xinfra/monitor/consumer/KMBaseConsumer.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,18 +7,37 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.consumer; + +package com.linkedin.xinfra.monitor.consumer; + +import java.util.Map; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.clients.consumer.OffsetCommitCallback; +import org.apache.kafka.common.TopicPartition; + /** * A base consumer used to abstract different consumer classes. * - * Implementations of this class must have constructor with the following signature:
+ * Implementations of this class must have constructor with the following signature: * Constructor({@link java.util.Properties} properties). */ public interface KMBaseConsumer { BaseConsumerRecord receive() throws Exception; + void commitAsync(); + + void commitAsync(final Map offsets, OffsetCommitCallback callback); + + void commitAsync(OffsetCommitCallback callback); + + OffsetAndMetadata committed(TopicPartition tp); + void close(); -} \ No newline at end of file + long lastCommitted(); + + void updateLastCommit(); + +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/consumer/NewConsumer.java b/src/main/java/com/linkedin/xinfra/monitor/consumer/NewConsumer.java new file mode 100644 index 00000000..e958d43c --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/consumer/NewConsumer.java @@ -0,0 +1,105 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.consumer; + +import com.linkedin.xinfra.monitor.common.ConsumerGroupCoordinatorUtils; +import java.time.Duration; +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.ExecutionException; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.clients.consumer.OffsetCommitCallback; +import org.apache.kafka.common.TopicPartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Wraps around the new consumer from Apache Kafka and implements the #KMBaseConsumer interface + */ +public class NewConsumer implements KMBaseConsumer { + + private final KafkaConsumer _consumer; + private Iterator> _recordIter; + private static final Logger LOGGER = LoggerFactory.getLogger(NewConsumer.class); + private static long lastCommitted; + + public NewConsumer(String topic, Properties consumerProperties, AdminClient adminClient) + throws ExecutionException, InterruptedException { + LOGGER.info("{} is being instantiated in the constructor..", this.getClass().getSimpleName()); + + NewConsumerConfig newConsumerConfig = new NewConsumerConfig(consumerProperties); + String targetConsumerGroupId = newConsumerConfig.getString(NewConsumerConfig.TARGET_CONSUMER_GROUP_ID_CONFIG); + + if (targetConsumerGroupId != null) { + consumerProperties.put(ConsumerConfig.GROUP_ID_CONFIG, configureGroupId(targetConsumerGroupId, adminClient)); + } + _consumer = new KafkaConsumer<>(consumerProperties); + _consumer.subscribe(Collections.singletonList(topic)); + } + + static String configureGroupId(String targetConsumerGroupId, AdminClient adminClient) + throws ExecutionException, InterruptedException { + + return ConsumerGroupCoordinatorUtils.findCollision(targetConsumerGroupId, adminClient); + } + + @Override + public BaseConsumerRecord receive() { + if (_recordIter == null || !_recordIter.hasNext()) { + _recordIter = 
_consumer.poll(Duration.ofMillis(Long.MAX_VALUE)).iterator(); + } + + ConsumerRecord record = _recordIter.next(); + return new BaseConsumerRecord(record.topic(), record.partition(), record.offset(), record.key(), record.value()); + } + + @Override + public void commitAsync() { + _consumer.commitAsync(); + } + + @Override + public void commitAsync(final Map offsets, OffsetCommitCallback callback) { + _consumer.commitAsync(offsets, callback); + } + + @Override + public void commitAsync(OffsetCommitCallback callback) { + _consumer.commitAsync(callback); + } + + @Override + public OffsetAndMetadata committed(TopicPartition tp) { + return _consumer.committed(tp); + } + + @Override + public void close() { + _consumer.close(); + } + + @Override + public long lastCommitted() { + return lastCommitted; + } + + @Override + public void updateLastCommit() { + lastCommitted = System.currentTimeMillis(); + } +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/consumer/NewConsumerConfig.java b/src/main/java/com/linkedin/xinfra/monitor/consumer/NewConsumerConfig.java new file mode 100644 index 00000000..0526c022 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/consumer/NewConsumerConfig.java @@ -0,0 +1,41 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ +package com.linkedin.xinfra.monitor.consumer; + +import java.util.Map; +import org.apache.kafka.common.config.AbstractConfig; +import org.apache.kafka.common.config.ConfigDef; + + +/** + * Configuration for Xinfra Monitor New Consumer + */ +public class NewConsumerConfig extends AbstractConfig { + + private static final ConfigDef CONFIG_DEF; + + public static final String TARGET_CONSUMER_GROUP_ID_CONFIG = "target.consumer.group.id"; + public static final String TARGET_CONSUMER_GROUP_ID_CONFIG_DOC = + "When defined a consumer group is chosen such that it maps to the same group coordinator as the specified " + + "group coordinator."; + + static { + CONFIG_DEF = new ConfigDef().define(TARGET_CONSUMER_GROUP_ID_CONFIG, + ConfigDef.Type.STRING, + null, + ConfigDef.Importance.MEDIUM, + TARGET_CONSUMER_GROUP_ID_CONFIG_DOC); + } + + public NewConsumerConfig(Map props) { + super(CONFIG_DEF, props); + } +} + diff --git a/src/main/java/com/linkedin/kmf/partitioner/KMPartitioner.java b/src/main/java/com/linkedin/xinfra/monitor/partitioner/KMPartitioner.java similarity index 81% rename from src/main/java/com/linkedin/kmf/partitioner/KMPartitioner.java rename to src/main/java/com/linkedin/xinfra/monitor/partitioner/KMPartitioner.java index 839e3d77..36c0f5f6 100644 --- a/src/main/java/com/linkedin/kmf/partitioner/KMPartitioner.java +++ b/src/main/java/com/linkedin/xinfra/monitor/partitioner/KMPartitioner.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. 
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,7 +7,8 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.partitioner; + +package com.linkedin.xinfra.monitor.partitioner; public interface KMPartitioner { diff --git a/src/main/java/com/linkedin/kmf/partitioner/NewKMPartitioner.java b/src/main/java/com/linkedin/xinfra/monitor/partitioner/NewKMPartitioner.java similarity index 75% rename from src/main/java/com/linkedin/kmf/partitioner/NewKMPartitioner.java rename to src/main/java/com/linkedin/xinfra/monitor/partitioner/NewKMPartitioner.java index e73a4119..12f8a59d 100644 --- a/src/main/java/com/linkedin/kmf/partitioner/NewKMPartitioner.java +++ b/src/main/java/com/linkedin/xinfra/monitor/partitioner/NewKMPartitioner.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,18 +7,17 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.partitioner; - -import static org.apache.kafka.common.utils.Utils.murmur2; +package com.linkedin.xinfra.monitor.partitioner; public class NewKMPartitioner implements KMPartitioner { public int partition(String key, int partitionNum) { byte[] keyBytes = key.getBytes(); - return toPositive(murmur2(keyBytes)) % partitionNum; + return toPositive(org.apache.kafka.common.utils.Utils.murmur2(keyBytes)) % partitionNum; } private static int toPositive(int number) { return number & 0x7fffffff; } + } diff --git a/src/main/java/com/linkedin/kmf/producer/BaseProducerRecord.java b/src/main/java/com/linkedin/xinfra/monitor/producer/BaseProducerRecord.java similarity index 90% rename from src/main/java/com/linkedin/kmf/producer/BaseProducerRecord.java rename to src/main/java/com/linkedin/xinfra/monitor/producer/BaseProducerRecord.java index 22548383..41c7f05d 100644 --- a/src/main/java/com/linkedin/kmf/producer/BaseProducerRecord.java +++ b/src/main/java/com/linkedin/xinfra/monitor/producer/BaseProducerRecord.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,7 +7,8 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
*/ -package com.linkedin.kmf.producer; + +package com.linkedin.xinfra.monitor.producer; public class BaseProducerRecord { private final String _topic; diff --git a/src/main/java/com/linkedin/kmf/producer/KMBaseProducer.java b/src/main/java/com/linkedin/xinfra/monitor/producer/KMBaseProducer.java similarity index 87% rename from src/main/java/com/linkedin/kmf/producer/KMBaseProducer.java rename to src/main/java/com/linkedin/xinfra/monitor/producer/KMBaseProducer.java index e2ecade6..26635833 100644 --- a/src/main/java/com/linkedin/kmf/producer/KMBaseProducer.java +++ b/src/main/java/com/linkedin/xinfra/monitor/producer/KMBaseProducer.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,7 +7,8 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.producer; + +package com.linkedin.xinfra.monitor.producer; import org.apache.kafka.clients.producer.RecordMetadata; @@ -23,4 +24,4 @@ public interface KMBaseProducer { void close(); -} \ No newline at end of file +} diff --git a/src/main/java/com/linkedin/kmf/producer/NewProducer.java b/src/main/java/com/linkedin/xinfra/monitor/producer/NewProducer.java similarity index 92% rename from src/main/java/com/linkedin/kmf/producer/NewProducer.java rename to src/main/java/com/linkedin/xinfra/monitor/producer/NewProducer.java index 7379dc7b..f9b9f9de 100644 --- a/src/main/java/com/linkedin/kmf/producer/NewProducer.java +++ b/src/main/java/com/linkedin/xinfra/monitor/producer/NewProducer.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,7 +7,8 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.producer; + +package com.linkedin.xinfra.monitor.producer; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerRecord; diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/AbstractService.java b/src/main/java/com/linkedin/xinfra/monitor/services/AbstractService.java new file mode 100644 index 00000000..45d88317 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/AbstractService.java @@ -0,0 +1,80 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+package com.linkedin.xinfra.monitor.services;
+
+import com.linkedin.xinfra.monitor.common.Utils;
+import java.time.Duration;
+import java.util.Collections;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+import org.apache.kafka.clients.admin.AdminClient;
+import org.apache.kafka.clients.admin.DescribeTopicsResult;
+import org.apache.kafka.clients.admin.TopicDescription;
+import org.apache.kafka.common.KafkaFuture;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public abstract class AbstractService implements Service {
+
+  private static final Logger LOG = LoggerFactory.getLogger(AbstractService.class);
+  // The fields below are used for the topic description retry logic, since it can take a while for the admin client
+  // to discover a topic because Kafka's metadata is eventually consistent. The retry logic is particularly helpful
+  // for avoiding exceptions right after a new topic gets created, since it takes even longer for the admin client
+  // to discover the newly created topic.
+  private final int _describeTopicRetries;
+  private final Duration _describeTopicRetryInterval;
+
+  AbstractService(int describeTopicRetries, Duration describeTopicRetryInterval) {
+    if (describeTopicRetries < 1) {
+      throw new IllegalArgumentException("Expected describeTopicRetries to be greater than 0. Got: " + describeTopicRetries);
+    }
+    _describeTopicRetries = describeTopicRetries;
+    _describeTopicRetryInterval = describeTopicRetryInterval;
+  }
+
+  TopicDescription getTopicDescription(AdminClient adminClient, String topic) {
+    int attemptCount = 0;
+    TopicDescription topicDescription = null;
+    Exception exception = null;
+
+    while (attemptCount < _describeTopicRetries) {
+      DescribeTopicsResult describeTopicsResult = adminClient.describeTopics(Collections.singleton(topic));
+      Map<String, KafkaFuture<TopicDescription>> topicResultValues = describeTopicsResult.values();
+      KafkaFuture<TopicDescription> topicDescriptionKafkaFuture = topicResultValues.get(topic);
+      topicDescription = null;
+      exception = null;
+      try {
+        topicDescription = topicDescriptionKafkaFuture.get();
+      } catch (InterruptedException | ExecutionException e) {
+        exception = e;
+      }
+      if (exception != null) {
+        LOG.error("Exception occurred while getting the topicDescriptionKafkaFuture for topic: {} at attempt {}", topic,
+            attemptCount, exception);
+      } else if (topicDescription == null) {
+        LOG.warn("Got null description for topic {} at attempt {}", topic, attemptCount);
+      } else {
+        return topicDescription;
+      }
+      attemptCount++;
+      if (attemptCount < _describeTopicRetries) {
+        Utils.delay(_describeTopicRetryInterval);
+      }
+    }
+
+    if (exception != null) {
+      throw new IllegalStateException(exception);
+    } else {
+      throw new IllegalStateException(String.format("Got null description for topic %s after %d retry(s)", topic, _describeTopicRetries));
+    }
+  }
+}
diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/ClusterTopicManipulationService.java b/src/main/java/com/linkedin/xinfra/monitor/services/ClusterTopicManipulationService.java
new file mode 100644
index 00000000..56c3ddc2
--- /dev/null
+++ b/src/main/java/com/linkedin/xinfra/monitor/services/ClusterTopicManipulationService.java
@@ -0,0 +1,391 @@
+/**
+ * Copyright 2020 LinkedIn Corp.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+package com.linkedin.xinfra.monitor.services;
+
+import com.linkedin.xinfra.monitor.XinfraMonitorConstants;
+import com.linkedin.xinfra.monitor.common.Utils;
+import com.linkedin.xinfra.monitor.services.configs.TopicManagementServiceConfig;
+import com.linkedin.xinfra.monitor.services.metrics.ClusterTopicManipulationMetrics;
+import com.linkedin.xinfra.monitor.topicfactory.TopicFactory;
+import java.lang.reflect.InvocationTargetException;
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import kafka.admin.BrokerMetadata;
+import org.apache.kafka.clients.admin.AdminClient;
+import org.apache.kafka.clients.admin.CreateTopicsResult;
+import org.apache.kafka.clients.admin.NewTopic;
+import org.apache.kafka.clients.admin.TopicDescription;
+import org.apache.kafka.common.KafkaFuture;
+import org.apache.kafka.common.Node;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.metrics.JmxReporter;
+import org.apache.kafka.common.metrics.MetricConfig;
+import org.apache.kafka.common.metrics.Metrics;
+import org.apache.kafka.common.metrics.MetricsReporter;
+import org.apache.kafka.common.requests.DescribeLogDirsResponse;
+import org.apache.kafka.common.utils.SystemTime;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Service that monitors topic creation and deletion in a Kafka cluster.
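+ * <p>Each cycle creates a uniquely named topic, waits until the log directories of every broker
+ * report the expected number of partition replicas, then deletes the topic again, recording
+ * topic-creation and topic-deletion latencies via {@link ClusterTopicManipulationMetrics}.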
+ */ +public class ClusterTopicManipulationService implements Service { + + private static final Logger LOGGER = LoggerFactory.getLogger(ClusterTopicManipulationService.class); + private final String _configDefinedServiceName; + private final Duration _reportIntervalSecond; + private final ScheduledExecutorService _executor; + private final AdminClient _adminClient; + private boolean _isOngoingTopicCreationDone; + private boolean _isOngoingTopicDeletionDone; + private final AtomicBoolean _running; + private String _currentlyOngoingTopic; + int _expectedPartitionsCount; + + private final ClusterTopicManipulationMetrics _clusterTopicManipulationMetrics; + private final TopicFactory _topicFactory; + private final String _zkConnect; + + public ClusterTopicManipulationService(String name, AdminClient adminClient, Map props) + throws ClassNotFoundException, NoSuchMethodException, IllegalAccessException, InvocationTargetException, + InstantiationException { + LOGGER.info("ClusterTopicManipulationService constructor initiated {}", this.getClass().getName()); + + _isOngoingTopicCreationDone = true; + _isOngoingTopicDeletionDone = true; + _adminClient = adminClient; + _executor = Executors.newSingleThreadScheduledExecutor(); + _reportIntervalSecond = Duration.ofSeconds(1); + _running = new AtomicBoolean(false); + _configDefinedServiceName = name; + + MetricConfig metricConfig = new MetricConfig().samples(60).timeWindow(1000, TimeUnit.MILLISECONDS); + List reporters = new ArrayList<>(); + reporters.add(new JmxReporter(Service.JMX_PREFIX)); + Metrics metrics = new Metrics(metricConfig, reporters, new SystemTime()); + Map tags = new HashMap<>(); + tags.put("name", name); + TopicManagementServiceConfig config = new TopicManagementServiceConfig(props); + String topicFactoryClassName = config.getString(TopicManagementServiceConfig.TOPIC_FACTORY_CLASS_CONFIG); + @SuppressWarnings("rawtypes") + Map topicFactoryConfig = + props.containsKey(TopicManagementServiceConfig.TOPIC_FACTORY_PROPS_CONFIG) ? (Map) props.get( + TopicManagementServiceConfig.TOPIC_FACTORY_PROPS_CONFIG) : new HashMap(); + + _clusterTopicManipulationMetrics = new ClusterTopicManipulationMetrics(metrics, tags); + _zkConnect = config.getString(TopicManagementServiceConfig.ZOOKEEPER_CONNECT_CONFIG); + _topicFactory = + (TopicFactory) Class.forName(topicFactoryClassName).getConstructor(Map.class).newInstance(topicFactoryConfig); + } + + /** + * The start logic must only execute once. If an error occurs then the implementer of this class must assume that + * stop() will be called to clean up. This method must be thread safe and must assume that stop() may be called + * concurrently. This can happen if the monitoring application's life cycle is being managed by a container. Start + * will only be called once. + */ + @Override + public void start() { + if (_running.compareAndSet(false, true)) { + LOGGER.info("ClusterTopicManipulationService started for {} - {}", _configDefinedServiceName, + this.getClass().getCanonicalName()); + Runnable clusterTopicManipulationServiceRunnable = new ClusterTopicManipulationServiceRunnable(); + + _executor.scheduleAtFixedRate(clusterTopicManipulationServiceRunnable, _reportIntervalSecond.getSeconds(), + _reportIntervalSecond.getSeconds(), TimeUnit.SECONDS); + } + } + + private class ClusterTopicManipulationServiceRunnable implements Runnable { + + private ClusterTopicManipulationServiceRunnable() { + // unaccessed. 
+ } + + /** + * When an object implementing interface Runnable is used + * to create a thread, starting the thread causes the object's + * run method to be called in that separately executing + * thread. + *

+     * The general contract of the method run is that it may
+     * take any action whatsoever.
+     *
+     * @see Thread#run()
+     */
+    @Override
+    public void run() {
+      try {
+        ClusterTopicManipulationService.this.createDeleteClusterTopic();
+      } catch (Exception e) {
+        LOGGER.error("{} {} failed to run createDeleteClusterTopic()", _configDefinedServiceName,
+            ClusterTopicManipulationService.this.getClass().getSimpleName(), e);
+      }
+    }
+  }
+
+  /**
+   * 1 - Iterates through all the brokers in the cluster.
+   * 2 - Checks the individual log directories of each broker.
+   * 3 - Counts how many partitions of the ongoing topic exist and compares the count against the expected value.
+   * The replication factor is currently set to the broker count so that partitions and replicas
+   * are spread across as many brokers in the cluster as possible.
+   */
+  private void createDeleteClusterTopic() {
+
+    if (_isOngoingTopicCreationDone) {
+
+      int random = ThreadLocalRandom.current().nextInt();
+      _currentlyOngoingTopic = XinfraMonitorConstants.TOPIC_MANIPULATION_SERVICE_TOPIC + Math.abs(random);
+
+      try {
+        int brokerCount = _adminClient.describeCluster().nodes().get().size();
+
+        Set<BrokerMetadata> brokers = new HashSet<>();
+        for (Node broker : _adminClient.describeCluster().nodes().get()) {
+          BrokerMetadata brokerMetadata = new BrokerMetadata(broker.id(), null);
+          brokers.add(brokerMetadata);
+        }
+        Set<Integer> excludedBrokers = _topicFactory.getExcludedBrokers(_adminClient);
+        if (!excludedBrokers.isEmpty()) {
+          brokers.removeIf(broker -> excludedBrokers.contains(broker.id()));
+        }
+
+        // Map from partition id to replica ids (i.e. broker ids).
+        // It is a good idea for all partitions to have the same number of replicas.
+        Map<Integer, List<Integer>> replicasAssignments = new HashMap<>();
+        for (int partition = 0; partition < XinfraMonitorConstants.TOPIC_MANIPULATION_TOPIC_NUM_PARTITIONS;
+            partition++) {
+
+          // Regardless of the replica assignments here, maybeReassignPartitionAndElectLeader()
+          // will periodically reassign the partition as needed.
+          replicasAssignments.putIfAbsent(partition, Utils.replicaIdentifiers(brokers));
+        }
+
+        CreateTopicsResult createTopicsResult =
+            _adminClient.createTopics(Collections.singleton(new NewTopic(_currentlyOngoingTopic, replicasAssignments)));
+        createTopicsResult.all().get();
+        _expectedPartitionsCount = brokerCount * XinfraMonitorConstants.TOPIC_MANIPULATION_TOPIC_NUM_PARTITIONS;
+        _isOngoingTopicCreationDone = false;
+        LOGGER.debug("Initiated a new topic creation. 
topic information - topic: {}, cluster broker count: {}", + _currentlyOngoingTopic, brokerCount); + _clusterTopicManipulationMetrics.startTopicCreationMeasurement(); + } catch (InterruptedException | ExecutionException e) { + LOGGER.error("Exception occurred while retrieving the brokers count: ", e); + } + } + + try { + LOGGER.trace("cluster id: {}", _adminClient.describeCluster().clusterId().get()); + Collection brokers = _adminClient.describeCluster().nodes().get(); + + if (this.doesClusterContainTopic(_currentlyOngoingTopic, brokers, _adminClient, _expectedPartitionsCount)) { + _clusterTopicManipulationMetrics.finishTopicCreationMeasurement(); + _isOngoingTopicCreationDone = true; + + if (_isOngoingTopicDeletionDone) { + KafkaFuture deleteTopicFuture = + _adminClient.deleteTopics(Collections.singleton(_currentlyOngoingTopic)).all(); + + _isOngoingTopicDeletionDone = false; + _clusterTopicManipulationMetrics.startTopicDeletionMeasurement(); + LOGGER.debug("clusterTopicManipulationServiceRunnable: Initiated topic deletion on {}.", + _currentlyOngoingTopic); + + deleteTopicFuture.get(); + } + + LOGGER.trace("{}-clusterTopicManipulationServiceRunnable successful!", this.getClass().getSimpleName()); + } + } catch (InterruptedException | ExecutionException e) { + LOGGER.error("Exception occurred while creating cluster topic in {}: ", _configDefinedServiceName, e); + } + + if (!_isOngoingTopicDeletionDone) { + + _clusterTopicManipulationMetrics.finishTopicDeletionMeasurement(); + LOGGER.debug("Finished measuring deleting the topic."); + + _isOngoingTopicDeletionDone = true; + } + } + + /** + * for all brokers, checks if the topic exists in the cluster by iterating through the log dirs of individual brokers. + * @param topic current ongoing topic + * @param brokers brokers to check log dirs from + * @param adminClient Admin Client + * @return true if the cluster contains the topic. + * @throws ExecutionException when attempting to retrieve the result of a task + * that aborted by throwing an exception. + * @throws InterruptedException when a thread is waiting, sleeping, or occupied, + * and the thread is interrupted, either before or during the activity. 
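+   * <p>A worked example of the expected total (broker count assumed for illustration): with 3 brokers
+   * and a replication factor equal to the broker count, a topic created with n partitions should
+   * surface n * 3 partition replicas across the log directories of the whole cluster.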
+ */ + private boolean doesClusterContainTopic(String topic, Collection brokers, AdminClient adminClient, + int expectedTotalPartitionsInCluster) throws ExecutionException, InterruptedException { + int totalPartitionsInCluster = 0; + for (Node broker : brokers) { + LOGGER.trace("broker log directories: {}", + adminClient.describeLogDirs(Collections.singleton(broker.id())).all().get()); + Map> logDirectoriesResponseMap = + adminClient.describeLogDirs(Collections.singleton(broker.id())).all().get(); + + totalPartitionsInCluster += this.processBroker(logDirectoriesResponseMap, broker, topic); + } + + if (totalPartitionsInCluster != expectedTotalPartitionsInCluster) { + LOGGER.debug("totalPartitionsInCluster {} does not equal expectedTotalPartitionsInCluster {}", + totalPartitionsInCluster, expectedTotalPartitionsInCluster); + return false; + } + + boolean isDescribeSuccessful = true; + try { + Map topicDescriptions = + ClusterTopicManipulationService.describeTopics(adminClient, Collections.singleton(topic)); + LOGGER.trace("topicDescriptionMap = {}", topicDescriptions); + } catch (InterruptedException | ExecutionException e) { + isDescribeSuccessful = false; + LOGGER.error("Exception occurred within describeTopicsFinished method for topics {}", + Collections.singleton(topic), e); + } + + LOGGER.trace("isDescribeSuccessful: {}", isDescribeSuccessful); + return isDescribeSuccessful; + } + + /** + * Waits if necessary for this future to complete and gets the future in a blocking fashion. + * returns Map if the future succeeds, which occurs only if all the topic descriptions are successful. + * @param adminClient administrative client for Kafka, supporting managing and inspecting topics, brokers, configurations and ACLs. + * @param topicNames Collection of topic names + * @return Map if describe topic succeeds. + */ + private static Map describeTopics(AdminClient adminClient, Collection topicNames) + throws InterruptedException, ExecutionException { + KafkaFuture> mapKafkaFuture = adminClient.describeTopics(topicNames).all(); + LOGGER.debug("describeTopics future: {}", mapKafkaFuture); + LOGGER.debug("describeTopics: {}", mapKafkaFuture.get()); + + return mapKafkaFuture.get(); + } + + /** + * iterates through the broker's log directories and checks for the ongoing topic partitions and replica's existence. 
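+   * <p>Note: only the first log directory reported by each broker is inspected when counting
+   * partition replicas.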
+ * @param logDirectoriesResponseMap map of log directories response in the broker + * @param broker broker to process the log dirs in + * @param topic ongoing kmf manipulation topic + */ + int processBroker(Map> logDirectoriesResponseMap, + Node broker, String topic) { + int totalPartitionsInBroker = 0; + LOGGER.trace("logDirectoriesResponseMap: {}", logDirectoriesResponseMap); + Map logDirInfoMap = logDirectoriesResponseMap.get(broker.id()); + String logDirectoriesKey = logDirInfoMap.keySet().iterator().next(); + LOGGER.trace("logDirInfoMap: {}", logDirInfoMap.get(logDirectoriesKey)); + DescribeLogDirsResponse.LogDirInfo logDirInfo = logDirInfoMap.get(logDirectoriesKey); + + if (logDirInfo != null && !logDirectoriesResponseMap.isEmpty()) { + Map topicPartitionReplicaInfoMap = logDirInfo.replicaInfos; + totalPartitionsInBroker += this.processLogDirsWithinBroker(topicPartitionReplicaInfoMap, topic, broker); + } + + return totalPartitionsInBroker; + } + + private int processLogDirsWithinBroker( + Map topicPartitionReplicaInfoMap, String topic, + Node broker) { + int totalPartitionsInBroker = 0; + for (Map.Entry topicPartitionReplicaInfoEntry : topicPartitionReplicaInfoMap + .entrySet()) { + + TopicPartition topicPartition = topicPartitionReplicaInfoEntry.getKey(); + DescribeLogDirsResponse.ReplicaInfo replicaInfo = topicPartitionReplicaInfoEntry.getValue(); + + if (topicPartition.topic().equals(topic)) { + totalPartitionsInBroker++; + LOGGER.trace("totalPartitions In The Broker = {}", totalPartitionsInBroker); + } + + LOGGER.trace("broker information: {}", broker); + LOGGER.trace("logDirInfo for kafka-logs: topicPartition = {}, replicaInfo = {}", topicPartition, replicaInfo); + } + + return totalPartitionsInBroker; + } + + /** + * This may be called multiple times. This method must be thread safe and must assume that start() may be called + * concurrently. This can happen if the monitoring application's life cycle is being managed by a container. + * Implementations must be non-blocking and should release the resources acquired by the service during start(). + */ + @Override + public void stop() { + if (_running.compareAndSet(true, false)) { + _executor.shutdown(); + } + } + + /** + * Implementations of this method must be thread safe as it can be called at any time. Implementations must be + * non-blocking. + * @return true if this start() has returned successfully else this must return false. This must also return false if + * the service can no longer perform its function. + */ + @Override + public boolean isRunning() { + + return _running.get() && !_executor.isShutdown(); + } + + /** + * Implementations of this method must be thread safe and must be blocking. 
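+   * <p>Note: this implementation waits up to a fixed three minutes for executor termination and does
+   * not use the supplied timeout and timeUnit arguments.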
+ */ + @Override + public void awaitShutdown(long timeout, TimeUnit timeUnit) { + + try { + _executor.awaitTermination(3, TimeUnit.MINUTES); + LOGGER.info("{} shutdown completed", _configDefinedServiceName); + } catch (InterruptedException e) { + LOGGER.info("Thread interrupted when waiting for {} to shutdown", _configDefinedServiceName); + } + } + + @Override + public String toString() { + return this.getClass().getSimpleName() + "-" + _configDefinedServiceName; + } + + void setExpectedPartitionsCount(int count) { + _expectedPartitionsCount = count; + } + + int expectedPartitionsCount() { + return _expectedPartitionsCount; + } +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/ClusterTopicManipulationServiceFactory.java b/src/main/java/com/linkedin/xinfra/monitor/services/ClusterTopicManipulationServiceFactory.java new file mode 100644 index 00000000..2d932174 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/ClusterTopicManipulationServiceFactory.java @@ -0,0 +1,50 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import java.util.Map; +import org.apache.kafka.clients.admin.AdminClient; + + +/** + * Factory class which instantiates a ClusterTopicManipulationService service object. + */ +@SuppressWarnings("rawtypes") +public class ClusterTopicManipulationServiceFactory implements ServiceFactory { + + private final Map _properties; + private final String _serviceName; + + /** + * "Class 'ClusterTopicManipulationServiceFactory' is never used" and + * "Constructor 'ClusterTopicManipulationServiceFactory(java.util.Map, java.lang.String)' is never used" + * shown as warnings in Intellij IDEA are not true. + * XinfraMonitor class uses (ServiceFactory) Class.forName(..) + * .getConstructor(...).newInstance(...) to return Class that's associated + * with the class or interface with the given string name + * @param properties config properties + * @param serviceName name of the service + */ + public ClusterTopicManipulationServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + + AdminClient adminClient = AdminClient.create(_properties); + + return new ClusterTopicManipulationService(_serviceName, adminClient, _properties); + } +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/ConsumeService.java b/src/main/java/com/linkedin/xinfra/monitor/services/ConsumeService.java new file mode 100644 index 00000000..53f32360 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/ConsumeService.java @@ -0,0 +1,271 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +package com.linkedin.xinfra.monitor.services; + +import com.linkedin.xinfra.monitor.common.DefaultTopicSchema; +import com.linkedin.xinfra.monitor.common.Utils; +import com.linkedin.xinfra.monitor.consumer.BaseConsumerRecord; +import com.linkedin.xinfra.monitor.consumer.KMBaseConsumer; +import com.linkedin.xinfra.monitor.services.metrics.CommitAvailabilityMetrics; +import com.linkedin.xinfra.monitor.services.metrics.CommitLatencyMetrics; +import com.linkedin.xinfra.monitor.services.metrics.ConsumeMetrics; +import java.time.Duration; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.TopicDescription; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.clients.consumer.OffsetCommitCallback; +import org.apache.kafka.common.MetricName; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.metrics.JmxReporter; +import org.apache.kafka.common.metrics.MetricConfig; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.metrics.MetricsReporter; +import org.apache.kafka.common.metrics.stats.CumulativeSum; +import org.apache.kafka.common.utils.SystemTime; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ConsumeService extends AbstractService { + private static final Logger LOG = LoggerFactory.getLogger(ConsumeService.class); + private static final String TAGS_NAME = "name"; + private static final long COMMIT_TIME_INTERVAL = 4; + private static final long CONSUME_THREAD_SLEEP_MS = 100; + private static Metrics metrics; + private final AtomicBoolean _running; + private final KMBaseConsumer _baseConsumer; + private final int _latencySlaMs; + private ConsumeMetrics _sensors; + private Thread _consumeThread; + private final AdminClient _adminClient; + private CommitAvailabilityMetrics _commitAvailabilityMetrics; + private CommitLatencyMetrics _commitLatencyMetrics; + private String _topic; + private final String _name; + private static final String METRIC_GROUP_NAME = "consume-service"; + private static Map tags; + + /** + * Mainly contains services for three metrics: + * 1 - ConsumeAvailability metrics + * 2 - CommitOffsetAvailability metrics + * 2.1 - commitAvailabilityMetrics records offsets committed upon success. that is, no exception upon callback + * 2.2 - commitAvailabilityMetrics records offsets commit fail upon failure. that is, exception upon callback + * 3 - CommitOffsetLatency metrics + * 3.1 - commitLatencyMetrics records the latency between last successful callback and start of last recorded commit. + * + * @param name Name of the Monitor instance + * @param topicPartitionResult The completable future for topic partition + * @param consumerFactory Consumer Factory object. 
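+   * <p>Offsets are committed asynchronously at most once every COMMIT_TIME_INTERVAL (4) seconds;
+   * commit latency is measured from the start of a commit to the completion of its callback.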
+   * @throws ExecutionException when attempting to retrieve the result of a task that aborted by throwing an exception
+   * @throws InterruptedException when a thread is waiting, sleeping, or otherwise occupied and the thread is interrupted
+   */
+  public ConsumeService(String name,
+                        CompletableFuture<Void> topicPartitionResult,
+                        ConsumerFactory consumerFactory)
+      throws ExecutionException, InterruptedException {
+    // TODO: Make the values of the fields below come from configs.
+    super(10, Duration.ofMinutes(1));
+    _baseConsumer = consumerFactory.baseConsumer();
+    _latencySlaMs = consumerFactory.latencySlaMs();
+    _name = name;
+    _adminClient = consumerFactory.adminClient();
+    _running = new AtomicBoolean(false);
+
+    // Returns a new CompletionStage (topicPartitionFuture) which executes the given action -
+    // the code inside run() - when this stage (topicPartitionResult) completes normally.
+    CompletableFuture<Void> topicPartitionFuture = topicPartitionResult.thenRun(() -> {
+      MetricConfig metricConfig = new MetricConfig().samples(60).timeWindow(1000, TimeUnit.MILLISECONDS);
+      List<MetricsReporter> reporters = new ArrayList<>();
+      reporters.add(new JmxReporter(JMX_PREFIX));
+      metrics = new Metrics(metricConfig, reporters, new SystemTime());
+      tags = new HashMap<>();
+      tags.put(TAGS_NAME, name);
+      _topic = consumerFactory.topic();
+      _sensors = new ConsumeMetrics(metrics, tags, consumerFactory.latencyPercentileMaxMs(),
+          consumerFactory.latencyPercentileGranularityMs());
+      _commitLatencyMetrics = new CommitLatencyMetrics(metrics, tags, consumerFactory.latencyPercentileMaxMs(),
+          consumerFactory.latencyPercentileGranularityMs());
+      _commitAvailabilityMetrics = new CommitAvailabilityMetrics(metrics, tags);
+      _consumeThread = new Thread(() -> {
+        try {
+          consume();
+        } catch (Exception e) {
+          LOG.error(name + "/ConsumeService failed", e);
+        }
+      }, name + " consume-service");
+      _consumeThread.setDaemon(true);
+      _consumeThread.setUncaughtExceptionHandler((t, e) -> {
+        LOG.error(name + "/ConsumeService error", e);
+      });
+    });
+
+    // In a blocking fashion, wait for this topicPartitionFuture to complete, and then return its result.
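+    // Failures inside the callback above (metric registration or consume-thread setup) therefore
+    // surface here as an ExecutionException rather than being deferred until start().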
+ topicPartitionFuture.get(); + } + + private void consume() throws Exception { + /* Delay 1 second to reduce the chance that consumer creates topic before TopicManagementService */ + Thread.sleep(1000); + + Map nextIndexes = new HashMap<>(); + + while (_running.get()) { + BaseConsumerRecord record; + try { + record = _baseConsumer.receive(); + } catch (Exception e) { + _sensors._consumeError.record(); + LOG.warn(_name + "/ConsumeService failed to receive record", e); + /* Avoid busy while loop */ + //noinspection BusyWait + Thread.sleep(CONSUME_THREAD_SLEEP_MS); + continue; + } + + if (record == null) continue; + + GenericRecord avroRecord = null; + try { + avroRecord = Utils.genericRecordFromJson(record.value()); + } catch (Exception exception) { + LOG.error("An exception occurred while getting avro record.", exception); + } + + if (avroRecord == null) { + _sensors._consumeError.record(); + continue; + } + int partition = record.partition(); + /* Commit availability and commit latency service */ + /* Call commitAsync, wait for a NON-NULL return value (see https://issues.apache.org/jira/browse/KAFKA-6183) */ + OffsetCommitCallback commitCallback = new OffsetCommitCallback() { + @Override + public void onComplete(Map topicPartitionOffsetAndMetadataMap, Exception kafkaException) { + if (kafkaException != null) { + LOG.error("Exception while trying to perform an asynchronous commit.", kafkaException); + _commitAvailabilityMetrics._failedCommitOffsets.record(); + } else { + _commitAvailabilityMetrics._offsetsCommitted.record(); + _commitLatencyMetrics.recordCommitComplete(); + } + } + }; + + /* Current timestamp to perform subtraction*/ + long currTimeMillis = System.currentTimeMillis(); + + /* 4 seconds consumer offset commit interval. */ + long timeDiffMillis = TimeUnit.SECONDS.toMillis(COMMIT_TIME_INTERVAL); + + if (currTimeMillis - _baseConsumer.lastCommitted() >= timeDiffMillis) { + /* commit the consumer offset asynchronously with a callback. */ + _baseConsumer.commitAsync(commitCallback); + _commitLatencyMetrics.recordCommitStart(); + /* Record the current time for the committed consumer offset */ + _baseConsumer.updateLastCommit(); + } + /* Finished consumer offset commit service. */ + + long index = (Long) avroRecord.get(DefaultTopicSchema.INDEX_FIELD.name()); + long currMs = System.currentTimeMillis(); + long prevMs = (Long) avroRecord.get(DefaultTopicSchema.TIME_FIELD.name()); + + _sensors._recordsConsumed.record(); + _sensors._bytesConsumed.record(record.value().length()); + _sensors._recordsDelay.record(currMs - prevMs); + + if (currMs - prevMs > _latencySlaMs) + _sensors._recordsDelayed.record(); + + if (index == -1L || !nextIndexes.containsKey(partition)) { + nextIndexes.put(partition, -1L); + continue; + } + + long nextIndex = nextIndexes.get(partition); + + if (nextIndex == -1 || index == nextIndex) { + nextIndexes.put(partition, index + 1); + + } else if (index < nextIndex) { + _sensors._recordsDuplicated.record(); + } else { // this will equate to the case where index > nextIndex... + nextIndexes.put(partition, index + 1); + long numLostRecords = index - nextIndex; + _sensors._recordsLost.record(numLostRecords); + LOG.info("_recordsLost recorded: Avro record current index: {} at timestamp {}. Next index: {}. 
Lost {} records.", index, currMs, nextIndex, numLostRecords); + } + } + /* end of consume() while loop */ + LOG.info("{}/ConsumeService/Consumer closing.", _name); + _baseConsumer.close(); + LOG.info("{}/ConsumeService/Consumer stopped.", _name); + } + + Metrics metrics() { + return metrics; + } + + void startConsumeThreadForTesting() { + if (_running.compareAndSet(false, true)) { + _consumeThread.start(); + LOG.info("{}/ConsumeService started.", _name); + } + } + + @Override + public synchronized void start() { + if (_running.compareAndSet(false, true)) { + _consumeThread.start(); + LOG.info("{}/ConsumeService started.", _name); + + TopicDescription topicDescription = getTopicDescription(_adminClient, _topic); + @SuppressWarnings("ConstantConditions") + double partitionCount = topicDescription.partitions().size(); + metrics.sensor("topic-partitions").add( + new MetricName("topic-partitions-count", METRIC_GROUP_NAME, "The total number of partitions for the topic.", + tags), new CumulativeSum(partitionCount)); + } + } + + @Override + public synchronized void stop() { + if (_running.compareAndSet(true, false)) { + LOG.info("{}/ConsumeService stopping.", _name); + } + } + + @Override + public void awaitShutdown(long timeout, TimeUnit unit) { + LOG.info("{}/ConsumeService shutdown awaiting…", _name); + try { + _consumeThread.join(unit.toMillis(timeout)); + } catch (InterruptedException e) { + LOG.error(_name + "/ConsumeService interrupted", e); + } + LOG.info("{}/ConsumeService shutdown completed.", _name); + } + + @Override + public boolean isRunning() { + return _running.get() && _consumeThread.isAlive(); + } + +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/ConsumeServiceFactory.java b/src/main/java/com/linkedin/xinfra/monitor/services/ConsumeServiceFactory.java new file mode 100644 index 00000000..c3a290ed --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/ConsumeServiceFactory.java @@ -0,0 +1,39 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import java.util.Map; +import java.util.concurrent.CompletableFuture; + + +/** + * Factory that constructs the ConsumeService. 
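+ * <p>Like the other service factories in this package, this class is expected to be instantiated
+ * reflectively by XinfraMonitor via Class.forName(..).getConstructor(..).newInstance(..).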
+ */ +@SuppressWarnings({"rawtypes", "unchecked"}) +public class ConsumeServiceFactory implements ServiceFactory { + private final Map _props; + private final String _name; + + public ConsumeServiceFactory(Map props, String name) { + _props = props; + _name = name; + } + + @Override + public Service createService() throws Exception { + + CompletableFuture topicPartitionResult = new CompletableFuture<>(); + topicPartitionResult.complete(null); + ConsumerFactoryImpl consumerFactory = new ConsumerFactoryImpl(_props); + + return new ConsumeService(_name, topicPartitionResult, consumerFactory); + } +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/ConsumerFactory.java b/src/main/java/com/linkedin/xinfra/monitor/services/ConsumerFactory.java new file mode 100644 index 00000000..e603550b --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/ConsumerFactory.java @@ -0,0 +1,32 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + + +package com.linkedin.xinfra.monitor.services; + +import com.linkedin.xinfra.monitor.consumer.KMBaseConsumer; +import org.apache.kafka.clients.admin.AdminClient; + + +public interface ConsumerFactory { + + AdminClient adminClient(); + + int latencySlaMs(); + + KMBaseConsumer baseConsumer(); + + String topic(); + + int latencyPercentileMaxMs(); + + int latencyPercentileGranularityMs(); + +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/ConsumerFactoryImpl.java b/src/main/java/com/linkedin/xinfra/monitor/services/ConsumerFactoryImpl.java new file mode 100644 index 00000000..07943db8 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/ConsumerFactoryImpl.java @@ -0,0 +1,138 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +package com.linkedin.xinfra.monitor.services; + +import com.linkedin.xinfra.monitor.consumer.KMBaseConsumer; +import com.linkedin.xinfra.monitor.consumer.NewConsumer; +import com.linkedin.xinfra.monitor.services.configs.CommonServiceConfig; +import com.linkedin.xinfra.monitor.services.configs.ConsumeServiceConfig; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.Random; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class ConsumerFactoryImpl implements ConsumerFactory { + private final KMBaseConsumer _baseConsumer; + private final String _topic; + private static final String FALSE = "false"; + private final int _latencyPercentileMaxMs; + private final int _latencyPercentileGranularityMs; + private static final String[] NON_OVERRIDABLE_PROPERTIES = + new String[] {ConsumeServiceConfig.BOOTSTRAP_SERVERS_CONFIG, ConsumeServiceConfig.ZOOKEEPER_CONNECT_CONFIG}; + private final int _latencySlaMs; + private static AdminClient adminClient; + private static final Logger LOG = LoggerFactory.getLogger(ConsumerFactoryImpl.class); + + @SuppressWarnings("rawtypes") + public ConsumerFactoryImpl(Map props) throws Exception { + LOG.info("Creating AdminClient."); + adminClient = AdminClient.create(props); + Map consumerPropsOverride = props.containsKey(ConsumeServiceConfig.CONSUMER_PROPS_CONFIG) + ? (Map) props.get(ConsumeServiceConfig.CONSUMER_PROPS_CONFIG) : new HashMap<>(); + ConsumeServiceConfig config = new ConsumeServiceConfig(props); + _topic = config.getString(ConsumeServiceConfig.TOPIC_CONFIG); + String zkConnect = config.getString(ConsumeServiceConfig.ZOOKEEPER_CONNECT_CONFIG); + String brokerList = config.getString(ConsumeServiceConfig.BOOTSTRAP_SERVERS_CONFIG); + String consumerClassName = config.getString(ConsumeServiceConfig.CONSUMER_CLASS_CONFIG); + _latencySlaMs = config.getInt(ConsumeServiceConfig.LATENCY_SLA_MS_CONFIG); + _latencyPercentileMaxMs = config.getInt(ConsumeServiceConfig.LATENCY_PERCENTILE_MAX_MS_CONFIG); + _latencyPercentileGranularityMs = config.getInt(ConsumeServiceConfig.LATENCY_PERCENTILE_GRANULARITY_MS_CONFIG); + for (String property: NON_OVERRIDABLE_PROPERTIES) { + if (consumerPropsOverride.containsKey(property)) { + throw new ConfigException("Override must not contain " + property + " config."); + } + } + Properties consumerProps = new Properties(); + + /* Assign default config. This has the lowest priority. */ + consumerProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, FALSE); + consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest"); + consumerProps.put(ConsumerConfig.CLIENT_ID_CONFIG, "kmf-consumer"); + consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "kmf-consumer-group-" + new Random().nextInt()); + consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); + consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); + if (consumerClassName.equals(NewConsumer.class.getCanonicalName()) || consumerClassName.equals(NewConsumer.class.getSimpleName())) { + consumerClassName = NewConsumer.class.getCanonicalName(); + } + + /* Assign config specified for ConsumeService. 
*/ + consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList); + consumerProps.put(CommonServiceConfig.ZOOKEEPER_CONNECT_CONFIG, zkConnect); + + /* Assign config specified for consumer. This has the highest priority. */ + consumerProps.putAll(consumerPropsOverride); + + if (props.containsKey(ConsumeServiceConfig.CONSUMER_PROPS_CONFIG)) { + props.forEach(consumerProps::putIfAbsent); + } + + java.lang.reflect.Constructor constructor = adminClientConstructorIfExists(consumerClassName); + if (constructor != null) { + _baseConsumer = (KMBaseConsumer) constructor + .newInstance(_topic, consumerProps, adminClient()); + } else { + _baseConsumer = (KMBaseConsumer) Class.forName(consumerClassName) + .getConstructor(String.class, Properties.class) + .newInstance(_topic, consumerProps); + } + } + + private static java.lang.reflect.Constructor adminClientConstructorIfExists(String consumerClassName) + throws ClassNotFoundException { + try { + return Class.forName(consumerClassName).getConstructor(String.class, Properties.class, AdminClient.class); + } catch (java.lang.NoSuchMethodException noSuchMethodException) { + LOG.info(consumerClassName + + " does not provide a constructor with signature (String, Properties, AdminClient) - falling back to (String, Properties)."); + return null; + } catch (ClassNotFoundException e) { + throw new ClassNotFoundException("Consumer class " + consumerClassName + " was not found.", e); + } + } + + @Override + public AdminClient adminClient() { + return adminClient; + } + + @Override + public int latencySlaMs() { + return _latencySlaMs; + } + + @Override + public KMBaseConsumer baseConsumer() { + return _baseConsumer; + } + + @Override + public String topic() { + return _topic; + } + + @Override + public int latencyPercentileMaxMs() { + return _latencyPercentileMaxMs; + } + + @Override + public int latencyPercentileGranularityMs() { + return _latencyPercentileGranularityMs; + } + +} diff --git a/src/main/java/com/linkedin/kmf/services/DefaultMetricsReporterService.java b/src/main/java/com/linkedin/xinfra/monitor/services/DefaultMetricsReporterService.java similarity index 68% rename from src/main/java/com/linkedin/kmf/services/DefaultMetricsReporterService.java rename to src/main/java/com/linkedin/xinfra/monitor/services/DefaultMetricsReporterService.java index b6f8dac3..64a62bf1 100644 --- a/src/main/java/com/linkedin/kmf/services/DefaultMetricsReporterService.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/DefaultMetricsReporterService.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,22 +7,23 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/ -package com.linkedin.kmf.services; -import static com.linkedin.kmf.common.Utils.getMBeanAttributeValues; +package com.linkedin.xinfra.monitor.services; -import com.linkedin.kmf.common.MbeanAttributeValue; -import com.linkedin.kmf.services.configs.DefaultMetricsReporterServiceConfig; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import com.linkedin.xinfra.monitor.common.MbeanAttributeValue; +import com.linkedin.xinfra.monitor.common.Utils; +import com.linkedin.xinfra.monitor.services.configs.DefaultMetricsReporterServiceConfig; import java.util.List; import java.util.Map; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class DefaultMetricsReporterService implements Service { private static final Logger LOG = LoggerFactory.getLogger(DefaultMetricsReporterService.class); + private static final String LOG_DIVIDER = "=============================================================="; private final String _name; private final List _metricNames; @@ -39,25 +40,20 @@ public DefaultMetricsReporterService(Map props, String name) { @Override public synchronized void start() { - _executor.scheduleAtFixedRate( - new Runnable() { - @Override - public void run() { - try { - reportMetrics(); - } catch (Exception e) { - LOG.error(_name + "/DefaultMetricsReporterService failed to report metrics", e); - } - } - }, _reportIntervalSec, _reportIntervalSec, TimeUnit.SECONDS - ); - LOG.info("{}/DefaultMetricsReporterService started", _name); + _executor.scheduleAtFixedRate(() -> { + try { + reportMetrics(); + } catch (Exception e) { + LOG.error(_name + "/DefaultMetricsReporterService failed to report metrics.", e); + } + }, _reportIntervalSec, _reportIntervalSec, TimeUnit.SECONDS); + LOG.info("{}/DefaultMetricsReporterService started.", _name); } @Override public synchronized void stop() { _executor.shutdown(); - LOG.info("{}/DefaultMetricsReporterService stopped", _name); + LOG.info("{}/DefaultMetricsReporterService stopped.", _name); } @Override @@ -66,26 +62,28 @@ public boolean isRunning() { } @Override - public void awaitShutdown() { + public void awaitShutdown(long timeout, TimeUnit unit) { try { _executor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { - LOG.info("Thread interrupted when waiting for {}/DefaultMetricsReporterService to shutdown", _name); + LOG.info("Thread interrupted when waiting for {}/DefaultMetricsReporterService to shutdown.", _name); } - LOG.info("{}/DefaultMetricsReporterService shutdown completed", _name); + LOG.info("{}/DefaultMetricsReporterService shutdown completed.", _name); } + + private void reportMetrics() { StringBuilder builder = new StringBuilder(); for (String metricName: _metricNames) { String mbeanExpr = metricName.substring(0, metricName.lastIndexOf(":")); String attributeExpr = metricName.substring(metricName.lastIndexOf(":") + 1); - List attributeValues = getMBeanAttributeValues(mbeanExpr, attributeExpr); + List attributeValues = Utils.getMBeanAttributeValues(mbeanExpr, attributeExpr); for (MbeanAttributeValue attributeValue: attributeValues) { builder.append(attributeValue.toString()); builder.append("\n"); } } - LOG.info(builder.toString()); + LOG.info("{}\n{}", LOG_DIVIDER, builder.toString()); } } diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/DefaultMetricsReporterServiceFactory.java 
b/src/main/java/com/linkedin/xinfra/monitor/services/DefaultMetricsReporterServiceFactory.java new file mode 100644 index 00000000..9d0acec0 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/DefaultMetricsReporterServiceFactory.java @@ -0,0 +1,35 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import java.util.Map; + + +/** + * Factory class which instantiates a DefaultMetricsReporterService. + */ +@SuppressWarnings("rawtypes") +public class DefaultMetricsReporterServiceFactory implements ServiceFactory { + private final Map _properties; + private final String _serviceName; + + public DefaultMetricsReporterServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() { + return new DefaultMetricsReporterService(_properties, _serviceName); + } +} diff --git a/src/main/java/com/linkedin/kmf/services/GraphiteMetricsReporterService.java b/src/main/java/com/linkedin/xinfra/monitor/services/GraphiteMetricsReporterService.java similarity index 81% rename from src/main/java/com/linkedin/kmf/services/GraphiteMetricsReporterService.java rename to src/main/java/com/linkedin/xinfra/monitor/services/GraphiteMetricsReporterService.java index 706defd5..24512d7d 100644 --- a/src/main/java/com/linkedin/kmf/services/GraphiteMetricsReporterService.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/GraphiteMetricsReporterService.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,20 +7,12 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
*/ -package com.linkedin.kmf.services; +package com.linkedin.xinfra.monitor.services; -import static com.linkedin.kmf.common.Utils.getMBeanAttributeValues; - -import com.linkedin.kmf.common.MbeanAttributeValue; -import com.linkedin.kmf.services.configs.GraphiteMetricsReporterServiceConfig; -import net.savantly.graphite.GraphiteClient; -import net.savantly.graphite.GraphiteClientFactory; -import net.savantly.graphite.impl.SimpleCarbonMetric; -import org.apache.commons.lang.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - +import com.linkedin.xinfra.monitor.common.MbeanAttributeValue; +import com.linkedin.xinfra.monitor.common.Utils; +import com.linkedin.xinfra.monitor.services.configs.GraphiteMetricsReporterServiceConfig; import java.net.SocketException; import java.net.UnknownHostException; import java.util.List; @@ -28,6 +20,12 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import net.savantly.graphite.GraphiteClient; +import net.savantly.graphite.GraphiteClientFactory; +import net.savantly.graphite.impl.SimpleCarbonMetric; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class GraphiteMetricsReporterService implements Service { private static final Logger LOG = LoggerFactory.getLogger(GraphiteMetricsReporterService.class); @@ -54,17 +52,17 @@ public GraphiteMetricsReporterService(Map props, String name) @Override public synchronized void start() { - _executor.scheduleAtFixedRate( - new Runnable() { - @Override - public void run() { - try { - reportMetrics(); - } catch (Exception e) { - LOG.error(_name + "/GraphiteMetricsReporterService failed to report metrics", e); - } - } - }, _reportIntervalSec, _reportIntervalSec, TimeUnit.SECONDS + _executor.scheduleAtFixedRate(new Runnable() { + @Override + public void run() { + try { + GraphiteMetricsReporterService.this.reportMetrics(); + } catch (Exception e) { + LOG.error(_name + "/GraphiteMetricsReporterService failed to report metrics", + e); + } + } + }, _reportIntervalSec, _reportIntervalSec, TimeUnit.SECONDS ); LOG.info("{}/GraphiteMetricsReporterService started", _name); } @@ -81,7 +79,7 @@ public boolean isRunning() { } @Override - public void awaitShutdown() { + public void awaitShutdown(long timeout, TimeUnit unit) { try { _executor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { @@ -104,7 +102,7 @@ private void reportMetrics() { for (String metricName: _metricNames) { String mbeanExpr = metricName.substring(0, metricName.lastIndexOf(":")); String attributeExpr = metricName.substring(metricName.lastIndexOf(":") + 1); - List attributeValues = getMBeanAttributeValues(mbeanExpr, attributeExpr); + List attributeValues = Utils.getMBeanAttributeValues(mbeanExpr, attributeExpr); for (MbeanAttributeValue attributeValue: attributeValues) { _graphiteClient.saveCarbonMetrics( new SimpleCarbonMetric( diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/GraphiteMetricsReporterServiceFactory.java b/src/main/java/com/linkedin/xinfra/monitor/services/GraphiteMetricsReporterServiceFactory.java new file mode 100644 index 00000000..ed1c5886 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/GraphiteMetricsReporterServiceFactory.java @@ -0,0 +1,36 @@ +/** + * Copyright 2020 LinkedIn Corp. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import java.util.Map; + + +/** + * Factory class which instantiates a GraphiteMetricsReporterService service. + */ +@SuppressWarnings("rawtypes") +public class GraphiteMetricsReporterServiceFactory implements ServiceFactory { + + private final Map _properties; + private final String _serviceName; + + public GraphiteMetricsReporterServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + return new GraphiteMetricsReporterService(_properties, _serviceName); + } +} diff --git a/src/main/java/com/linkedin/kmf/services/JolokiaService.java b/src/main/java/com/linkedin/xinfra/monitor/services/JolokiaService.java similarity index 84% rename from src/main/java/com/linkedin/kmf/services/JolokiaService.java rename to src/main/java/com/linkedin/xinfra/monitor/services/JolokiaService.java index 5218cb32..ae1806e7 100644 --- a/src/main/java/com/linkedin/kmf/services/JolokiaService.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/JolokiaService.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,21 +7,22 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/ -package com.linkedin.kmf.services; +package com.linkedin.xinfra.monitor.services; + +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import org.jolokia.jvmagent.JolokiaServer; import org.jolokia.jvmagent.JvmAgentConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Map; -import java.util.concurrent.atomic.AtomicBoolean; - /** * Jolokia server allows user to query jmx metric value with HTTP request */ public class JolokiaService implements Service { - private static final Logger LOG = LoggerFactory.getLogger(JettyService.class); + private static final Logger LOG = LoggerFactory.getLogger(JolokiaService.class); private final String _name; private final JolokiaServer _jolokiaServer; @@ -51,7 +52,7 @@ public boolean isRunning() { return _isRunning.get(); } - public void awaitShutdown() { + public void awaitShutdown(long timeout, TimeUnit timeUnit) { } diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/JolokiaServiceFactory.java b/src/main/java/com/linkedin/xinfra/monitor/services/JolokiaServiceFactory.java new file mode 100644 index 00000000..809eb630 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/JolokiaServiceFactory.java @@ -0,0 +1,36 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import java.util.Map; + + +/** + * Factory class which instantiates a JolokiaService service. + */ +@SuppressWarnings("rawtypes") +public class JolokiaServiceFactory implements ServiceFactory { + + private final Map _properties; + private final String _serviceName; + + public JolokiaServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + return new JolokiaService(_properties, _serviceName); + } +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/KafkaMetricsReporterService.java b/src/main/java/com/linkedin/xinfra/monitor/services/KafkaMetricsReporterService.java new file mode 100644 index 00000000..4027dc08 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/KafkaMetricsReporterService.java @@ -0,0 +1,134 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.xinfra.monitor.common.MbeanAttributeValue; +import com.linkedin.xinfra.monitor.common.Utils; +import com.linkedin.xinfra.monitor.services.configs.KafkaMetricsReporterServiceConfig; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class KafkaMetricsReporterService implements Service { + private static final Logger LOGGER = LoggerFactory.getLogger(KafkaMetricsReporterService.class); + private static final String METRICS_PRODUCER_ID = "kafka-metrics-reporter-id"; + private final String _name; + private final List _metricsNames; + private final int _reportIntervalSec; + private final ScheduledExecutorService _executor; + private KafkaProducer _producer; + private final String _brokerList; + private final String _topic; + private final ObjectMapper _parser = new ObjectMapper(); + + public KafkaMetricsReporterService(Map props, String name, AdminClient adminClient) throws Exception { + _name = name; + KafkaMetricsReporterServiceConfig config = new KafkaMetricsReporterServiceConfig(props); + _metricsNames = config.getList(KafkaMetricsReporterServiceConfig.REPORT_METRICS_CONFIG); + _reportIntervalSec = config.getInt(KafkaMetricsReporterServiceConfig.REPORT_INTERVAL_SEC_CONFIG); + _executor = Executors.newSingleThreadScheduledExecutor(); + _brokerList = config.getString(KafkaMetricsReporterServiceConfig.BOOTSTRAP_SERVERS_CONFIG); + initializeProducer(); + _topic = config.getString(KafkaMetricsReporterServiceConfig.TOPIC_CONFIG); + Integer rf = config.getInt(KafkaMetricsReporterServiceConfig.TOPIC_REPLICATION_FACTOR); + Utils.createTopicIfNotExists( + _topic, + rf.shortValue(), + 0, // partitions-to-brokers ratio; set to 0 so the partition count does not scale with the number of brokers.
+ 1, // fixed partition count 1 + new Properties(), + adminClient + ); + } + + @Override + public synchronized void start() { + _executor.scheduleAtFixedRate(() -> { + try { + reportMetrics(); + } catch (Exception e) { + LOGGER.error(_name + "/KafkaMetricsReporterService failed to report metrics.", e); + } + }, _reportIntervalSec, _reportIntervalSec, TimeUnit.SECONDS); + LOGGER.info("{}/KafkaMetricsReporterService has started.", _name); + } + + @Override + public synchronized void stop() { + _executor.shutdown(); + _producer.close(); + LOGGER.info("{}/KafkaMetricsReporterService stopped.", _name); + } + + @Override + public boolean isRunning() { + return !_executor.isShutdown(); + } + + @Override + public void awaitShutdown(long timeout, TimeUnit timeUnit) { + try { + _executor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + LOGGER.info("Thread interrupted when waiting for {}/KafkaMetricsReporterService to shutdown", _name); + } + LOGGER.info("{}/KafkaMetricsReporterService shutdown completed", _name); + } + + + private void initializeProducer() { + Properties producerProps = new Properties(); + producerProps.put(ProducerConfig.ACKS_CONFIG, "-1"); + producerProps.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, "20000"); + producerProps.put(ProducerConfig.RETRIES_CONFIG, "3"); + producerProps.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, String.valueOf(Long.MAX_VALUE)); + producerProps.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, "1"); + producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer"); + producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer"); + producerProps.put(ProducerConfig.CLIENT_ID_CONFIG, METRICS_PRODUCER_ID); + producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, _brokerList); + _producer = new KafkaProducer<>(producerProps); + } + + private void reportMetrics() { + Map metrics = new HashMap<>(); + for (String metricName : _metricsNames) { + String mbeanExpr = metricName.substring(0, metricName.lastIndexOf(":")); + String attributeExpr = metricName.substring(metricName.lastIndexOf(":") + 1); + List attributeValues = Utils.getMBeanAttributeValues(mbeanExpr, attributeExpr); + for (MbeanAttributeValue attributeValue : attributeValues) { + String metric = attributeValue.toString(); + String key = metric.substring(0, metric.lastIndexOf("=")); + String val = metric.substring(metric.lastIndexOf("=") + 1); + metrics.put(key, val); + } + } + try { + LOGGER.info("Kafka Metrics Reporter sending metrics = " + _parser.writerWithDefaultPrettyPrinter().writeValueAsString(metrics)); + _producer.send(new ProducerRecord<>(_topic, _parser.writeValueAsString(metrics))); + } catch (JsonProcessingException e) { + LOGGER.warn("unsupported json format: " + metrics, e); + } + } +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/KafkaMetricsReporterServiceFactory.java b/src/main/java/com/linkedin/xinfra/monitor/services/KafkaMetricsReporterServiceFactory.java new file mode 100644 index 00000000..1eaa1419 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/KafkaMetricsReporterServiceFactory.java @@ -0,0 +1,41 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import java.util.Map; +import org.apache.kafka.clients.admin.AdminClient; + + +/** + * Factory class which instantiates a KafkaMetricsReporterService service object. + */ +@SuppressWarnings("rawtypes") +public class KafkaMetricsReporterServiceFactory implements ServiceFactory { + + private final Map _properties; + private final String _serviceName; + + public KafkaMetricsReporterServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + + AdminClient adminClient = AdminClient.create(_properties); + + return new KafkaMetricsReporterService(_properties, _serviceName, adminClient); + + } +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/MultiClusterTopicManagementService.java b/src/main/java/com/linkedin/xinfra/monitor/services/MultiClusterTopicManagementService.java new file mode 100644 index 00000000..dca1eb65 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/MultiClusterTopicManagementService.java @@ -0,0 +1,704 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +package com.linkedin.xinfra.monitor.services; + +import com.linkedin.xinfra.monitor.common.Utils; +import com.linkedin.xinfra.monitor.services.configs.CommonServiceConfig; +import com.linkedin.xinfra.monitor.services.configs.MultiClusterTopicManagementServiceConfig; +import com.linkedin.xinfra.monitor.services.configs.TopicManagementServiceConfig; +import com.linkedin.xinfra.monitor.topicfactory.TopicFactory; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Properties; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.CancellationException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import kafka.admin.AdminUtils; +import kafka.admin.BrokerMetadata; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.AdminClientConfig; +import org.apache.kafka.clients.admin.AlterConfigOp; +import org.apache.kafka.clients.admin.AlterPartitionReassignmentsResult; +import org.apache.kafka.clients.admin.Config; +import org.apache.kafka.clients.admin.ConfigEntry; +import org.apache.kafka.clients.admin.ElectLeadersResult; +import org.apache.kafka.clients.admin.NewPartitionReassignment; +import org.apache.kafka.clients.admin.NewPartitions; +import org.apache.kafka.clients.admin.NewTopic; +import org.apache.kafka.clients.admin.TopicDescription; +import org.apache.kafka.common.ElectionType; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.KafkaFuture; +import org.apache.kafka.common.Node; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.TopicPartitionInfo; +import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.common.config.ConfigResource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Option$; +import scala.collection.JavaConverters; +import scala.collection.Seq; + + +/** + * This service periodically checks and re-balances the monitor topics across a pipeline of Kafka clusters so that + * leadership of the partitions of the monitor topic in each cluster is distributed evenly across brokers in the cluster. + * + * More specifically, this service may do some or all of the following tasks depending on the config: + * + * - Create the monitor topic using the user-specified replication factor and partition number. + * - Increase the partition count of the monitor topic if either partitionsToBrokersRatio or minPartitionNum is not satisfied. + * - Increase the replication factor of the monitor topic if the user-specified replicationFactor is not satisfied. + * - Reassign partitions across brokers to make sure each broker acts as the preferred leader of at least one partition of the monitor topic. + * - Trigger preferred leader election to make sure each broker acts as the leader of at least one partition of the monitor topic. + * - Make sure the number of partitions of the monitor topic is the same across all monitored clusters.
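+ * + * Each of the tasks above can be enabled or disabled independently through the topic management service configs; topic creation, partition addition, and partition reassignment / preferred leader election each have a dedicated enable flag (see the TopicManagementServiceConfig fields used below).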
+ * + */ +@SuppressWarnings({"rawtypes", "unchecked"}) +public class MultiClusterTopicManagementService implements Service { + private static final Logger LOGGER = LoggerFactory.getLogger(MultiClusterTopicManagementService.class); + private static final String METRIC_GROUP_NAME = "topic-management-service"; + private final CompletableFuture _topicPartitionResult = new CompletableFuture<>(); + private final AtomicBoolean _isRunning = new AtomicBoolean(false); + private final String _serviceName; + private final Map _topicManagementByCluster; + private final int _rebalanceIntervalMs; + private final long _preferredLeaderElectionIntervalMs; + private final ScheduledExecutorService _executor; + + @SuppressWarnings("unchecked") + public MultiClusterTopicManagementService(Map props, String serviceName) throws Exception { + _serviceName = serviceName; + MultiClusterTopicManagementServiceConfig config = new MultiClusterTopicManagementServiceConfig(props); + String topic = config.getString(CommonServiceConfig.TOPIC_CONFIG); + Map propsByCluster = + props.containsKey(MultiClusterTopicManagementServiceConfig.PROPS_PER_CLUSTER_CONFIG) ? (Map) props.get( + MultiClusterTopicManagementServiceConfig.PROPS_PER_CLUSTER_CONFIG) : new HashMap<>(); + _topicManagementByCluster = initializeTopicManagementHelper(propsByCluster, topic); + _rebalanceIntervalMs = config.getInt(MultiClusterTopicManagementServiceConfig.REBALANCE_INTERVAL_MS_CONFIG); + _preferredLeaderElectionIntervalMs = + config.getLong(MultiClusterTopicManagementServiceConfig.PREFERRED_LEADER_ELECTION_CHECK_INTERVAL_MS_CONFIG); + _executor = Executors.newSingleThreadScheduledExecutor( + r -> new Thread(r, _serviceName + "-multi-cluster-topic-management-service")); + _topicPartitionResult.complete(null); + } + + public CompletableFuture topicPartitionResult() { + return _topicPartitionResult; + } + + private Map initializeTopicManagementHelper(Map propsByCluster, + String topic) throws Exception { + Map topicManagementByCluster = new HashMap<>(); + for (Map.Entry entry : propsByCluster.entrySet()) { + String clusterName = entry.getKey(); + Map serviceProps = entry.getValue(); + if (serviceProps.containsKey(MultiClusterTopicManagementServiceConfig.TOPIC_CONFIG)) { + throw new ConfigException("The raw per-cluster config for MultiClusterTopicManagementService must not contain " + + MultiClusterTopicManagementServiceConfig.TOPIC_CONFIG); + } + serviceProps.put(MultiClusterTopicManagementServiceConfig.TOPIC_CONFIG, topic); + topicManagementByCluster.put(clusterName, new TopicManagementHelper(serviceProps)); + } + return topicManagementByCluster; + } + + @Override + public synchronized void start() { + if (_isRunning.compareAndSet(false, true)) { + final long topicManagementProcedureInitialDelay = 0; + _executor.scheduleWithFixedDelay( + new TopicManagementRunnable(), + topicManagementProcedureInitialDelay, + _rebalanceIntervalMs, + TimeUnit.MILLISECONDS); + + LOGGER.info("Topic management periodical procedure started with initial delay {} ms and interval {} ms", + topicManagementProcedureInitialDelay, _rebalanceIntervalMs); + + _executor.scheduleWithFixedDelay(new PreferredLeaderElectionRunnable(), _preferredLeaderElectionIntervalMs, + _preferredLeaderElectionIntervalMs, TimeUnit.MILLISECONDS); + LOGGER.info("Preferred leader election periodical procedure started with initial delay {} ms and interval {} ms", + _preferredLeaderElectionIntervalMs, _preferredLeaderElectionIntervalMs); + } + } + + @Override + public synchronized void stop() { + if 
(_isRunning.compareAndSet(true, false)) { + _executor.shutdown(); + LOGGER.info("{}/MultiClusterTopicManagementService stopped.", _serviceName); + } + } + + @Override + public boolean isRunning() { + return _isRunning.get() && !_executor.isShutdown(); + } + + @Override + public void awaitShutdown(long timeout, TimeUnit unit) { + try { + _executor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + LOGGER.info("Thread interrupted when waiting for {}/MultiClusterTopicManagementService to shutdown", + _serviceName); + } + LOGGER.info("{}/MultiClusterTopicManagementService shutdown completed", _serviceName); + } + + private class TopicManagementRunnable implements Runnable { + + @Override + public void run() { + try { + for (TopicManagementHelper helper : _topicManagementByCluster.values()) { + helper.maybeCreateTopic(); + } + + /* + * The partition number of the monitor topics should be the minimum partition number that satisfies the following conditions: + * - partition number of the monitor topics across all monitored clusters should be the same + * - partitionNum / brokerNum >= user-configured partitionsToBrokersRatio. + * - partitionNum >= user-configured minPartitionNum + */ + + int minPartitionNum = 0; + for (TopicManagementHelper helper : _topicManagementByCluster.values()) { + minPartitionNum = Math.max(minPartitionNum, helper.minPartitionNum()); + } + for (TopicManagementHelper helper : _topicManagementByCluster.values()) { + helper.maybeAddPartitions(minPartitionNum); + } + + for (Map.Entry entry : _topicManagementByCluster.entrySet()) { + String clusterName = entry.getKey(); + TopicManagementHelper helper = entry.getValue(); + try { + helper.maybeReassignPartitionAndElectLeader(); + } catch (KafkaException e) { + LOGGER.warn(_serviceName + "/MultiClusterTopicManagementService will retry later in cluster " + clusterName, + e); + } + } + } catch (Throwable t) { + // Need to catch throwable because there is scala API that can throw NoSuchMethodError in runtime + // and such error is not caught by compilation + LOGGER.error(_serviceName + "/MultiClusterTopicManagementService will stop due to error.", t); + stop(); + } + } + } + + /** + * Check if Preferred leader election is requested during Topic Management (TopicManagementRunnable), + * trigger Preferred leader election when there is no partition reassignment in progress. + */ + private class PreferredLeaderElectionRunnable implements Runnable { + @Override + public void run() { + try { + for (Map.Entry entry : _topicManagementByCluster.entrySet()) { + String clusterName = entry.getKey(); + TopicManagementHelper helper = entry.getValue(); + try { + helper.maybeElectLeader(); + } catch (KafkaException e) { + LOGGER.warn(_serviceName + "/MultiClusterTopicManagementService will retry later in cluster " + clusterName, + e); + } + } + } catch (Throwable t) { + /* Need to catch throwable because there is scala API that can throw NoSuchMethodError in runtime + and such error is not caught by compilation. 
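+ Stopping the service here prevents the scheduled runnable from failing repeatedly on every subsequent run.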
*/ + LOGGER.error(_serviceName + + "/MultiClusterTopicManagementService/PreferredLeaderElectionRunnable will stop due to an error.", t); + stop(); + } + } + } + + @SuppressWarnings("FieldCanBeLocal") + static class TopicManagementHelper { + private final String _zkConnect; + private final int _replicationFactor; + private final double _minPartitionsToBrokersRatio; + private final int _minPartitionNum; + private final Properties _topicProperties; + private boolean _preferredLeaderElectionRequested; + private final Duration _requestTimeout; + private final List _bootstrapServers; + + // package private for unit testing + boolean _topicCreationEnabled; + boolean _topicAddPartitionEnabled; + boolean _topicReassignPartitionAndElectLeaderEnabled; + AdminClient _adminClient; + String _topic; + TopicFactory _topicFactory; + + @SuppressWarnings("unchecked") + TopicManagementHelper(Map props) throws Exception { + + TopicManagementServiceConfig config = new TopicManagementServiceConfig(props); + AdminClientConfig adminClientConfig = new AdminClientConfig(props); + String topicFactoryClassName = config.getString(TopicManagementServiceConfig.TOPIC_FACTORY_CLASS_CONFIG); + _topicCreationEnabled = config.getBoolean(TopicManagementServiceConfig.TOPIC_CREATION_ENABLED_CONFIG); + _topicAddPartitionEnabled = config.getBoolean(TopicManagementServiceConfig.TOPIC_ADD_PARTITION_ENABLED_CONFIG); + _topicReassignPartitionAndElectLeaderEnabled = config.getBoolean(TopicManagementServiceConfig.TOPIC_REASSIGN_PARTITION_AND_ELECT_LEADER_ENABLED_CONFIG); + _topic = config.getString(TopicManagementServiceConfig.TOPIC_CONFIG); + _zkConnect = config.getString(TopicManagementServiceConfig.ZOOKEEPER_CONNECT_CONFIG); + _replicationFactor = config.getInt(TopicManagementServiceConfig.TOPIC_REPLICATION_FACTOR_CONFIG); + _minPartitionsToBrokersRatio = config.getDouble(TopicManagementServiceConfig.PARTITIONS_TO_BROKERS_RATIO_CONFIG); + _minPartitionNum = config.getInt(TopicManagementServiceConfig.MIN_PARTITION_NUM_CONFIG); + _preferredLeaderElectionRequested = false; + _requestTimeout = Duration.ofMillis(adminClientConfig.getInt(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG)); + _bootstrapServers = adminClientConfig.getList(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG); + _topicProperties = new Properties(); + if (props.containsKey(TopicManagementServiceConfig.TOPIC_PROPS_CONFIG)) { + for (Map.Entry entry : ((Map) props.get( + TopicManagementServiceConfig.TOPIC_PROPS_CONFIG)).entrySet()) { + _topicProperties.put(entry.getKey(), entry.getValue().toString()); + } + } + + Map topicFactoryConfig = + props.containsKey(TopicManagementServiceConfig.TOPIC_FACTORY_PROPS_CONFIG) ? 
(Map) props.get( + TopicManagementServiceConfig.TOPIC_FACTORY_PROPS_CONFIG) : new HashMap(); + _topicFactory = + (TopicFactory) Class.forName(topicFactoryClassName).getConstructor(Map.class).newInstance(topicFactoryConfig); + _adminClient = constructAdminClient(props); + LOGGER.info("{} configs: {}", _adminClient.getClass().getSimpleName(), props); + logConfigurationValues(); + } + + private void logConfigurationValues() { + LOGGER.info("TopicManagementHelper for cluster with Zookeeper connect {} is configured with " + + "[topic={}, topicCreationEnabled={}, topicAddPartitionEnabled={}, " + + "topicReassignPartitionAndElectLeaderEnabled={}, minPartitionsToBrokersRatio={}, " + + "minPartitionNum={}]", _zkConnect, _topic, _topicCreationEnabled, _topicAddPartitionEnabled, + _topicReassignPartitionAndElectLeaderEnabled, _minPartitionsToBrokersRatio, _minPartitionNum); + } + + @SuppressWarnings("unchecked") + void maybeCreateTopic() throws Exception { + if (!_topicCreationEnabled) { + LOGGER.info("Topic creation is not enabled for {} in a cluster with Zookeeper URL {}. " + + "Refer to config: {}", _topic, _zkConnect, TopicManagementServiceConfig.TOPIC_CREATION_ENABLED_CONFIG); + return; + } + NewTopic newTopic = new NewTopic(_topic, minPartitionNum(), (short) _replicationFactor); + newTopic.configs((Map) _topicProperties); + _topicFactory.createTopicIfNotExist(_topic, (short) _replicationFactor, _minPartitionsToBrokersRatio, + _topicProperties, _adminClient); + } + + AdminClient constructAdminClient(Map props) { + return AdminClient.create(props); + } + + int minPartitionNum() throws InterruptedException, ExecutionException { + int brokerCount = _adminClient.describeCluster().nodes().get().size(); + return Math.max((int) Math.ceil(_minPartitionsToBrokersRatio * brokerCount), _minPartitionNum); + } + + void maybeAddPartitions(final int requiredMinPartitionNum) + throws ExecutionException, InterruptedException, CancellationException, TimeoutException { + if (!_topicAddPartitionEnabled) { + LOGGER.info("Adding partition to {} topic is not enabled in a cluster with Zookeeper URL {}. " + + "Refer to config: {}", _topic, _zkConnect, TopicManagementServiceConfig.TOPIC_ADD_PARTITION_ENABLED_CONFIG); + return; + } + Map> kafkaFutureMap = + _adminClient.describeTopics(Collections.singleton(_topic)).values(); + KafkaFuture topicDescriptions = kafkaFutureMap.get(_topic); + List partitions = topicDescriptions.get(_requestTimeout.toMillis(), TimeUnit.MILLISECONDS).partitions(); + + final int currPartitionNum = partitions.size(); + if (currPartitionNum >= requiredMinPartitionNum) { + LOGGER.debug("{} will not increase partition of the topic {} in the cluster. 
Current partition count is {} and the " + + "minimum required partition count is {}.", this.getClass().toString(), _topic, currPartitionNum, requiredMinPartitionNum); + return; + } + LOGGER.info("{} will increase the partition count of the topic {} in the cluster from {}" + " to {}.", + this.getClass().toString(), _topic, currPartitionNum, requiredMinPartitionNum); + Set brokers = new HashSet<>(); + for (Node broker : _adminClient.describeCluster().nodes().get(_requestTimeout.toMillis(), TimeUnit.MILLISECONDS)) { + BrokerMetadata brokerMetadata = new BrokerMetadata(broker.id(), null); + brokers.add(brokerMetadata); + } + Set excludedBrokers = _topicFactory.getExcludedBrokers(_adminClient); + if (!excludedBrokers.isEmpty()) { + brokers.removeIf(broker -> excludedBrokers.contains(broker.id())); + } + + List> newPartitionAssignments = + newPartitionAssignments(requiredMinPartitionNum, currPartitionNum, brokers, _replicationFactor); + + NewPartitions newPartitions = NewPartitions.increaseTo(requiredMinPartitionNum, newPartitionAssignments); + + Map newPartitionsMap = new HashMap<>(); + newPartitionsMap.put(_topic, newPartitions); + _adminClient.createPartitions(newPartitionsMap).all().get(_requestTimeout.toMillis(), TimeUnit.MILLISECONDS); + LOGGER.info("{} finished increasing the partition count of the topic {} in the cluster from {} to {}.", + this.getClass().toString(), _topic, currPartitionNum, requiredMinPartitionNum); + } + + static List> newPartitionAssignments(int minPartitionNum, int partitionNum, + Set brokers, int rf) { + + // The replica assignments below are for the new partitions only, not for the existing partitions. + // .increaseTo(6, asList(asList(1, 2), + // asList(2, 3), + // asList(3, 1))) + // partition 3's preferred leader will be broker 1, + // partition 4's preferred leader will be broker 2 and + // partition 5's preferred leader will be broker 3. + List> newPartitionAssignments = new ArrayList<>(); + int partitionDifference = minPartitionNum - partitionNum; + + // leader assignments - + while (newPartitionAssignments.size() != partitionDifference) { + List replicas = new ArrayList<>(); + // leader replica/broker - + int brokerMetadata = randomBroker(brokers).id(); + replicas.add(brokerMetadata); + + newPartitionAssignments.add(replicas); + } + + // follower assignments - + // Regardless of the partition/replica assignments here, maybeReassignPartitionAndElectLeader() + // will reassign the partition as needed periodically. + for (List replicas : newPartitionAssignments) { + for (BrokerMetadata broker : brokers) { + if (!replicas.contains(broker.id())) { + replicas.add(broker.id()); + } + if (replicas.size() == rf) { + break; + } + } + } + return newPartitionAssignments; + } + + private static BrokerMetadata randomBroker(Set brokers) { + + if (brokers == null || brokers.size() == 0) { + throw new IllegalArgumentException("brokers object is either null or empty."); + } + + // A Set forces an O(n) copy before an element can be picked by index. Since the broker set does not change in + // newPartitionAssignments, accepting a List argument instead of a Set would allow O(1) random access. + List brokerMetadataList = new ArrayList<>(brokers); + // Convert to a list so a random index can be used directly instead of iterating the set. + // The parameterized constructor is used instead of addAll() for better performance. + + int brokerSetSize = brokers.size(); + + // In practice, the Random instance should be shared rather than created on every call.
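+ // (for example, java.util.concurrent.ThreadLocalRandom.current().nextInt(brokerSetSize) would avoid allocating a new Random here)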
+ int random = new Random().nextInt(brokerSetSize); + + return brokerMetadataList.get(random); + } + + /** + * Exposed package-private access for testing. Get the total number of partitions for a Kafka topic. + * @return total number of topic partitions + * @throws InterruptedException when a thread is waiting, sleeping and the thread is interrupted, either before / during the activity. + * @throws ExecutionException when attempting to retrieve the result of a task that aborted by throwing an exception. + */ + int numPartitions() throws InterruptedException, ExecutionException { + + return _adminClient.describeTopics(Collections.singleton(_topic)).values().get(_topic).get().partitions().size(); + } + + private Set getAvailableBrokers() throws ExecutionException, InterruptedException { + Set brokers = new HashSet<>(_adminClient.describeCluster().nodes().get()); + Set excludedBrokers = _topicFactory.getExcludedBrokers(_adminClient); + brokers.removeIf(broker -> excludedBrokers.contains(broker.id())); + return brokers; + } + + void maybeReassignPartitionAndElectLeader() throws ExecutionException, InterruptedException, TimeoutException { + if (!_topicReassignPartitionAndElectLeaderEnabled) { + LOGGER.info("Reassign partition and elect leader to {} topic is not enabled in a cluster with Zookeeper URL {}. " + + "Refer to config: {}", _topic, _zkConnect, TopicManagementServiceConfig.TOPIC_REASSIGN_PARTITION_AND_ELECT_LEADER_ENABLED_CONFIG); + return; + } + List partitionInfoList = + _adminClient.describeTopics(Collections.singleton(_topic)).all().get().get(_topic).partitions(); + Collection brokers = this.getAvailableBrokers(); + boolean partitionReassigned = false; + if (partitionInfoList.size() == 0) { + throw new IllegalStateException("Topic " + _topic + " does not exist in cluster."); + } + + int currentReplicationFactor = getReplicationFactor(partitionInfoList); + int expectedReplicationFactor = Math.max(currentReplicationFactor, _replicationFactor); + + if (_replicationFactor < currentReplicationFactor) { + LOGGER.debug( + "Configured replication factor {} is smaller than the current replication factor {} of the topic {} in cluster.", + _replicationFactor, currentReplicationFactor, _topic); + } + + if (expectedReplicationFactor > currentReplicationFactor && Utils.ongoingPartitionReassignments(_adminClient) + .isEmpty()) { + + LOGGER.info( + "MultiClusterTopicManagementService will increase the replication factor of the topic {} in cluster" + + "from {} to {}", _topic, currentReplicationFactor, expectedReplicationFactor); + reassignPartitions(_adminClient, brokers, _topic, partitionInfoList.size(), expectedReplicationFactor); + + partitionReassigned = true; + } + + // Update the properties of the monitor topic if any config is different from the user-specified config + ConfigResource topicConfigResource = new ConfigResource(ConfigResource.Type.TOPIC, _topic); + Config currentConfig = _adminClient.describeConfigs(Collections.singleton(topicConfigResource)).all().get().get(topicConfigResource); + Collection alterConfigOps = new ArrayList<>(); + for (Map.Entry entry : _topicProperties.entrySet()) { + String name = String.valueOf(entry.getKey()); + ConfigEntry configEntry = new ConfigEntry(name, String.valueOf(entry.getValue())); + if (!configEntry.equals(currentConfig.get(name))) { + alterConfigOps.add(new AlterConfigOp(configEntry, AlterConfigOp.OpType.SET)); + } + } + + if (!alterConfigOps.isEmpty()) { + LOGGER.info("MultiClusterTopicManagementService will overwrite properties of the topic {} 
" + + "in cluster with {}.", _topic, alterConfigOps); + Map> configs = Collections.singletonMap(topicConfigResource, alterConfigOps); + _adminClient.incrementalAlterConfigs(configs); + } + + if (partitionInfoList.size() >= brokers.size() && someBrokerNotPreferredLeader(partitionInfoList, brokers) + && Utils.ongoingPartitionReassignments(_adminClient).isEmpty()) { + LOGGER.info("{} will reassign partitions of the topic {} in cluster.", this.getClass().toString(), _topic); + reassignPartitions(_adminClient, brokers, _topic, partitionInfoList.size(), expectedReplicationFactor); + + partitionReassigned = true; + } + + if (partitionInfoList.size() >= brokers.size() && someBrokerNotElectedLeader(partitionInfoList, brokers)) { + if (!partitionReassigned || Utils.ongoingPartitionReassignments(_adminClient).isEmpty()) { + LOGGER.info("MultiClusterTopicManagementService will trigger preferred leader election for the topic {} in " + + "cluster.", _topic); + triggerPreferredLeaderElection(partitionInfoList, _topic); + _preferredLeaderElectionRequested = false; + } else { + _preferredLeaderElectionRequested = true; + } + } + } + + void maybeElectLeader() throws InterruptedException, ExecutionException, TimeoutException { + if (!_preferredLeaderElectionRequested) { + return; + } + + if (Utils.ongoingPartitionReassignments(_adminClient).isEmpty()) { + List partitionInfoList = + _adminClient.describeTopics(Collections.singleton(_topic)).all().get().get(_topic).partitions(); + LOGGER.info("MultiClusterTopicManagementService will trigger requested preferred leader election for the" + + " topic {} in cluster.", _topic); + triggerPreferredLeaderElection(partitionInfoList, _topic); + _preferredLeaderElectionRequested = false; + } + } + + private void triggerPreferredLeaderElection(List partitionInfoList, String partitionTopic) { + Set partitions = new HashSet<>(); + for (TopicPartitionInfo javaPartitionInfo : partitionInfoList) { + partitions.add(new TopicPartition(partitionTopic, javaPartitionInfo.partition())); + } + ElectLeadersResult electLeadersResult = _adminClient.electLeaders(ElectionType.PREFERRED, partitions); + + LOGGER.info("{}: triggerPreferredLeaderElection - {}", this.getClass().toString(), + electLeadersResult.all()); + } + + private static void reassignPartitions(AdminClient adminClient, Collection brokers, String topic, + int partitionCount, int replicationFactor) { + + scala.collection.mutable.ArrayBuffer brokersMetadata = + new scala.collection.mutable.ArrayBuffer<>(brokers.size()); + for (Node broker : brokers) { + brokersMetadata.$plus$eq(new BrokerMetadata(broker.id(), Option$.MODULE$.apply(broker.rack()))); + } + scala.collection.Map> assignedReplicas = + AdminUtils.assignReplicasToBrokers(brokersMetadata, partitionCount, replicationFactor, 0, 0); + scala.collection.immutable.Map> newAssignment = + new scala.collection.immutable.HashMap<>(); + scala.collection.Iterator>> it = assignedReplicas.iterator(); + while (it.hasNext()) { + scala.Tuple2> scalaTuple = it.next(); + TopicPartition tp = new TopicPartition(topic, (Integer) scalaTuple._1); + newAssignment = newAssignment.$plus(new scala.Tuple2<>(tp, scalaTuple._2)); + } + + String newAssignmentJson = formatAsNewReassignmentJson(topic, assignedReplicas); + LOGGER.info("Reassign partitions for topic " + topic); + LOGGER.info("New topic partition replica assignments: {}", newAssignmentJson); + + Set>> newAssignmentMap = + scala.collection.JavaConverters.mapAsJavaMap(newAssignment).entrySet(); + Map> reassignments = new HashMap<>(); + for 
(Map.Entry> topicPartitionSeqEntry : newAssignmentMap) { + List targetReplicas = new ArrayList<>(); + List replicas = JavaConverters.seqAsJavaList(topicPartitionSeqEntry.getValue()); + for (Object replica : replicas) { + targetReplicas.add((int) replica); + } + NewPartitionReassignment newPartitionReassignment = new NewPartitionReassignment(targetReplicas); + reassignments.put(topicPartitionSeqEntry.getKey(), Optional.of(newPartitionReassignment)); + } + + AlterPartitionReassignmentsResult alterPartitionReassignmentsResult = + adminClient.alterPartitionReassignments(reassignments); + try { + alterPartitionReassignmentsResult.all().get(); + } catch (InterruptedException | ExecutionException e) { + + LOGGER.error("An exception occurred while altering the partition reassignments for {}", topic, e); + } + } + + static int getReplicationFactor(List partitionInfoList) { + if (partitionInfoList.isEmpty()) { + throw new RuntimeException("Partition list is empty."); + } + + int replicationFactor = partitionInfoList.get(0).replicas().size(); + for (TopicPartitionInfo partitionInfo : partitionInfoList) { + if (replicationFactor != partitionInfo.replicas().size()) { + LOGGER.warn("Partitions of the topic have different replication factor."); + return -1; + } + } + return replicationFactor; + } + + static boolean someBrokerNotPreferredLeader(List partitionInfoList, Collection brokers) { + Set brokersNotPreferredLeader = new HashSet<>(brokers.size()); + for (Node broker : brokers) { + brokersNotPreferredLeader.add(broker.id()); + } + for (TopicPartitionInfo partitionInfo : partitionInfoList) { + brokersNotPreferredLeader.remove(partitionInfo.replicas().get(0).id()); + } + + return !brokersNotPreferredLeader.isEmpty(); + } + + static boolean someBrokerNotElectedLeader(List partitionInfoList, Collection brokers) { + Set brokersNotElectedLeader = new HashSet<>(brokers.size()); + for (Node broker : brokers) { + brokersNotElectedLeader.add(broker.id()); + } + for (TopicPartitionInfo partitionInfo : partitionInfoList) { + if (partitionInfo.leader() != null) { + brokersNotElectedLeader.remove(partitionInfo.leader().id()); + } + } + return !brokersNotElectedLeader.isEmpty(); + } + + /** + * @param topic topic + * @param partitionsToBeReassigned a map from partition (int) to replica list (int seq) + * + * @return a json string with the same format as output of kafka.utils.ZkUtils.formatAsReassignmentJson + * + * Example: + *
+     *   {"version":1,"partitions":[
+     *     {"topic":"kmf-topic","partition":1,"replicas":[0,1]},
+     *     {"topic":"kmf-topic","partition":2,"replicas":[1,2]},
+     *     {"topic":"kmf-topic","partition":0,"replicas":[2,0]}]}
+     * 
+ */ + + // TODO (andrewchoi5): uncomment this method when Xinfra Monitor is upgraded to 'org.apache.kafka' 'kafka_2.12' version '2.4.1' +// private static String formatAsOldAssignmentJson(String topic, scala.collection.Map partitionsToBeReassigned) { +// StringBuilder bldr = new StringBuilder(); +// bldr.append("{\"version\":1,\"partitions\":[\n"); +// for (int partition = 0; partition < partitionsToBeReassigned.size(); partition++) { +// bldr.append(" {\"topic\":\"").append(topic).append("\",\"partition\":").append(partition).append(",\"replicas\":["); +// ReplicaAssignment replicas = partitionsToBeReassigned.apply(partition); +// for (int replicaIndex = 0; replicaIndex < replicas.replicas().size(); replicaIndex++) { +// Object replica = replicas.replicas().apply(replicaIndex); +// bldr.append(replica).append(","); +// } +// bldr.setLength(bldr.length() - 1); +// bldr.append("]},\n"); +// } +// bldr.setLength(bldr.length() - 2); +// bldr.append("]}"); +// return bldr.toString(); +// } + + /** + * @param topic Kafka topic + * @param partitionsToReassign a map from partition (int) to new replica list (int seq) + * + * @return a json string with the same format as output of kafka.utils.ZkUtils.formatAsReassignmentJson + * + * Example: + *
+     *   {"version":1,"partitions":[
+     *     {"topic":"kmf-topic","partition":1,"replicas":[0,1]},
+     *     {"topic":"kmf-topic","partition":2,"replicas":[1,2]},
+     *     {"topic":"kmf-topic","partition":0,"replicas":[2,0]}]}
+     * 
+ */ + private static String formatAsNewReassignmentJson(String topic, + scala.collection.Map> partitionsToReassign) { + StringBuilder builder = new StringBuilder(); + builder.append("{\"version\":1,\"partitions\":[\n"); + for (int partition = 0; partition < partitionsToReassign.size(); partition++) { + builder.append(" {\"topic\":\"") + .append(topic) + .append("\",\"partition\":") + .append(partition) + .append(",\"replicas\":["); + Seq replicas = partitionsToReassign.apply(partition); + for (int replicaIndex = 0; replicaIndex < replicas.size(); replicaIndex++) { + Object replica = replicas.apply(replicaIndex); + builder.append(replica).append(","); + } + builder.setLength(builder.length() - 1); + builder.append("]},\n"); + } + builder.setLength(builder.length() - 2); + builder.append("]}"); + return builder.toString(); + } + } +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/MultiClusterTopicManagementServiceFactory.java b/src/main/java/com/linkedin/xinfra/monitor/services/MultiClusterTopicManagementServiceFactory.java new file mode 100644 index 00000000..9e90169a --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/MultiClusterTopicManagementServiceFactory.java @@ -0,0 +1,36 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import java.util.Map; + + +/** + * Factory which instantiates a MultiClusterTopicManagementService service object. + */ +@SuppressWarnings("rawtypes") +public class MultiClusterTopicManagementServiceFactory implements ServiceFactory { + + private final Map _properties; + private final String _serviceName; + + public MultiClusterTopicManagementServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + return new MultiClusterTopicManagementService(_properties, _serviceName); + } +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/OffsetCommitService.java b/src/main/java/com/linkedin/xinfra/monitor/services/OffsetCommitService.java new file mode 100644 index 00000000..42f76a0d --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/OffsetCommitService.java @@ -0,0 +1,278 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +package com.linkedin.xinfra.monitor.services; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.linkedin.xinfra.monitor.XinfraMonitorConstants; +import com.linkedin.xinfra.monitor.common.Utils; +import com.linkedin.xinfra.monitor.services.metrics.OffsetCommitServiceMetrics; +import java.net.InetSocketAddress; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.kafka.clients.ApiVersions; +import org.apache.kafka.clients.ClientDnsLookup; +import org.apache.kafka.clients.ClientResponse; +import org.apache.kafka.clients.ClientUtils; +import org.apache.kafka.clients.KafkaClient; +import org.apache.kafka.clients.Metadata; +import org.apache.kafka.clients.NetworkClient; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient; +import org.apache.kafka.clients.consumer.internals.RequestFuture; +import org.apache.kafka.common.Node; +import org.apache.kafka.common.internals.ClusterResourceListeners; +import org.apache.kafka.common.message.OffsetCommitRequestData; +import org.apache.kafka.common.metrics.JmxReporter; +import org.apache.kafka.common.metrics.MetricConfig; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.metrics.MetricsReporter; +import org.apache.kafka.common.network.ChannelBuilder; +import org.apache.kafka.common.network.Selector; +import org.apache.kafka.common.requests.AbstractRequest; +import org.apache.kafka.common.requests.OffsetCommitRequest; +import org.apache.kafka.common.utils.LogContext; +import org.apache.kafka.common.utils.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Service that monitors the commit offset availability of a particular Consumer Group. + */ +public class OffsetCommitService implements Service { + + public static final String METRIC_GRP_PREFIX = "xm-offset-commit-service"; + private static final int MAX_INFLIGHT_REQUESTS_PER_CONNECTION = 100; + private static final Logger LOGGER = LoggerFactory.getLogger(OffsetCommitService.class); + private static final String SERVICE_SUFFIX = "-consumer-offset-commit-service"; + private final AtomicBoolean _isRunning; + private final ScheduledExecutorService _scheduledExecutorService; + private final String _serviceName; + private final AdminClient _adminClient; + private final String _consumerGroup; + + // the consumer network client that communicates with kafka cluster brokers. + private final ConsumerNetworkClient _consumerNetworkClient; + private final Time _time; + private final OffsetCommitServiceMetrics _offsetCommitServiceMetrics; + + /** + * + * @param config The consumer configuration keys + * @param serviceName name of the xinfra monitor service + * @param adminClient Administrative client for Kafka, which supports managing and inspecting topics, brokers, configurations and ACLs. 
+ */ + OffsetCommitService(ConsumerConfig config, String serviceName, AdminClient adminClient) + throws JsonProcessingException { + + _time = Time.SYSTEM; + _consumerGroup = config.getString(ConsumerConfig.GROUP_ID_CONFIG); + _adminClient = adminClient; + _isRunning = new AtomicBoolean(false); + _serviceName = serviceName; + + List reporters = new ArrayList<>(); + reporters.add(new JmxReporter(Service.JMX_PREFIX)); + MetricConfig metricConfig = new MetricConfig().samples(60).timeWindow(1000, TimeUnit.MILLISECONDS); + Metrics metrics = new Metrics(metricConfig, reporters, _time); + Map tags = new HashMap<>(); + tags.put(XinfraMonitorConstants.TAGS_NAME, serviceName); + + _offsetCommitServiceMetrics = new OffsetCommitServiceMetrics(metrics, tags); + + long retryBackoffMs = config.getLong(ConsumerConfig.RETRY_BACKOFF_MS_CONFIG); + int heartbeatIntervalMs = config.getInt(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG); + + String clientId = config.getString(ConsumerConfig.CLIENT_ID_CONFIG); + + List bootstrapServers = config.getList(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG); + List addresses = + ClientUtils.parseAndValidateAddresses(bootstrapServers, ClientDnsLookup.DEFAULT); + + LogContext logContext = new LogContext("[Consumer clientId=" + clientId + "] "); + + ChannelBuilder channelBuilder = ClientUtils.createChannelBuilder(config, _time, logContext); + + LOGGER.info("Bootstrap servers config: {} | broker addresses: {}", bootstrapServers, addresses); + + Metadata metadata = new Metadata(retryBackoffMs, config.getLong(ConsumerConfig.METADATA_MAX_AGE_CONFIG), logContext, + new ClusterResourceListeners()); + + metadata.bootstrap(addresses); + + Selector selector = + new Selector(config.getLong(ConsumerConfig.CONNECTIONS_MAX_IDLE_MS_CONFIG), new Metrics(), _time, + METRIC_GRP_PREFIX, channelBuilder, logContext); + + KafkaClient kafkaClient = new NetworkClient( + selector, metadata, clientId, MAX_INFLIGHT_REQUESTS_PER_CONNECTION, + config.getLong(ConsumerConfig.RECONNECT_BACKOFF_MS_CONFIG), + config.getLong(ConsumerConfig.RECONNECT_BACKOFF_MAX_MS_CONFIG), + config.getInt(ConsumerConfig.SEND_BUFFER_CONFIG), config.getInt(ConsumerConfig.RECEIVE_BUFFER_CONFIG), + config.getInt(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG), + config.getLong(ConsumerConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MS_CONFIG), config.getLong(ConsumerConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MAX_MS_CONFIG), + ClientDnsLookup.DEFAULT, _time, true, + new ApiVersions(), logContext); + + + LOGGER.debug("The network client active: {}", kafkaClient.active()); + LOGGER.debug("The network client has in flight requests: {}", kafkaClient.hasInFlightRequests()); + LOGGER.debug("The network client in flight request count: {}", kafkaClient.inFlightRequestCount()); + + _consumerNetworkClient = new ConsumerNetworkClient(logContext, kafkaClient, metadata, _time, retryBackoffMs, + config.getInt(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG), heartbeatIntervalMs); + + ThreadFactory threadFactory = new ThreadFactory() { + @Override + public Thread newThread(Runnable runnable) { + return new Thread(runnable, serviceName + SERVICE_SUFFIX); + } + }; + _scheduledExecutorService = Executors.newSingleThreadScheduledExecutor(threadFactory); + + LOGGER.info("OffsetCommitService's ConsumerConfig - {}", Utils.prettyPrint(config.values())); + } + + /** + * The start logic must only execute once. If an error occurs then the implementer of this class must assume that + * stop() will be called to clean up. 
This method must be thread safe and must assume that stop() may be called + * concurrently. This can happen if the monitoring application's life cycle is being managed by a container. Start + * will only be called once. + */ + @Override + public void start() { + if (_isRunning.compareAndSet(false, true)) { + + Runnable runnable = new OffsetCommitServiceRunnable(); + _scheduledExecutorService.scheduleWithFixedDelay(runnable, 1, 2, TimeUnit.SECONDS); + LOGGER.info("Scheduled the offset commit service executor."); + } + } + + private class OffsetCommitServiceRunnable implements Runnable { + @Override + public void run() { + try { + sendOffsetCommitRequest(_consumerNetworkClient, _adminClient, _consumerGroup); + } catch (ExecutionException | InterruptedException e) { + LOGGER.error("OffsetCommitServiceRunnable class encountered an exception: ", e); + } + } + } + + /** + * + * @param consumerNetworkClient Kafka consumer network client. Higher level consumer access + * to the network layer with basic support for request futures. + * @param adminClient admin client object + * @param consumerGroup consumer group name + * @throws ExecutionException when attempting to retrieve the result of a task that aborted by throwing an exception + * @throws InterruptedException Thrown when the thread is waiting, sleeping, or otherwise occupied, + * and the thread is interrupted, either before or during the activity. + */ + private void sendOffsetCommitRequest(ConsumerNetworkClient consumerNetworkClient, AdminClient adminClient, + String consumerGroup) throws ExecutionException, InterruptedException, RuntimeException { + + + LOGGER.trace("Consumer groups available: {}", adminClient.listConsumerGroups().all().get()); + + Node groupCoordinator = adminClient.describeConsumerGroups(Collections.singleton(consumerGroup)) + .all() + .get() + .get(consumerGroup) + .coordinator(); + LOGGER.trace("Consumer group {} coordinator {}, consumer group {}", consumerGroup, groupCoordinator, consumerGroup); + + consumerNetworkClient.tryConnect(groupCoordinator); + consumerNetworkClient.maybeTriggerWakeup(); + + OffsetCommitRequestData offsetCommitRequestData = new OffsetCommitRequestData(); + AbstractRequest.Builder offsetCommitRequestBuilder = new OffsetCommitRequest.Builder(offsetCommitRequestData); + + LOGGER.debug("pending request count: {}", consumerNetworkClient.pendingRequestCount()); + + RequestFuture future = consumerNetworkClient.send(groupCoordinator, offsetCommitRequestBuilder); + + if (consumerNetworkClient.isUnavailable(groupCoordinator)) { + _offsetCommitServiceMetrics.recordUnavailable(); + throw new RuntimeException("Unavailable consumerNetworkClient for " + groupCoordinator); + } else { + LOGGER.trace("The consumerNetworkClient is available for {}", groupCoordinator); + if (consumerNetworkClient.hasPendingRequests()) { + + boolean consumerNetworkClientPollResult = + consumerNetworkClient.poll(future, _time.timer(Duration.ofSeconds(5).toMillis())); + LOGGER.debug("result of poll {}", consumerNetworkClientPollResult); + + if (future.failed() && !future.isRetriable()) { + _offsetCommitServiceMetrics.recordFailed(); + throw future.exception(); + } + + if (future.succeeded() && future.isDone() && consumerNetworkClientPollResult) { + + ClientResponse clientResponse = future.value(); + + _offsetCommitServiceMetrics.recordSuccessful(); + LOGGER.info("ClientResponseRequestFuture value {} for coordinator {} and consumer group {}", clientResponse, + groupCoordinator, consumerGroup); + } + } + } + } + + /** + * This may be 
called multiple times. This method must be thread safe and must assume that start() may be called + * concurrently. This can happen if the monitoring application's life cycle is being managed by a container. + * Implementations must be non-blocking and should release the resources acquired by the service during start(). + */ + @Override + public void stop() { + if (_isRunning.compareAndSet(true, false)) { + _scheduledExecutorService.shutdown(); + } + } + + /** + * Implementations of this method must be thread safe as it can be called at any time. Implementations must be + * non-blocking. + * @return true if this start() has returned successfully else this must return false. This must also return false if + * the service can no longer perform its function. + */ + @Override + public boolean isRunning() { + return _isRunning.get() && !_scheduledExecutorService.isShutdown(); + } + + /** + * Implementations of this method must be thread safe and must be blocking. + */ + @Override + public void awaitShutdown(long timeout, TimeUnit unit) { + try { + _scheduledExecutorService.awaitTermination(timeout, unit); + } catch (InterruptedException interruptedException) { + LOGGER.error("Thread interrupted when waiting for {} to shutdown.", _serviceName, interruptedException); + } + LOGGER.info("{} shutdown completed.", _serviceName); + } +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/OffsetCommitServiceFactory.java b/src/main/java/com/linkedin/xinfra/monitor/services/OffsetCommitServiceFactory.java new file mode 100644 index 00000000..87bd4f88 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/OffsetCommitServiceFactory.java @@ -0,0 +1,84 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
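Stripped of logging and metrics, sendOffsetCommitRequest above follows ConsumerNetworkClient's standard send-then-poll cycle. A distilled sketch of that cycle (the helper class and method names are illustrative, not from this patch):

```java
import java.time.Duration;
import org.apache.kafka.clients.ClientResponse;
import org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient;
import org.apache.kafka.clients.consumer.internals.RequestFuture;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.requests.AbstractRequest;
import org.apache.kafka.common.utils.Time;

final class SendAndPollSketch {
  // Distilled request/response cycle: connect to the coordinator, queue the
  // request, then drive network I/O until the future resolves or the timer expires.
  static ClientResponse sendAndWait(ConsumerNetworkClient client, Node coordinator,
      AbstractRequest.Builder<?> request, Time time) {
    client.tryConnect(coordinator);
    RequestFuture<ClientResponse> future = client.send(coordinator, request);
    client.poll(future, time.timer(Duration.ofSeconds(5).toMillis()));
    if (future.failed()) {
      throw future.exception();
    }
    return future.value();
  }
}
```

Checking future.failed() before reading future.value() mirrors the error handling in the service above.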
+ */ + +package com.linkedin.xinfra.monitor.services; + + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.linkedin.xinfra.monitor.XinfraMonitorConstants; +import com.linkedin.xinfra.monitor.common.Utils; +import com.linkedin.xinfra.monitor.services.configs.CommonServiceConfig; +import java.util.Map; +import java.util.Properties; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Factory for OffsetCommitService + */ +@SuppressWarnings("rawtypes") +public class OffsetCommitServiceFactory implements ServiceFactory { + + private static final Logger LOGGER = LoggerFactory.getLogger(OffsetCommitServiceFactory.class); + private final Map _properties; + private final String _serviceName; + + public OffsetCommitServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws JsonProcessingException { + LOGGER.info("Creating OffsetCommitService..."); + AdminClient adminClient = AdminClient.create(_properties); + + Properties preparedProps = this.prepareConfigs(_properties); + ConsumerConfig consumerConfig = new ConsumerConfig(preparedProps); + LOGGER.info("OffsetCommitServiceFactory consumer config {}", Utils.prettyPrint(consumerConfig.values())); + + return new OffsetCommitService(consumerConfig, _serviceName, adminClient); + } + + /** + * populate configs for kafka client + * @param props Map of String to Object + * @return Properties + */ + @SuppressWarnings("unchecked") + private Properties prepareConfigs(Map props) { + + String zkConnect = (String) props.get(CommonServiceConfig.ZOOKEEPER_CONNECT_CONFIG); + String brokerList = (String) props.get(CommonServiceConfig.BOOTSTRAP_SERVERS_CONFIG); + + Properties consumerProps = new Properties(); + consumerProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, XinfraMonitorConstants.FALSE); + consumerProps.put(ConsumerConfig.CLIENT_ID_CONFIG, XinfraMonitorConstants.XINFRA_MONITOR_PREFIX + _serviceName); + consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); + consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); + consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList); + consumerProps.put(CommonServiceConfig.ZOOKEEPER_CONNECT_CONFIG, zkConnect); + + Map customProps = (Map) props.get(CommonServiceConfig.CONSUMER_PROPS_CONFIG); + if (customProps != null) { + for (Map.Entry entry : customProps.entrySet()) { + consumerProps.put(entry.getKey(), entry.getValue()); + } + } + + return consumerProps; + } +} diff --git a/src/main/java/com/linkedin/kmf/services/ProduceService.java b/src/main/java/com/linkedin/xinfra/monitor/services/ProduceService.java similarity index 58% rename from src/main/java/com/linkedin/kmf/services/ProduceService.java rename to src/main/java/com/linkedin/xinfra/monitor/services/ProduceService.java index 8e2e1290..28e49242 100644 --- a/src/main/java/com/linkedin/kmf/services/ProduceService.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/ProduceService.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. 
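prepareConfigs above seeds non-negotiable defaults first and then copies consumer.props entries over them, so operator-supplied values win. A minimal sketch of that merge order, with literal key strings standing in for the ConsumerConfig constants used in the real code:

```java
import java.util.Map;
import java.util.Properties;

final class ConsumerPropsMergeSketch {
  static Properties merge(Map<String, String> customConsumerProps) {
    Properties props = new Properties();
    // Service defaults go in first (lowest priority).
    props.put("enable.auto.commit", "false");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    // Entries from consumer.props overwrite the defaults (highest priority).
    if (customConsumerProps != null) {
      customConsumerProps.forEach(props::put);
    }
    return props;
  }
}
```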
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,21 +7,25 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.services; - -import com.linkedin.kmf.common.Utils; -import com.linkedin.kmf.partitioner.KMPartitioner; -import com.linkedin.kmf.producer.BaseProducerRecord; -import com.linkedin.kmf.producer.KMBaseProducer; -import com.linkedin.kmf.producer.NewProducer; -import com.linkedin.kmf.services.configs.ProduceServiceConfig; +package com.linkedin.xinfra.monitor.services; + +import com.linkedin.xinfra.monitor.common.Utils; +import com.linkedin.xinfra.monitor.partitioner.KMPartitioner; +import com.linkedin.xinfra.monitor.producer.BaseProducerRecord; +import com.linkedin.xinfra.monitor.producer.KMBaseProducer; +import com.linkedin.xinfra.monitor.producer.NewProducer; +import com.linkedin.xinfra.monitor.services.configs.ProduceServiceConfig; +import com.linkedin.xinfra.monitor.services.metrics.ProduceMetrics; +import java.time.Duration; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ThreadFactory; @@ -29,34 +33,30 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.TopicDescription; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.clients.producer.RecordMetadata; -import org.apache.kafka.common.MetricName; import org.apache.kafka.common.config.ConfigException; import org.apache.kafka.common.metrics.JmxReporter; -import org.apache.kafka.common.metrics.Measurable; import org.apache.kafka.common.metrics.MetricConfig; import org.apache.kafka.common.metrics.Metrics; import org.apache.kafka.common.metrics.MetricsReporter; -import org.apache.kafka.common.metrics.Sensor; -import org.apache.kafka.common.metrics.stats.Rate; -import org.apache.kafka.common.metrics.stats.Total; import org.apache.kafka.common.utils.SystemTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class ProduceService implements Service { +@SuppressWarnings("rawtypes") +public class ProduceService extends AbstractService { private static final Logger LOG = LoggerFactory.getLogger(ProduceService.class); - private static final String METRIC_GROUP_NAME = "produce-service"; - private static final String[] NONOVERRIDABLE_PROPERTIES = new String[]{ + private static final String[] NON_OVERRIDABLE_PROPERTIES = new String[]{ ProduceServiceConfig.BOOTSTRAP_SERVERS_CONFIG, ProduceServiceConfig.ZOOKEEPER_CONNECT_CONFIG }; - private final String _name; private final ProduceMetrics _sensors; private KMBaseProducer _producer; - private KMPartitioner _partitioner; + private final KMPartitioner _partitioner; private ScheduledExecutorService _produceExecutor; private final 
ScheduledExecutorService _handleNewPartitionsExecutor; private final int _produceDelayMs; @@ -73,15 +73,18 @@ public class ProduceService implements Service { private final Map _producerPropsOverride; private final String _producerClassName; private final int _threadsNum; - private final String _zkConnect; + private final AdminClient _adminClient; + private static final String KEY_SERIALIZER_CLASS = "org.apache.kafka.common.serialization.StringSerializer"; public ProduceService(Map props, String name) throws Exception { + // TODO: Make values of below fields come from configs + super(10, Duration.ofMinutes(1)); _name = name; ProduceServiceConfig config = new ProduceServiceConfig(props); - _zkConnect = config.getString(ProduceServiceConfig.ZOOKEEPER_CONNECT_CONFIG); _brokerList = config.getString(ProduceServiceConfig.BOOTSTRAP_SERVERS_CONFIG); String producerClass = config.getString(ProduceServiceConfig.PRODUCER_CLASS_CONFIG); - + int latencyPercentileMaxMs = config.getInt(ProduceServiceConfig.LATENCY_PERCENTILE_MAX_MS_CONFIG); + int latencyPercentileGranularityMs = config.getInt(ProduceServiceConfig.LATENCY_PERCENTILE_GRANULARITY_MS_CONFIG); _partitioner = config.getConfiguredInstance(ProduceServiceConfig.PARTITIONER_CLASS_CONFIG, KMPartitioner.class); _threadsNum = config.getInt(ProduceServiceConfig.PRODUCE_THREAD_NUM_CONFIG); _topic = config.getString(ProduceServiceConfig.TOPIC_CONFIG); @@ -89,19 +92,21 @@ public ProduceService(Map props, String name) throws Exception { _produceDelayMs = config.getInt(ProduceServiceConfig.PRODUCE_RECORD_DELAY_MS_CONFIG); _recordSize = config.getInt(ProduceServiceConfig.PRODUCE_RECORD_SIZE_BYTE_CONFIG); _sync = config.getBoolean(ProduceServiceConfig.PRODUCE_SYNC_CONFIG); + boolean treatZeroThroughputAsUnavailable = + config.getBoolean(ProduceServiceConfig.PRODUCER_TREAT_ZERO_THROUGHPUT_AS_UNAVAILABLE_CONFIG); _partitionNum = new AtomicInteger(0); _running = new AtomicBoolean(false); _nextIndexPerPartition = new ConcurrentHashMap<>(); _producerPropsOverride = props.containsKey(ProduceServiceConfig.PRODUCER_PROPS_CONFIG) ? 
(Map) props.get(ProduceServiceConfig.PRODUCER_PROPS_CONFIG) : new HashMap<>(); - for (String property: NONOVERRIDABLE_PROPERTIES) { + for (String property: NON_OVERRIDABLE_PROPERTIES) { if (_producerPropsOverride.containsKey(property)) { throw new ConfigException("Override must not contain " + property + " config."); } } - _partitionNum.set(Utils.getPartitionNumForTopic(_zkConnect, _topic)); + _adminClient = AdminClient.create(props); if (producerClass.equals(NewProducer.class.getCanonicalName()) || producerClass.equals(NewProducer.class.getSimpleName())) { _producerClassName = NewProducer.class.getCanonicalName(); @@ -109,7 +114,7 @@ public ProduceService(Map props, String name) throws Exception { _producerClassName = producerClass; } - initializeProducer(); + initializeProducer(props); _produceExecutor = Executors.newScheduledThreadPool(_threadsNum, new ProduceServiceThreadFactory()); _handleNewPartitionsExecutor = Executors.newSingleThreadScheduledExecutor(new HandleNewPartitionsThreadFactory()); @@ -120,27 +125,31 @@ public ProduceService(Map props, String name) throws Exception { Metrics metrics = new Metrics(metricConfig, reporters, new SystemTime()); Map tags = new HashMap<>(); tags.put("name", _name); - _sensors = new ProduceMetrics(metrics, tags); + _sensors = + new ProduceMetrics(metrics, tags, latencyPercentileGranularityMs, latencyPercentileMaxMs, _partitionNum, + treatZeroThroughputAsUnavailable); } - - private void initializeProducer() throws Exception { - + private void initializeProducer(Map props) throws Exception { Properties producerProps = new Properties(); // Assign default config. This has the lowest priority. producerProps.put(ProducerConfig.ACKS_CONFIG, "-1"); producerProps.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, "20000"); - producerProps.put(ProducerConfig.RETRIES_CONFIG, 3); - producerProps.put(ProducerConfig.BLOCK_ON_BUFFER_FULL_CONFIG, "true"); + producerProps.put(ProducerConfig.RETRIES_CONFIG, "3"); + producerProps.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, Long.MAX_VALUE); producerProps.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, "1"); - producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer"); - producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer"); + producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, KEY_SERIALIZER_CLASS); + producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KEY_SERIALIZER_CLASS); // Assign config specified for ProduceService. producerProps.put(ProducerConfig.CLIENT_ID_CONFIG, _producerId); producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, _brokerList); // Assign config specified for producer. This has the highest priority. 
producerProps.putAll(_producerPropsOverride); + if (props.containsKey(ProduceServiceConfig.PRODUCER_PROPS_CONFIG)) { + props.forEach(producerProps::putIfAbsent); + } + _producer = (KMBaseProducer) Class.forName(_producerClassName).getConstructor(Properties.class).newInstance(producerProps); LOG.info("{}/ProduceService is initialized.", _name); } @@ -148,28 +157,29 @@ private void initializeProducer() throws Exception { @Override public synchronized void start() { if (_running.compareAndSet(false, true)) { - initializeStateForPartitions(); - _handleNewPartitionsExecutor.scheduleWithFixedDelay(new NewPartitionHandler(), 1000, 30000, TimeUnit.MILLISECONDS); + TopicDescription topicDescription = getTopicDescription(_adminClient, _topic); + int partitionNum = topicDescription.partitions().size(); + initializeStateForPartitions(partitionNum); + _handleNewPartitionsExecutor.scheduleWithFixedDelay(new NewPartitionHandler(), 1, 30, TimeUnit.SECONDS); LOG.info("{}/ProduceService started", _name); } } - private void initializeStateForPartitions() { - Map keyMapping = generateKeyMappings(); - int partitionNum = _partitionNum.get(); + private void initializeStateForPartitions(int partitionNum) { + Map keyMapping = generateKeyMappings(partitionNum); for (int partition = 0; partition < partitionNum; partition++) { String key = keyMapping.get(partition); - //This is what preserves sequence numbers across restarts + /* This is what preserves sequence numbers across restarts */ if (!_nextIndexPerPartition.containsKey(partition)) { _nextIndexPerPartition.put(partition, new AtomicLong(0)); _sensors.addPartitionSensors(partition); } _produceExecutor.scheduleWithFixedDelay(new ProduceRunnable(partition, key), _produceDelayMs, _produceDelayMs, TimeUnit.MILLISECONDS); } + _partitionNum.set(partitionNum); } - private Map generateKeyMappings() { - int partitionNum = _partitionNum.get(); + private Map generateKeyMappings(int partitionNum) { HashMap keyMapping = new HashMap<>(); int nextInt = 0; @@ -191,87 +201,27 @@ public synchronized void stop() { _produceExecutor.shutdown(); _handleNewPartitionsExecutor.shutdown(); _producer.close(); - LOG.info("{}/ProduceService stopped", _name); + LOG.info("{}/ProduceService stopped.", _name); } } @Override - public void awaitShutdown() { + public void awaitShutdown(long timeout, TimeUnit unit) { try { _produceExecutor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); _handleNewPartitionsExecutor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { - LOG.info("Thread interrupted when waiting for {}/ProduceService to shutdown", _name); + LOG.info("Thread interrupted when waiting for {}/ProduceService to shutdown.", _name); } - LOG.info("{}/ProduceService shutdown completed", _name); + LOG.info("{}/ProduceService shutdown completed.", _name); } + @Override public boolean isRunning() { return _running.get() && !_handleNewPartitionsExecutor.isShutdown(); } - private class ProduceMetrics { - public final Metrics metrics; - private final Sensor _recordsProduced; - private final Sensor _produceError; - private final ConcurrentMap _recordsProducedPerPartition; - private final ConcurrentMap _produceErrorPerPartition; - private final Map _tags; - - public ProduceMetrics(Metrics metrics, final Map tags) { - this.metrics = metrics; - this._tags = tags; - - _recordsProducedPerPartition = new ConcurrentHashMap<>(); - _produceErrorPerPartition = new ConcurrentHashMap<>(); - - _recordsProduced = metrics.sensor("records-produced"); - 
_recordsProduced.add(new MetricName("records-produced-rate", METRIC_GROUP_NAME, "The average number of records per second that are produced", tags), new Rate()); - _recordsProduced.add(new MetricName("records-produced-total", METRIC_GROUP_NAME, "The total number of records that are produced", tags), new Total()); - - _produceError = metrics.sensor("produce-error"); - _produceError.add(new MetricName("produce-error-rate", METRIC_GROUP_NAME, "The average number of errors per second", tags), new Rate()); - _produceError.add(new MetricName("produce-error-total", METRIC_GROUP_NAME, "The total number of errors", tags), new Total()); - - metrics.addMetric(new MetricName("produce-availability-avg", METRIC_GROUP_NAME, "The average produce availability", tags), - new Measurable() { - @Override - public double measure(MetricConfig config, long now) { - double availabilitySum = 0.0; - int partitionNum = _partitionNum.get(); - for (int partition = 0; partition < partitionNum; partition++) { - double recordsProduced = _sensors.metrics.metrics().get(new MetricName("records-produced-rate-partition-" + partition, METRIC_GROUP_NAME, tags)).value(); - double produceError = _sensors.metrics.metrics().get(new MetricName("produce-error-rate-partition-" + partition, METRIC_GROUP_NAME, tags)).value(); - // If there is no error, error rate sensor may expire and the value may be NaN. Treat NaN as 0 for error rate. - if (Double.isNaN(produceError) || Double.isInfinite(produceError)) { - produceError = 0; - } - // If there is either succeeded or failed produce to a partition, consider its availability as 0. - if (recordsProduced + produceError > 0) { - availabilitySum += recordsProduced / (recordsProduced + produceError); - } - } - // Assign equal weight to per-partition availability when calculating overall availability - return availabilitySum / partitionNum; - } - } - ); - } - - void addPartitionSensors(int partition) { - Sensor recordsProducedSensor = metrics.sensor("records-produced-partition-" + partition); - recordsProducedSensor.add(new MetricName("records-produced-rate-partition-" + partition, METRIC_GROUP_NAME, - "The average number of records per second that are produced to this partition", _tags), new Rate()); - _recordsProducedPerPartition.put(partition, recordsProducedSensor); - - Sensor errorsSensor = metrics.sensor("produce-error-partition-" + partition); - errorsSensor.add(new MetricName("produce-error-rate-partition-" + partition, METRIC_GROUP_NAME, - "The average number of errors per second when producing to this partition", _tags), new Rate()); - _produceErrorPerPartition.put(partition, errorsSensor); - } - } - /** * This creates the records sent to the consumer. 
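Each message produced by ProduceRunnable is a small JSON document built by Utils.jsonFromFields, carrying the sequence index and send timestamp that the consume side uses to measure loss, duplication, and end-to-end latency. A sketch of a comparable payload follows; the field names are assumptions for illustration, not the exact schema:

```java
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;

final class MonitorRecordSketch {
  static String payload(String topic, long index, long timeMs, String producerId, int recordSize)
      throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    ObjectNode node = mapper.createObjectNode();
    node.put("topic", topic);         // assumed field: which monitor topic this record belongs to
    node.put("index", index);         // assumed field: per-partition sequence number (detects loss/duplicates)
    node.put("time", timeMs);         // assumed field: send timestamp (consume side derives latency)
    node.put("producerId", producerId);
    // Pad the record up to the configured size so throughput numbers stay comparable.
    int padding = Math.max(0, recordSize - node.toString().length());
    node.put("content", new String(new char[padding]).replace('\0', 'x'));
    return mapper.writeValueAsString(node);
  }
}
```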
*/ @@ -287,11 +237,14 @@ private class ProduceRunnable implements Runnable { public void run() { try { long nextIndex = _nextIndexPerPartition.get(_partition).get(); - String message = Utils.jsonFromFields(_topic, nextIndex, System.currentTimeMillis(), _producerId, _recordSize); + long currMs = System.currentTimeMillis(); + String message = Utils.jsonFromFields(_topic, nextIndex, currMs, _producerId, _recordSize); BaseProducerRecord record = new BaseProducerRecord(_topic, _partition, _key, message); RecordMetadata metadata = _producer.send(record, _sync); + _sensors._produceDelay.record(System.currentTimeMillis() - currMs); _sensors._recordsProduced.record(); _sensors._recordsProducedPerPartition.get(_partition).record(); + _sensors._produceErrorInLastSendPerPartition.put(_partition, false); if (nextIndex == -1 && _sync) { nextIndex = metadata.offset(); } else { @@ -301,6 +254,7 @@ public void run() { } catch (Exception e) { _sensors._produceError.record(); _sensors._produceErrorPerPartition.get(_partition).record(); + _sensors._produceErrorInLastSendPerPartition.put(_partition, true); LOG.warn(_name + " failed to send message", e); } } @@ -313,37 +267,44 @@ public void run() { * sensors are added for the new partitions. */ private class NewPartitionHandler implements Runnable { - public void run() { - int currentPartitionCount = Utils.getPartitionNumForTopic(_zkConnect, _topic); - if (currentPartitionCount <= 0) { - LOG.info("{}/ProduceService topic {} does not exist.", _name, _topic); - return; - } else if (currentPartitionCount == _partitionNum.get()) { - return; - } - LOG.info("{}/ProduceService detected new partitions of topic {}", _name, _topic); - //TODO: Should the ProduceService exit if we can't restart the producer runnables? - _produceExecutor.shutdown(); + LOG.debug("{}/ProduceService check partition number for topic {}.", _name, _topic); try { - _produceExecutor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); + int currentPartitionNum = + _adminClient.describeTopics(Collections.singleton(_topic)).all().get().get(_topic).partitions().size(); + if (currentPartitionNum <= 0) { + LOG.info("{}/ProduceService topic {} does not exist.", _name, _topic); + return; + } else if (currentPartitionNum == _partitionNum.get()) { + return; + } + LOG.info("{}/ProduceService detected new partitions of topic {}", _name, _topic); + //TODO: Should the ProduceService exit if we can't restart the producer runnables? 
+ _produceExecutor.shutdown(); + try { + _produceExecutor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + throw new IllegalStateException(e); + } + _producer.close(); + try { + initializeProducer(new HashMap<>()); + } catch (Exception e) { + LOG.error("Failed to restart producer.", e); + throw new IllegalStateException(e); + } + _produceExecutor = Executors.newScheduledThreadPool(_threadsNum); + initializeStateForPartitions(currentPartitionNum); + LOG.info("New partitions added to monitoring."); } catch (InterruptedException e) { - throw new IllegalStateException(e); + LOG.error("InterruptedException occurred.", e); + } catch (ExecutionException e) { + LOG.error("ExecutionException occurred.", e); } - _producer.close(); - _partitionNum.set(currentPartitionCount); - try { - initializeProducer(); - } catch (Exception e) { - LOG.error("Failed to restart producer.", e); - throw new IllegalStateException(e); - } - _produceExecutor = Executors.newScheduledThreadPool(_threadsNum); - initializeStateForPartitions(); - LOG.info("New partitions added to monitoring."); } } + private class ProduceServiceThreadFactory implements ThreadFactory { private final AtomicInteger _threadId = new AtomicInteger(); @@ -358,4 +319,4 @@ public Thread newThread(Runnable r) { } } -} \ No newline at end of file +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/ProduceServiceFactory.java b/src/main/java/com/linkedin/xinfra/monitor/services/ProduceServiceFactory.java new file mode 100644 index 00000000..9769bac1 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/ProduceServiceFactory.java @@ -0,0 +1,34 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import java.util.Map; + + +/** + * Factory that constructs the ProduceService + */ +@SuppressWarnings("rawtypes") +public class ProduceServiceFactory implements ServiceFactory { + private final Map _props; + private final String _name; + + public ProduceServiceFactory(Map props, String name) { + _props = props; + _name = name; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + return new ProduceService(_props, _name); + } +} diff --git a/src/main/java/com/linkedin/kmf/services/Service.java b/src/main/java/com/linkedin/xinfra/monitor/services/Service.java similarity index 84% rename from src/main/java/com/linkedin/kmf/services/Service.java rename to src/main/java/com/linkedin/xinfra/monitor/services/Service.java index d18318b3..62f85c62 100644 --- a/src/main/java/com/linkedin/kmf/services/Service.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/Service.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. 
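With the ZooKeeper lookup gone, NewPartitionHandler's partition-count probe reduces to a single AdminClient round trip. Distilled (the helper name is illustrative):

```java
import java.util.Collections;
import java.util.concurrent.ExecutionException;
import org.apache.kafka.clients.admin.AdminClient;

final class PartitionCountSketch {
  static int partitionCount(AdminClient adminClient, String topic)
      throws ExecutionException, InterruptedException {
    // describeTopics(...).all() resolves to a Map<String, TopicDescription>.
    return adminClient.describeTopics(Collections.singleton(topic))
        .all().get().get(topic).partitions().size();
  }
}
```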
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,7 +7,11 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.services; + +package com.linkedin.xinfra.monitor.services; + +import java.util.concurrent.TimeUnit; + /** * Services are components of a monitoring application that are expected to be running continuously in order to perform @@ -15,7 +19,7 @@ */ public interface Service { - static final String JMX_PREFIX = "kmf.services"; + String JMX_PREFIX = "kmf.services"; /** * The start logic must only execute once. If an error occurs then the implementer of this class must assume that @@ -43,5 +47,10 @@ public interface Service { /** * Implementations of this method must be thread safe and must be blocking. */ - void awaitShutdown(); + void awaitShutdown(long timeout, TimeUnit unit); + + default String getServiceName() { + return this.getClass().getSimpleName(); + } + } diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/ServiceFactory.java b/src/main/java/com/linkedin/xinfra/monitor/services/ServiceFactory.java new file mode 100644 index 00000000..a6761654 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/ServiceFactory.java @@ -0,0 +1,33 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +/** + * Factory that instantiates an instance of Xinfra Monitor Service. + * + * INFORMATION: + * "Class 'ClusterTopicManipulationServiceFactory' is never used" and + * "Constructor 'ClusterTopicManipulationServiceFactory(java.util.Map, java.lang.String)' is never used" + * shown as warnings in IntelliJ IDEA are false positives. + * XinfraMonitor class uses (ServiceFactory) Class.forName(..) + * .getConstructor(...).newInstance(...) to instantiate the factory + * class associated with the given string name + */ +public interface ServiceFactory { + + /** + * This method creates a Xinfra Monitor Service. + * @return a Xinfra Monitor service object + * @throws Exception that occurs while creating an XM Service + */ + Service createService() throws Exception; + +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/SignalFxMetricsReporterService.java b/src/main/java/com/linkedin/xinfra/monitor/services/SignalFxMetricsReporterService.java new file mode 100644 index 00000000..e84f1200 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/SignalFxMetricsReporterService.java @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2018 SignalFx, Inc. Licensed under the Apache 2 License.
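The reflective construction path that the ServiceFactory javadoc describes looks roughly like this from the XinfraMonitor side; a sketch assuming the (Map, String) constructor shape that every factory in this patch exposes:

```java
import java.util.Map;

final class FactoryLoadSketch {
  @SuppressWarnings("rawtypes")
  static Service load(String factoryClassName, Map props, String serviceName) throws Exception {
    // Every *ServiceFactory in this patch exposes a (Map, String) constructor,
    // so the monitor can instantiate it purely from configuration.
    ServiceFactory factory = (ServiceFactory) Class.forName(factoryClassName)
        .getConstructor(Map.class, String.class)
        .newInstance(props, serviceName);
    return factory.createService();
  }
}
```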
+ */ + +package com.linkedin.xinfra.monitor.services; + +import com.codahale.metrics.MetricRegistry; +import com.linkedin.xinfra.monitor.common.MbeanAttributeValue; +import com.linkedin.xinfra.monitor.common.Utils; +import com.linkedin.xinfra.monitor.services.configs.SignalFxMetricsReporterServiceConfig; +import com.signalfx.codahale.metrics.SettableDoubleGauge; +import com.signalfx.codahale.reporter.MetricMetadata; +import com.signalfx.codahale.reporter.SignalFxReporter; +import com.signalfx.endpoint.SignalFxEndpoint; +import java.net.URL; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SignalFxMetricsReporterService implements Service { + private static final Logger LOG = LoggerFactory.getLogger(SignalFxMetricsReporterService.class); + + private final String _name; + private final List _metricNames; + private final int _reportIntervalSec; + private final ScheduledExecutorService _executor; + private final MetricRegistry _metricRegistry; + private final SignalFxReporter _signalfxReporter; + + private final MetricMetadata _metricMetadata; + private final Map _metricMap; + private Map _dimensionsMap; + + public SignalFxMetricsReporterService(Map props, String name) throws Exception { + SignalFxMetricsReporterServiceConfig config = new SignalFxMetricsReporterServiceConfig(props); + + _name = name; + _metricNames = config.getList(SignalFxMetricsReporterServiceConfig.REPORT_METRICS_CONFIG); + _reportIntervalSec = config.getInt(SignalFxMetricsReporterServiceConfig.REPORT_INTERVAL_SEC_CONFIG); + String signalfxUrl = config.getString(SignalFxMetricsReporterServiceConfig.REPORT_SIGNALFX_URL); + String signalfxToken = config.getString(SignalFxMetricsReporterServiceConfig.SIGNALFX_TOKEN); + + if (StringUtils.isEmpty(signalfxToken)) { + throw new IllegalArgumentException("SignalFx token is not configured"); + } + + _executor = Executors.newSingleThreadScheduledExecutor(); + _metricRegistry = new MetricRegistry(); + _metricMap = new HashMap(); + _dimensionsMap = new HashMap(); + if (props.containsKey(SignalFxMetricsReporterServiceConfig.SIGNALFX_METRIC_DIMENSION)) { + _dimensionsMap = (Map) props.get(SignalFxMetricsReporterServiceConfig.SIGNALFX_METRIC_DIMENSION); + } + + SignalFxReporter.Builder sfxReportBuilder = new SignalFxReporter.Builder( + _metricRegistry, signalfxToken + ); + if (!StringUtils.isEmpty(signalfxUrl)) { + sfxReportBuilder.setEndpoint(getSignalFxEndpoint(signalfxUrl)); + } + _signalfxReporter = sfxReportBuilder.build(); + + _metricMetadata = _signalfxReporter.getMetricMetadata(); + } + + @Override + public synchronized void start() { + _signalfxReporter.start(_reportIntervalSec, TimeUnit.SECONDS); + _executor.scheduleAtFixedRate(() -> { + try { + captureMetrics(); + } catch (Exception e) { + LOG.error(_name + "/SignalFxMetricsReporterService failed to report metrics", e); + } + }, _reportIntervalSec, _reportIntervalSec, TimeUnit.SECONDS); + LOG.info("{}/SignalFxMetricsReporterService started", _name); + } + + @Override + public synchronized void stop() { + _executor.shutdown(); + _signalfxReporter.stop(); + LOG.info("{}/SignalFxMetricsReporterService stopped", _name); + } + + @Override + public boolean isRunning() { + return !_executor.isShutdown(); + } + + @Override + public void awaitShutdown(long 
timeout, TimeUnit unit) { + try { + _executor.awaitTermination(5, TimeUnit.MINUTES); + } catch (InterruptedException e) { + LOG.info("Thread interrupted when waiting for {}/SignalFxMetricsReporterService to shutdown", _name); + } + LOG.info("{}/SignalFxMetricsReporterService shutdown completed", _name); + } + + + private SignalFxEndpoint getSignalFxEndpoint(String urlStr) throws Exception { + URL url = new URL(urlStr); + return new SignalFxEndpoint(url.getProtocol(), url.getHost(), url.getPort()); + } + + private String generateSignalFxMetricName(String bean, String attribute) { + String service = bean.split(":")[1]; + String serviceType = service.split(",")[1].split("=")[1]; + return String.format("%s.%s", serviceType, attribute); + } + + private void captureMetrics() { + for (String metricName : _metricNames) { + int index = metricName.lastIndexOf(':'); + String mbeanExpr = metricName.substring(0, index); + String attributeExpr = metricName.substring(index + 1); + + List attributeValues = Utils.getMBeanAttributeValues(mbeanExpr, attributeExpr); + + for (final MbeanAttributeValue attributeValue : attributeValues) { + String metric = attributeValue.toString(); + String key = metric.substring(0, metric.lastIndexOf("=")); + String[] parts = key.split(","); + if (parts.length < 2) { + continue; + } + parts = parts[0].split("="); + if (parts.length < 2 || !parts[1].contains("cluster-monitor")) { + continue; + } + setMetricValue(attributeValue); + } + } + } + + private void setMetricValue(MbeanAttributeValue attributeValue) { + String key = attributeValue.mbean() + attributeValue.attribute(); + SettableDoubleGauge metric = _metricMap.get(key); + if (metric == null) { + metric = createMetric(attributeValue); + _metricMap.put(key, metric); + } + metric.setValue(attributeValue.value()); + } + + private SettableDoubleGauge createMetric(MbeanAttributeValue attributeValue) { + String signalFxMetricName = generateSignalFxMetricName(attributeValue.mbean(), attributeValue.attribute()); + SettableDoubleGauge gauge; + + if (signalFxMetricName.contains("partition")) { + gauge = createPartitionMetric(signalFxMetricName); + } else { + gauge = _metricMetadata.forMetric(new SettableDoubleGauge()) + .withMetricName(signalFxMetricName).metric(); + } + LOG.info("Creating metric : {}", signalFxMetricName); + + for (Map.Entry entry : _dimensionsMap.entrySet()) { + _metricMetadata.forMetric(gauge).withDimension(entry.getKey(), entry.getValue()); + } + _metricMetadata.forMetric(gauge).register(_metricRegistry); + + return gauge; + } + + private SettableDoubleGauge createPartitionMetric(String signalFxMetricName) { + int divider = signalFxMetricName.lastIndexOf('-'); + String partitionNumber = signalFxMetricName.substring(divider + 1); + signalFxMetricName = signalFxMetricName.substring(0, divider); + SettableDoubleGauge gauge = _metricMetadata.forMetric(new SettableDoubleGauge()) + .withMetricName(signalFxMetricName).metric(); + _metricMetadata.forMetric(gauge).withDimension("partition", partitionNumber); + return gauge; + } +} + diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/SignalFxMetricsReporterServiceFactory.java b/src/main/java/com/linkedin/xinfra/monitor/services/SignalFxMetricsReporterServiceFactory.java new file mode 100644 index 00000000..9ffc6bbd --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/SignalFxMetricsReporterServiceFactory.java @@ -0,0 +1,37 @@ +/** + * Copyright 2020 LinkedIn Corp. 
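generateSignalFxMetricName above keys the reported name off the second key=value pair in the mbean's property list. Assuming a bean of the form "kmf.services:type=produce-service,name=my-monitor" (the exact bean shape is an assumption here), the parse looks like this in isolation:

```java
final class SignalFxNameSketch {
  // Mirrors generateSignalFxMetricName: take the value of the second
  // key=value pair in the bean and prefix it to the attribute.
  static String metricName(String bean, String attribute) {
    String service = bean.split(":")[1];                      // "type=produce-service,name=my-monitor"
    String secondValue = service.split(",")[1].split("=")[1]; // "my-monitor"
    return String.format("%s.%s", secondValue, attribute);
  }

  public static void main(String[] args) {
    // Prints "my-monitor.records-produced-rate" under the assumed bean shape.
    System.out.println(metricName("kmf.services:type=produce-service,name=my-monitor",
        "records-produced-rate"));
  }
}
```

For per-partition metrics, createMetric above then strips the trailing partition number out of the name and reports it as a "partition" dimension instead.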
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import java.util.Map; + + +/** + * Factory class which instantiates a SignalFxMetricsReporterService service. + */ +@SuppressWarnings("rawtypes") +public class SignalFxMetricsReporterServiceFactory implements ServiceFactory { + + private final Map _properties; + private final String _serviceName; + + public SignalFxMetricsReporterServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + return new SignalFxMetricsReporterService(_properties, _serviceName); + } +} + diff --git a/src/main/java/com/linkedin/kmf/services/StatsdMetricsReporterService.java b/src/main/java/com/linkedin/xinfra/monitor/services/StatsdMetricsReporterService.java similarity index 71% rename from src/main/java/com/linkedin/kmf/services/StatsdMetricsReporterService.java rename to src/main/java/com/linkedin/xinfra/monitor/services/StatsdMetricsReporterService.java index 9838b350..77ce1307 100644 --- a/src/main/java/com/linkedin/kmf/services/StatsdMetricsReporterService.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/StatsdMetricsReporterService.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,24 +7,23 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/ -package com.linkedin.kmf.services; +package com.linkedin.xinfra.monitor.services; -import com.linkedin.kmf.common.MbeanAttributeValue; -import com.linkedin.kmf.services.configs.StatsdMetricsReporterServiceConfig; +import com.linkedin.xinfra.monitor.common.MbeanAttributeValue; +import com.linkedin.xinfra.monitor.common.Utils; +import com.linkedin.xinfra.monitor.services.configs.StatsdMetricsReporterServiceConfig; import com.timgroup.statsd.NonBlockingStatsDClient; import com.timgroup.statsd.StatsDClient; -import org.apache.commons.lang.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.util.List; import java.util.Map; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import static com.linkedin.kmf.common.Utils.getMBeanAttributeValues; public class StatsdMetricsReporterService implements Service { private static final Logger LOG = LoggerFactory.getLogger(StatsdMetricsReporterService.class); @@ -34,7 +33,6 @@ public class StatsdMetricsReporterService implements Service { private final int _reportIntervalSec; private final ScheduledExecutorService _executor; private final StatsDClient _statsdClient; - private final String _metricNamePrefix; public StatsdMetricsReporterService(Map props, String name) { StatsdMetricsReporterServiceConfig config = new StatsdMetricsReporterServiceConfig(props); @@ -43,25 +41,20 @@ public StatsdMetricsReporterService(Map props, String name) { _metricNames = config.getList(StatsdMetricsReporterServiceConfig.REPORT_METRICS_CONFIG); _reportIntervalSec = config.getInt(StatsdMetricsReporterServiceConfig.REPORT_INTERVAL_SEC_CONFIG); _executor = Executors.newSingleThreadScheduledExecutor(); - _metricNamePrefix = config.getString(StatsdMetricsReporterServiceConfig.REPORT_STATSD_PREFIX); - _statsdClient = new NonBlockingStatsDClient(_metricNamePrefix, + _statsdClient = new NonBlockingStatsDClient(config.getString(StatsdMetricsReporterServiceConfig.REPORT_STATSD_PREFIX), config.getString(StatsdMetricsReporterServiceConfig.REPORT_STATSD_HOST), config.getInt(StatsdMetricsReporterServiceConfig.REPORT_STATSD_PORT)); } @Override public synchronized void start() { - _executor.scheduleAtFixedRate( - new Runnable() { - @Override - public void run() { - try { - reportMetrics(); - } catch (Exception e) { - LOG.error(_name + "/StatsdMetricsReporterService failed to report metrics", e); - } - } - }, _reportIntervalSec, _reportIntervalSec, TimeUnit.SECONDS + _executor.scheduleAtFixedRate(() -> { + try { + reportMetrics(); + } catch (Exception e) { + LOG.error(_name + "/StatsdMetricsReporterService failed to report metrics", e); + } + }, _reportIntervalSec, _reportIntervalSec, TimeUnit.SECONDS ); LOG.info("{}/StatsdMetricsReporterService started", _name); } @@ -78,7 +71,7 @@ public boolean isRunning() { } @Override - public void awaitShutdown() { + public void awaitShutdown(long timeout, TimeUnit unit) { try { _executor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { @@ -87,21 +80,20 @@ public void awaitShutdown() { LOG.info("{}/StatsdMetricsReporterService shutdown completed", _name); } + private String generateStatsdMetricName(String bean, String attribute) { String service = bean.split(":")[1]; String serviceName = service.split(",")[0].split("=")[1]; String serviceType = service.split(",")[1].split("=")[1]; - 
String[] segs = {_metricNamePrefix, serviceType, serviceName, attribute}; - String metricName = StringUtils.join(segs, "."); - - return _metricNamePrefix.isEmpty() ? metricName.substring(1) : metricName; + String[] segs = {serviceType, serviceName, attribute}; + return StringUtils.join(segs, "."); } private void reportMetrics() { for (String metricName: _metricNames) { String mbeanExpr = metricName.substring(0, metricName.lastIndexOf(":")); String attributeExpr = metricName.substring(metricName.lastIndexOf(":") + 1); - List attributeValues = getMBeanAttributeValues(mbeanExpr, attributeExpr); + List attributeValues = Utils.getMBeanAttributeValues(mbeanExpr, attributeExpr); for (MbeanAttributeValue attributeValue: attributeValues) { final String statsdMetricName = generateStatsdMetricName(attributeValue.mbean(), attributeValue.attribute()); diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/StatsdMetricsReporterServiceFactory.java b/src/main/java/com/linkedin/xinfra/monitor/services/StatsdMetricsReporterServiceFactory.java new file mode 100644 index 00000000..046d0a2f --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/StatsdMetricsReporterServiceFactory.java @@ -0,0 +1,37 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import java.util.Map; + + +/** + * Factory class that constructs the StatsdMetricsReporterService. + */ +@SuppressWarnings("rawtypes") +public class StatsdMetricsReporterServiceFactory implements ServiceFactory { + private final Map _properties; + private final String _name; + + public StatsdMetricsReporterServiceFactory(Map properties, String name) { + + _properties = properties; + _name = name; + } + + @Override + public Service createService() { + + //noinspection unchecked + return new StatsdMetricsReporterService(_properties, _name); + + } +} diff --git a/src/main/java/com/linkedin/kmf/services/TopicManagementService.java b/src/main/java/com/linkedin/xinfra/monitor/services/TopicManagementService.java similarity index 67% rename from src/main/java/com/linkedin/kmf/services/TopicManagementService.java rename to src/main/java/com/linkedin/xinfra/monitor/services/TopicManagementService.java index 26333b2b..491b2514 100644 --- a/src/main/java/com/linkedin/kmf/services/TopicManagementService.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/TopicManagementService.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -8,12 +8,14 @@ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
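The statsd name assembled above differs from the SignalFx one in two ways: it keeps both key values from the bean, and it no longer prepends the configured prefix itself, because NonBlockingStatsDClient is now constructed with that prefix and applies it to every metric it sends. In isolation, under the same assumed bean shape as before:

```java
import org.apache.commons.lang3.StringUtils;

final class StatsdNameSketch {
  // Mirrors generateStatsdMetricName: "<serviceType>.<serviceName>.<attribute>".
  static String metricName(String bean, String attribute) {
    String service = bean.split(":")[1];
    String serviceName = service.split(",")[0].split("=")[1]; // value of the first key=value pair
    String serviceType = service.split(",")[1].split("=")[1]; // value of the second key=value pair
    return StringUtils.join(new String[] {serviceType, serviceName, attribute}, ".");
  }
}
```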
*/ -package com.linkedin.kmf.services; +package com.linkedin.xinfra.monitor.services; -import com.linkedin.kmf.services.configs.MultiClusterTopicManagementServiceConfig; -import com.linkedin.kmf.services.configs.TopicManagementServiceConfig; +import com.linkedin.xinfra.monitor.services.configs.MultiClusterTopicManagementServiceConfig; +import com.linkedin.xinfra.monitor.services.configs.TopicManagementServiceConfig; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; /** @@ -28,6 +30,10 @@ public TopicManagementService(Map props, String serviceName) thr _multiClusterTopicManagementService = new MultiClusterTopicManagementService(serviceProps, serviceName); } + public CompletableFuture topicPartitionResult() { + return _multiClusterTopicManagementService.topicPartitionResult(); + } + /** * @param props a map of key/value pair used for configuring TopicManagementService * @param serviceName service name @@ -52,8 +58,14 @@ private Map createMultiClusterTopicManagementServiceProps(Map serviceProps = new HashMap<>(); serviceProps.put(MultiClusterTopicManagementServiceConfig.PROPS_PER_CLUSTER_CONFIG, configPerCluster); serviceProps.put(MultiClusterTopicManagementServiceConfig.TOPIC_CONFIG, props.get(TopicManagementServiceConfig.TOPIC_CONFIG)); - if (props.containsKey(MultiClusterTopicManagementServiceConfig.REBALANCE_INTERVAL_MS_CONFIG)) - serviceProps.put(MultiClusterTopicManagementServiceConfig.REBALANCE_INTERVAL_MS_CONFIG, props.get(MultiClusterTopicManagementServiceConfig.REBALANCE_INTERVAL_MS_CONFIG)); + Object providedRebalanceIntervalMsConfig = props.get(MultiClusterTopicManagementServiceConfig.REBALANCE_INTERVAL_MS_CONFIG); + if (providedRebalanceIntervalMsConfig != null) { + serviceProps.put(MultiClusterTopicManagementServiceConfig.REBALANCE_INTERVAL_MS_CONFIG, providedRebalanceIntervalMsConfig); + } + Object providedPreferredLeaderElectionIntervalMsConfig = props.get(MultiClusterTopicManagementServiceConfig.PREFERRED_LEADER_ELECTION_CHECK_INTERVAL_MS_CONFIG); + if (providedPreferredLeaderElectionIntervalMsConfig != null) { + serviceProps.put(MultiClusterTopicManagementServiceConfig.PREFERRED_LEADER_ELECTION_CHECK_INTERVAL_MS_CONFIG, providedPreferredLeaderElectionIntervalMsConfig); + } return serviceProps; } @@ -62,6 +74,7 @@ public synchronized void start() { _multiClusterTopicManagementService.start(); } + @Override public synchronized void stop() { _multiClusterTopicManagementService.stop(); @@ -73,8 +86,9 @@ public boolean isRunning() { } @Override - public void awaitShutdown() { - _multiClusterTopicManagementService.awaitShutdown(); + public void awaitShutdown(long timeout, TimeUnit unit) { + _multiClusterTopicManagementService.awaitShutdown(timeout, unit); } + } diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/TopicManagementServiceFactory.java b/src/main/java/com/linkedin/xinfra/monitor/services/TopicManagementServiceFactory.java new file mode 100644 index 00000000..b27ea335 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/TopicManagementServiceFactory.java @@ -0,0 +1,37 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. 
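The new topicPartitionResult() accessor surfaces the multi-cluster service's readiness future, which callers can use to defer dependent services until topic and partition management has converged. A hedged sketch of that sequencing (not code from this patch; it assumes the future completes once the monitor topic is ready):

```java
final class StartupOrderingSketch {
  static void startWhenTopicReady(TopicManagementService topicService, Service dependentService) {
    topicService.start();
    // Defer the dependent service until the monitor topic's partitions are settled.
    topicService.topicPartitionResult().thenRun(dependentService::start);
  }
}
```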
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import java.util.Map; + + +/** + * Factory class which constructs the TopicManagementService. + */ +@SuppressWarnings("rawtypes") +public class TopicManagementServiceFactory implements ServiceFactory { + private final Map _properties; + private final String _serviceName; + + public TopicManagementServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + + return new TopicManagementService(_properties, _serviceName); + + } +} diff --git a/src/main/java/com/linkedin/kmf/services/configs/CommonServiceConfig.java b/src/main/java/com/linkedin/xinfra/monitor/services/configs/CommonServiceConfig.java similarity index 85% rename from src/main/java/com/linkedin/kmf/services/configs/CommonServiceConfig.java rename to src/main/java/com/linkedin/xinfra/monitor/services/configs/CommonServiceConfig.java index 71c05c23..52d1ba47 100644 --- a/src/main/java/com/linkedin/kmf/services/configs/CommonServiceConfig.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/configs/CommonServiceConfig.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,17 +7,21 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
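The factory indirection exists so the monitor can build services reflectively from configured class names; a hedged sketch of that lookup (the reflective wiring shown here is an assumption for illustration, not code from this patch):

    String factoryClassName = "com.linkedin.xinfra.monitor.services.TopicManagementServiceFactory";
    ServiceFactory factory = (ServiceFactory) Class.forName(factoryClassName)
        .getConstructor(Map.class, String.class)   // matches the (Map, String) constructor above
        .newInstance(props, "topic-management-service");
    Service service = factory.createService();
    service.start();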
*/ -package com.linkedin.kmf.services.configs; + +package com.linkedin.xinfra.monitor.services.configs; import org.apache.kafka.clients.CommonClientConfigs; public class CommonServiceConfig { + public static final String CONSUMER_PROPS_CONFIG = "consumer.props"; + public static final String CONSUMER_PROPS_DOC = "consumer props"; + public static final String ZOOKEEPER_CONNECT_CONFIG = "zookeeper.connect"; public static final String ZOOKEEPER_CONNECT_DOC = "Zookeeper connect string."; public static final String BOOTSTRAP_SERVERS_CONFIG = CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG; - public static final String BOOTSTRAP_SERVERS_DOC = CommonClientConfigs.BOOSTRAP_SERVERS_DOC; + public static final String BOOTSTRAP_SERVERS_DOC = CommonClientConfigs.BOOTSTRAP_SERVERS_DOC; public static final String TOPIC_CONFIG = "topic"; public static final String TOPIC_DOC = "Topic to be used by the service."; @@ -31,4 +35,4 @@ public class CommonServiceConfig { public static final String REPORT_INTERVAL_SEC_CONFIG = "report.interval.sec"; public static final String REPORT_INTERVAL_SEC_DOC = "The interval in second by which metrics reporter service will report the metrics values."; -} \ No newline at end of file +} diff --git a/src/main/java/com/linkedin/kmf/services/configs/ConsumeServiceConfig.java b/src/main/java/com/linkedin/xinfra/monitor/services/configs/ConsumeServiceConfig.java similarity index 94% rename from src/main/java/com/linkedin/kmf/services/configs/ConsumeServiceConfig.java rename to src/main/java/com/linkedin/xinfra/monitor/services/configs/ConsumeServiceConfig.java index dd58f6f4..a5764fa0 100644 --- a/src/main/java/com/linkedin/kmf/services/configs/ConsumeServiceConfig.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/configs/ConsumeServiceConfig.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,13 +7,14 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.services.configs; +package com.linkedin.xinfra.monitor.services.configs; -import com.linkedin.kmf.consumer.NewConsumer; +import com.linkedin.xinfra.monitor.consumer.NewConsumer; import java.util.Map; import org.apache.kafka.common.config.AbstractConfig; import org.apache.kafka.common.config.ConfigDef; + public class ConsumeServiceConfig extends AbstractConfig { private static final ConfigDef CONFIG; @@ -29,7 +30,7 @@ public class ConsumeServiceConfig extends AbstractConfig { public static final String CONSUMER_CLASS_CONFIG = "consume.consumer.class"; public static final String CONSUMER_CLASS_DOC = "Consumer class that will be instantiated as consumer in the consume service. 
" - + "It can be NewConsumer, OldConsumer, or full class name of any class that implements the KMBaseConsumer interface."; + + "It can be NewConsumer or full class name of any class that implements the KMBaseConsumer interface."; public static final String LATENCY_PERCENTILE_MAX_MS_CONFIG = "consume.latency.percentile.max.ms"; public static final String LATENCY_PERCENTILE_MAX_MS_DOC = "This is used to derive the bucket number used to configure latency percentile metric. " @@ -79,7 +80,6 @@ public class ConsumeServiceConfig extends AbstractConfig { 20000, ConfigDef.Importance.MEDIUM, LATENCY_SLA_MS_DOC); - } public ConsumeServiceConfig(Map props) { diff --git a/src/main/java/com/linkedin/kmf/services/configs/DefaultMetricsReporterServiceConfig.java b/src/main/java/com/linkedin/xinfra/monitor/services/configs/DefaultMetricsReporterServiceConfig.java similarity index 86% rename from src/main/java/com/linkedin/kmf/services/configs/DefaultMetricsReporterServiceConfig.java rename to src/main/java/com/linkedin/xinfra/monitor/services/configs/DefaultMetricsReporterServiceConfig.java index 0c8c9130..575a553e 100644 --- a/src/main/java/com/linkedin/kmf/services/configs/DefaultMetricsReporterServiceConfig.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/configs/DefaultMetricsReporterServiceConfig.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,12 +7,13 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.services.configs; +package com.linkedin.xinfra.monitor.services.configs; + +import java.util.Collections; +import java.util.Map; import org.apache.kafka.common.config.AbstractConfig; import org.apache.kafka.common.config.ConfigDef; -import java.util.Arrays; -import java.util.Map; public class DefaultMetricsReporterServiceConfig extends AbstractConfig { @@ -26,8 +27,7 @@ public class DefaultMetricsReporterServiceConfig extends AbstractConfig { static { CONFIG = new ConfigDef().define(REPORT_METRICS_CONFIG, - ConfigDef.Type.LIST, - Arrays.asList("kmf.services:*:*"), + ConfigDef.Type.LIST, Collections.singletonList("kmf.services:*:*"), ConfigDef.Importance.MEDIUM, REPORT_METRICS_DOC) .define(REPORT_INTERVAL_SEC_CONFIG, diff --git a/src/main/java/com/linkedin/kmf/services/configs/GraphiteMetricsReporterServiceConfig.java b/src/main/java/com/linkedin/xinfra/monitor/services/configs/GraphiteMetricsReporterServiceConfig.java similarity index 92% rename from src/main/java/com/linkedin/kmf/services/configs/GraphiteMetricsReporterServiceConfig.java rename to src/main/java/com/linkedin/xinfra/monitor/services/configs/GraphiteMetricsReporterServiceConfig.java index 827cb7e4..29ea04ed 100644 --- a/src/main/java/com/linkedin/kmf/services/configs/GraphiteMetricsReporterServiceConfig.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/configs/GraphiteMetricsReporterServiceConfig.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,13 +7,13 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.services.configs; -import org.apache.kafka.common.config.AbstractConfig; -import org.apache.kafka.common.config.ConfigDef; +package com.linkedin.xinfra.monitor.services.configs; -import java.util.Arrays; +import java.util.Collections; import java.util.Map; +import org.apache.kafka.common.config.AbstractConfig; +import org.apache.kafka.common.config.ConfigDef; public class GraphiteMetricsReporterServiceConfig extends AbstractConfig { private static final ConfigDef CONFIG; @@ -35,8 +35,7 @@ public class GraphiteMetricsReporterServiceConfig extends AbstractConfig { static { CONFIG = new ConfigDef().define(REPORT_METRICS_CONFIG, - ConfigDef.Type.LIST, - Arrays.asList("kmf.services:*:*"), + ConfigDef.Type.LIST, Collections.singletonList("kmf.services:*:*"), ConfigDef.Importance.MEDIUM, REPORT_METRICS_DOC) .define(REPORT_INTERVAL_SEC_CONFIG, diff --git a/src/main/java/com/linkedin/kmf/services/configs/JettyServiceConfig.java b/src/main/java/com/linkedin/xinfra/monitor/services/configs/JettyServiceConfig.java similarity index 90% rename from src/main/java/com/linkedin/kmf/services/configs/JettyServiceConfig.java rename to src/main/java/com/linkedin/xinfra/monitor/services/configs/JettyServiceConfig.java index 38676631..06fd6e25 100644 --- a/src/main/java/com/linkedin/kmf/services/configs/JettyServiceConfig.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/configs/JettyServiceConfig.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,7 +7,8 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.services.configs; + +package com.linkedin.xinfra.monitor.services.configs; import org.apache.kafka.common.config.AbstractConfig; import org.apache.kafka.common.config.ConfigDef; diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/configs/KafkaMetricsReporterServiceConfig.java b/src/main/java/com/linkedin/xinfra/monitor/services/configs/KafkaMetricsReporterServiceConfig.java new file mode 100644 index 00000000..d6c30ac4 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/configs/KafkaMetricsReporterServiceConfig.java @@ -0,0 +1,76 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services.configs; + +import java.util.Collections; +import java.util.Map; +import org.apache.kafka.common.config.AbstractConfig; +import org.apache.kafka.common.config.ConfigDef; + +import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; + +public class KafkaMetricsReporterServiceConfig extends AbstractConfig { + + private static final ConfigDef CONFIG; + + public static final String REPORT_METRICS_CONFIG = CommonServiceConfig.REPORT_METRICS_CONFIG; + public static final String REPORT_METRICS_DOC = CommonServiceConfig.REPORT_METRICS_DOC; + + public static final String REPORT_INTERVAL_SEC_CONFIG = CommonServiceConfig.REPORT_INTERVAL_SEC_CONFIG; + public static final String REPORT_INTERVAL_SEC_DOC = CommonServiceConfig.REPORT_INTERVAL_SEC_DOC; + + public static final String ZOOKEEPER_CONNECT_CONFIG = CommonServiceConfig.ZOOKEEPER_CONNECT_CONFIG; + public static final String ZOOKEEPER_CONNECT_DOC = CommonServiceConfig.ZOOKEEPER_CONNECT_DOC; + + public static final String BOOTSTRAP_SERVERS_CONFIG = CommonServiceConfig.BOOTSTRAP_SERVERS_CONFIG; + public static final String BOOTSTRAP_SERVERS_DOC = CommonServiceConfig.BOOTSTRAP_SERVERS_DOC; + + public static final String TOPIC_CONFIG = CommonServiceConfig.TOPIC_CONFIG; + public static final String TOPIC_DOC = CommonServiceConfig.TOPIC_DOC; + + public static final String TOPIC_REPLICATION_FACTOR = "report.kafka.topic.replication.factor"; + public static final String TOPIC_REPLICATION_FACTOR_DOC = "This replication factor is used to create the metrics reporter topic."; + + + static { + CONFIG = new ConfigDef().define(REPORT_METRICS_CONFIG, + ConfigDef.Type.LIST, Collections.singletonList("kmf.services:*:*"), + ConfigDef.Importance.MEDIUM, + REPORT_METRICS_DOC) + .define(REPORT_INTERVAL_SEC_CONFIG, + ConfigDef.Type.INT, + 1, + ConfigDef.Importance.LOW, + REPORT_INTERVAL_SEC_DOC) + .define(ZOOKEEPER_CONNECT_CONFIG, + ConfigDef.Type.STRING, + ConfigDef.Importance.HIGH, + ZOOKEEPER_CONNECT_DOC) + .define(BOOTSTRAP_SERVERS_CONFIG, + ConfigDef.Type.STRING, + ConfigDef.Importance.HIGH, + BOOTSTRAP_SERVERS_DOC) + .define(TOPIC_CONFIG, + ConfigDef.Type.STRING, + ConfigDef.Importance.HIGH, + TOPIC_DOC) + .define(TOPIC_REPLICATION_FACTOR, + ConfigDef.Type.INT, + 1, + atLeast(1), + ConfigDef.Importance.LOW, + TOPIC_REPLICATION_FACTOR_DOC); + } + + public KafkaMetricsReporterServiceConfig(Map props) { + super(CONFIG, props); + } +} diff --git a/src/main/java/com/linkedin/kmf/services/configs/MultiClusterTopicManagementServiceConfig.java b/src/main/java/com/linkedin/xinfra/monitor/services/configs/MultiClusterTopicManagementServiceConfig.java similarity index 71% rename from src/main/java/com/linkedin/kmf/services/configs/MultiClusterTopicManagementServiceConfig.java rename to src/main/java/com/linkedin/xinfra/monitor/services/configs/MultiClusterTopicManagementServiceConfig.java index 5ac0f61a..73138c00 100644 --- a/src/main/java/com/linkedin/kmf/services/configs/MultiClusterTopicManagementServiceConfig.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/configs/MultiClusterTopicManagementServiceConfig.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,7 +7,8 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.services.configs; + +package com.linkedin.xinfra.monitor.services.configs; import java.util.Map; import org.apache.kafka.common.config.AbstractConfig; @@ -29,6 +30,10 @@ public class MultiClusterTopicManagementServiceConfig extends AbstractConfig { public static final String REBALANCE_INTERVAL_MS_DOC = "The gap in ms between the times the cluster balance on the " + "monitor topic is checked. Set this to a large value to disable automatic topic rebalance."; + public static final String PREFERRED_LEADER_ELECTION_CHECK_INTERVAL_MS_CONFIG = "topic-management.preferred.leader.election.check.interval.ms"; + public static final String PREFERRED_LEADER_ELECTION_CHECK_INTERVAL_MS_DOC = "The gap in ms between the times to check if preferred leader election" + + " can be performed when requested during rebalance"; + static { CONFIG = new ConfigDef() .define(TOPIC_CONFIG, @@ -40,7 +45,12 @@ public class MultiClusterTopicManagementServiceConfig extends AbstractConfig { 1000 * 60 * 10, atLeast(10), ConfigDef.Importance.LOW, - REBALANCE_INTERVAL_MS_DOC); + REBALANCE_INTERVAL_MS_DOC) + .define(PREFERRED_LEADER_ELECTION_CHECK_INTERVAL_MS_CONFIG, + ConfigDef.Type.LONG, + 1000 * 60 * 5, + atLeast(5), + ConfigDef.Importance.LOW, PREFERRED_LEADER_ELECTION_CHECK_INTERVAL_MS_DOC); } public MultiClusterTopicManagementServiceConfig(Map props) { diff --git a/src/main/java/com/linkedin/kmf/services/configs/ProduceServiceConfig.java b/src/main/java/com/linkedin/xinfra/monitor/services/configs/ProduceServiceConfig.java similarity index 68% rename from src/main/java/com/linkedin/kmf/services/configs/ProduceServiceConfig.java rename to src/main/java/com/linkedin/xinfra/monitor/services/configs/ProduceServiceConfig.java index 0a613971..1c434193 100644 --- a/src/main/java/com/linkedin/kmf/services/configs/ProduceServiceConfig.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/configs/ProduceServiceConfig.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,10 +7,11 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
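Both intervals can be tuned per deployment; the defaults defined below are ten minutes for the rebalance check and five minutes for the preferred leader election check. A hypothetical xinfra-monitor.properties fragment (the service name and values are examples only):

    "topic-management-service": {
      "class.name": "com.linkedin.xinfra.monitor.services.TopicManagementService",
      "topic": "xinfra-monitor-topic",
      "topic-management.preferred.leader.election.check.interval.ms": 300000
    }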
*/ -package com.linkedin.kmf.services.configs; -import com.linkedin.kmf.partitioner.NewKMPartitioner; -import com.linkedin.kmf.producer.NewProducer; +package com.linkedin.xinfra.monitor.services.configs; + +import com.linkedin.xinfra.monitor.partitioner.NewKMPartitioner; +import com.linkedin.xinfra.monitor.producer.NewProducer; import java.util.Map; import org.apache.kafka.common.config.AbstractConfig; import org.apache.kafka.common.config.ConfigDef; @@ -55,6 +56,20 @@ public class ProduceServiceConfig extends AbstractConfig { public static final String PRODUCER_PROPS_CONFIG = "produce.producer.props"; public static final String PRODUCER_PROPS_DOC = "The properties used to config producer in produce service."; + public static final String LATENCY_PERCENTILE_MAX_MS_CONFIG = "produce.latency.percentile.max.ms"; + public static final String LATENCY_PERCENTILE_MAX_MS_DOC = "This is used to derive the bucket number used to configure latency percentile metric. " + + "Any latency larger than this max value will be rounded down to the max value."; + + public static final String LATENCY_PERCENTILE_GRANULARITY_MS_CONFIG = "produce.latency.percentile.granularity.ms"; + public static final String LATENCY_PERCENTILE_GRANULARITY_MS_DOC = "This is used to derive the bucket number used to configure latency percentile metric. " + + "The latency at the specified percentile should be multiple of this value."; + + public static final String PRODUCER_TREAT_ZERO_THROUGHPUT_AS_UNAVAILABLE_CONFIG = "produce.treat.zero.throughput.as.unavailable"; + public static final String PRODUCER_TREAT_ZERO_THROUGHPUT_AS_UNAVAILABLE_DOC = "If it is set to true, produce availability is set to 0 " + + "if no message can be produced, regardless of whether there is exception. If this is set to false, availability will only drop below 1 if there is exception " + + "thrown from producer. Depending on the producer configuration, it may take a few minutes for producer to be blocked before it throws exception. Advanced user " + + "may want to set this flag to false to exactly measure the availability experienced by users"; + static { CONFIG = new ConfigDef().define(ZOOKEEPER_CONNECT_CONFIG, ConfigDef.Type.STRING, @@ -98,6 +113,21 @@ public class ProduceServiceConfig extends AbstractConfig { 100, ConfigDef.Importance.LOW, PRODUCE_RECORD_SIZE_BYTE_DOC) + .define(PRODUCER_TREAT_ZERO_THROUGHPUT_AS_UNAVAILABLE_CONFIG, + ConfigDef.Type.BOOLEAN, + true, + ConfigDef.Importance.MEDIUM, + PRODUCER_TREAT_ZERO_THROUGHPUT_AS_UNAVAILABLE_DOC) + .define(LATENCY_PERCENTILE_MAX_MS_CONFIG, + ConfigDef.Type.INT, + 5000, + ConfigDef.Importance.LOW, + LATENCY_PERCENTILE_MAX_MS_DOC) + .define(LATENCY_PERCENTILE_GRANULARITY_MS_CONFIG, + ConfigDef.Type.INT, + 1, + ConfigDef.Importance.LOW, + LATENCY_PERCENTILE_GRANULARITY_MS_DOC) .define(PRODUCE_THREAD_NUM_CONFIG, ConfigDef.Type.INT, 5, diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/configs/SignalFxMetricsReporterServiceConfig.java b/src/main/java/com/linkedin/xinfra/monitor/services/configs/SignalFxMetricsReporterServiceConfig.java new file mode 100644 index 00000000..5a8e3e9a --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/configs/SignalFxMetricsReporterServiceConfig.java @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2018 SignalFx, Inc. Licensed under the Apache 2 License. 
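The three new produce-side settings default to 5000 ms, 1 ms, and true respectively; a hypothetical fragment overriding them (the service and class names follow the pattern of the other config examples and are illustrative):

    "produce-service": {
      "class.name": "com.linkedin.xinfra.monitor.services.ProduceService",
      "produce.latency.percentile.max.ms": 5000,
      "produce.latency.percentile.granularity.ms": 1,
      "produce.treat.zero.throughput.as.unavailable": false
    }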
+ */ + +package com.linkedin.xinfra.monitor.services.configs; + +import java.util.Collections; +import java.util.Map; +import org.apache.kafka.common.config.AbstractConfig; +import org.apache.kafka.common.config.ConfigDef; + +/** + * key/value pair used for configuring SignalFxMetricsReporterService + * + */ +public class SignalFxMetricsReporterServiceConfig extends AbstractConfig { + private static final ConfigDef CONFIG; + + public static final String REPORT_METRICS_CONFIG = "report.metrics.list"; + public static final String REPORT_METRICS_DOC = CommonServiceConfig.REPORT_METRICS_DOC; + + public static final String REPORT_INTERVAL_SEC_CONFIG = CommonServiceConfig.REPORT_INTERVAL_SEC_CONFIG; + public static final String REPORT_INTERVAL_SEC_DOC = CommonServiceConfig.REPORT_INTERVAL_SEC_DOC; + + public static final String REPORT_SIGNALFX_URL = "report.signalfx.url"; + public static final String REPORT_SIGNALFX_URL_DOC = "The url of signalfx server which SignalFxMetricsReporterService will report the metrics values."; + + public static final String SIGNALFX_METRIC_DIMENSION = "report.metric.dimensions"; + public static final String SIGNALFX_METRIC_DIMENSION_DOC = "Dimensions added to each metric. Example: {\"key1:value1\", \"key2:value2\"} "; + + public static final String SIGNALFX_TOKEN = "report.signalfx.token"; + public static final String SIGNALFX_TOKEN_DOC = "SignalFx access token"; + + static { + CONFIG = new ConfigDef().define(REPORT_METRICS_CONFIG, + ConfigDef.Type.LIST, Collections.singletonList("kmf.services:*:*"), + ConfigDef.Importance.MEDIUM, + REPORT_METRICS_DOC) + .define(REPORT_INTERVAL_SEC_CONFIG, + ConfigDef.Type.INT, + 1, + ConfigDef.Importance.LOW, + REPORT_INTERVAL_SEC_DOC) + .define(REPORT_SIGNALFX_URL, + ConfigDef.Type.STRING, + "", + ConfigDef.Importance.LOW, + REPORT_SIGNALFX_URL_DOC) + .define(SIGNALFX_TOKEN, + ConfigDef.Type.STRING, + "", + ConfigDef.Importance.HIGH, + SIGNALFX_TOKEN_DOC); + } + + public SignalFxMetricsReporterServiceConfig(Map props) { + super(CONFIG, props); + } +} + diff --git a/src/main/java/com/linkedin/kmf/services/configs/StatsdMetricsReporterServiceConfig.java b/src/main/java/com/linkedin/xinfra/monitor/services/configs/StatsdMetricsReporterServiceConfig.java similarity index 67% rename from src/main/java/com/linkedin/kmf/services/configs/StatsdMetricsReporterServiceConfig.java rename to src/main/java/com/linkedin/xinfra/monitor/services/configs/StatsdMetricsReporterServiceConfig.java index a0b98efc..8438fd78 100644 --- a/src/main/java/com/linkedin/kmf/services/configs/StatsdMetricsReporterServiceConfig.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/configs/StatsdMetricsReporterServiceConfig.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -8,37 +8,16 @@ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
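A hypothetical SignalFx reporter configuration exercising these keys (the ingest URL and token are placeholders, and the service name is illustrative):

    "signalfx-service": {
      "class.name": "com.linkedin.xinfra.monitor.services.SignalFxMetricsReporterService",
      "report.interval.sec": 1,
      "report.signalfx.url": "https://ingest.signalfx.example",
      "report.signalfx.token": "<signalfx-access-token>",
      "report.metric.dimensions": {"datacenter": "dc-1"}
    }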
* * - * In order to enable the StatsD metrics export, add the following section to kafka-monitor.properties file + * In order to enable the StatsD metrics export, add the following section to xinfra-monitor.properties file * - ========================================================================================== - "statsd-service": { - "class.name": "com.linkedin.kmf.services.StatsdMetricsReporterService", - "report.statsd.host": "localhost", - "report.statsd.port": "8125", - "report.statsd.prefix": "kafka-monitor", - "report.interval.sec": 1, - "report.metrics.list": [ - "kmf.services:type=produce-service,name=*:produce-availability-avg", - "kmf.services:type=consume-service,name=*:consume-availability-avg", - "kmf.services:type=produce-service,name=*:records-produced-total", - "kmf.services:type=consume-service,name=*:records-consumed-total", - "kmf.services:type=consume-service,name=*:records-lost-total", - "kmf.services:type=consume-service,name=*:records-duplicated-total", - "kmf.services:type=consume-service,name=*:records-delay-ms-avg", - "kmf.services:type=produce-service,name=*:records-produced-rate", - "kmf.services:type=produce-service,name=*:produce-error-rate", - "kmf.services:type=consume-service,name=*:consume-error-rate" - ] - } - ========================================================================================== */ -package com.linkedin.kmf.services.configs; -import org.apache.kafka.common.config.AbstractConfig; -import org.apache.kafka.common.config.ConfigDef; +package com.linkedin.xinfra.monitor.services.configs; -import java.util.Arrays; +import java.util.Collections; import java.util.Map; +import org.apache.kafka.common.config.AbstractConfig; +import org.apache.kafka.common.config.ConfigDef; public class StatsdMetricsReporterServiceConfig extends AbstractConfig { private static final ConfigDef CONFIG; @@ -60,8 +39,7 @@ public class StatsdMetricsReporterServiceConfig extends AbstractConfig { static { CONFIG = new ConfigDef().define(REPORT_METRICS_CONFIG, - ConfigDef.Type.LIST, - Arrays.asList("kmf.services:*:*"), + ConfigDef.Type.LIST, Collections.singletonList("kmf.services:*:*"), ConfigDef.Importance.MEDIUM, REPORT_METRICS_DOC) .define(REPORT_INTERVAL_SEC_CONFIG, diff --git a/src/main/java/com/linkedin/kmf/services/configs/TopicManagementServiceConfig.java b/src/main/java/com/linkedin/xinfra/monitor/services/configs/TopicManagementServiceConfig.java similarity index 68% rename from src/main/java/com/linkedin/kmf/services/configs/TopicManagementServiceConfig.java rename to src/main/java/com/linkedin/xinfra/monitor/services/configs/TopicManagementServiceConfig.java index 36a1b947..8518ccf4 100644 --- a/src/main/java/com/linkedin/kmf/services/configs/TopicManagementServiceConfig.java +++ b/src/main/java/com/linkedin/xinfra/monitor/services/configs/TopicManagementServiceConfig.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,10 +7,11 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
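The inline example removed from the javadoc above remains the clearest way to see the expected shape; an updated equivalent for xinfra-monitor.properties with the renamed package (host, port, and prefix values are illustrative):

    "statsd-service": {
      "class.name": "com.linkedin.xinfra.monitor.services.StatsdMetricsReporterService",
      "report.statsd.host": "localhost",
      "report.statsd.port": "8125",
      "report.statsd.prefix": "xinfra-monitor",
      "report.interval.sec": 1,
      "report.metrics.list": [
        "kmf.services:type=produce-service,name=*:produce-availability-avg",
        "kmf.services:type=consume-service,name=*:consume-availability-avg",
        "kmf.services:type=consume-service,name=*:records-delay-ms-avg"
      ]
    }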
*/ -package com.linkedin.kmf.services.configs; -import com.linkedin.kmf.topicfactory.DefaultTopicFactory; -import com.linkedin.kmf.topicfactory.TopicFactory; +package com.linkedin.xinfra.monitor.services.configs; + +import com.linkedin.xinfra.monitor.topicfactory.DefaultTopicFactory; +import com.linkedin.xinfra.monitor.topicfactory.TopicFactory; import java.util.Map; import org.apache.kafka.common.config.AbstractConfig; import org.apache.kafka.common.config.ConfigDef; @@ -40,14 +41,24 @@ public class TopicManagementServiceConfig extends AbstractConfig { + " This config provides a loose lower bound on the partition number of the monitor topic when the topic is created or when partition is added."; public static final String TOPIC_REPLICATION_FACTOR_CONFIG = "topic-management.replicationFactor"; - public static final String TOPIC_REPLICATION_FACTOR_DOC = "When a topic is created automatically this is the " - + "replication factor used."; + public static final String TOPIC_REPLICATION_FACTOR_DOC = "This replication factor is used to create the monitor topic. " + + "The larger one of the current replication factor and the configured replication factor is used to expand partition " + + "of the monitor topic."; public static final String TOPIC_CREATION_ENABLED_CONFIG = "topic-management.topicCreationEnabled"; public static final String TOPIC_CREATION_ENABLED_DOC = String.format("When true this service automatically creates the topic named" + " in the config with replication factor %s and min ISR as max(%s - 1, 1). The partition number is determined based on %s and %s", TOPIC_REPLICATION_FACTOR_CONFIG, TOPIC_REPLICATION_FACTOR_CONFIG, PARTITIONS_TO_BROKERS_RATIO_CONFIG, MIN_PARTITION_NUM_DOC); + public static final String TOPIC_ADD_PARTITION_ENABLED_CONFIG = "topic-management.topicAddPartitionEnabled"; + public static final String TOPIC_ADD_PARTITION_ENABLED_DOC = String.format("When true this service automatically add topic partition(s) " + + "if the current topic partition count is smaller than the partition number which is determined based on %s and %s", + PARTITIONS_TO_BROKERS_RATIO_CONFIG, MIN_PARTITION_NUM_DOC); + + public static final String TOPIC_REASSIGN_PARTITION_AND_ELECT_LEADER_ENABLED_CONFIG = "topic-management.topicReassignPartitionAndElectLeaderEnabled"; + public static final String TOPIC_REASSIGN_PARTITION_AND_ELECT_LEADER_ENABLED_DOC = "When true this service automatically balance topic partitions in" + + " a cluster to ensure a minimum number of leader replicas on each alive broker."; + public static final String TOPIC_FACTORY_CLASS_CONFIG = "topic-management.topicFactory.class.name"; public static final String TOPIC_FACTORY_CLASS_DOC = "The name of the class used to create topics. 
This class must implement " + TopicFactory.class.getName() + "."; @@ -58,8 +69,16 @@ public class TopicManagementServiceConfig extends AbstractConfig { public static final String TOPIC_PROPS_CONFIG = "topic-management.topic.props"; public static final String TOPIC_PROPS_DOC = "A configuration map for the topic"; + public static final String TOPIC_MANAGEMENT_ENABLED_CONFIG = "topic-management.topicManagementEnabled"; + public static final String TOPIC_MANAGEMENT_ENABLED_DOC = "Boolean switch for enabling Topic Management Service"; + static { CONFIG = new ConfigDef() + .define(TOPIC_MANAGEMENT_ENABLED_CONFIG, + ConfigDef.Type.BOOLEAN, + true, + ConfigDef.Importance.HIGH, + TOPIC_MANAGEMENT_ENABLED_DOC) .define(ZOOKEEPER_CONNECT_CONFIG, ConfigDef.Type.STRING, ConfigDef.Importance.HIGH, @@ -83,6 +102,16 @@ public class TopicManagementServiceConfig extends AbstractConfig { true, ConfigDef.Importance.LOW, TOPIC_CREATION_ENABLED_DOC) + .define(TOPIC_ADD_PARTITION_ENABLED_CONFIG, + ConfigDef.Type.BOOLEAN, + true, + ConfigDef.Importance.LOW, + TOPIC_ADD_PARTITION_ENABLED_DOC) + .define(TOPIC_REASSIGN_PARTITION_AND_ELECT_LEADER_ENABLED_CONFIG, + ConfigDef.Type.BOOLEAN, + true, + ConfigDef.Importance.LOW, + TOPIC_REASSIGN_PARTITION_AND_ELECT_LEADER_ENABLED_DOC) .define(TOPIC_REPLICATION_FACTOR_CONFIG, ConfigDef.Type.INT, 1, diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/metrics/ClusterTopicManipulationMetrics.java b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/ClusterTopicManipulationMetrics.java new file mode 100644 index 00000000..947177e9 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/ClusterTopicManipulationMetrics.java @@ -0,0 +1,126 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services.metrics; + +import java.util.Map; +import org.apache.kafka.common.MetricName; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.metrics.Sensor; +import org.apache.kafka.common.metrics.stats.Avg; +import org.apache.kafka.common.metrics.stats.Max; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Metrics sub-class for Cluster Topic Manipulation Service that extends the parent class XinfraMonitorMetrics. + */ +public class ClusterTopicManipulationMetrics extends XinfraMonitorMetrics { + + private static final Logger LOGGER = LoggerFactory.getLogger(ClusterTopicManipulationMetrics.class); + private final Sensor _topicCreationSensor; + private final Sensor _topicDeletionSensor; + private long _topicCreationStartTimeMs; + private long _topicDeletionStartTimeMs; + public static final String METRIC_GROUP_NAME = "cluster-topic-manipulation-service"; + + /** + * + * @param metrics a named, numerical measurement. sensor is a handle to record numerical measurements as they occur. + * @param tags metrics/sensor's tags + */ + public ClusterTopicManipulationMetrics(final Metrics metrics, final Map tags) { + super(metrics, tags); + _topicCreationSensor = metrics.sensor("topic-creation-metadata-propagation"); + _topicDeletionSensor = metrics.sensor("topic-deletion-metadata-propagation"); + _topicCreationSensor.add(new MetricName("topic-creation-metadata-propagation-ms-avg", METRIC_GROUP_NAME, + "The average propagation duration in ms of propagating topic creation data and metadata to all brokers in the cluster", + tags), new Avg()); + _topicCreationSensor.add(new MetricName("topic-creation-metadata-propagation-ms-max", METRIC_GROUP_NAME, + "The maximum propagation time in ms of propagating topic creation data and metadata to all brokers in the cluster", + tags), new Max()); + _topicDeletionSensor.add(new MetricName("topic-deletion-metadata-propagation-ms-avg", METRIC_GROUP_NAME, + "The average propagation duration in milliseconds of propagating the topic deletion data and metadata " + + "across all the brokers in the cluster.", tags), new Avg()); + _topicDeletionSensor.add(new MetricName("topic-deletion-metadata-propagation-ms-max", METRIC_GROUP_NAME, + "The maximum propagation time in milliseconds of propagating the topic deletion data and metadata " + + "across all the brokers in the cluster.", tags), new Max()); + + LOGGER.debug("{} constructor was initialized successfully.", "ClusterTopicManipulationMetrics"); + } + + /** + * start measuring the topic creation process and its RPC (remote programmable client) + */ + public void startTopicCreationMeasurement() { + this.setTopicCreationStartTimeMs(System.currentTimeMillis()); + LOGGER.debug("Started measuring."); + } + + public void startTopicDeletionMeasurement() { + this.setTopicDeletionStartTimeMs(System.currentTimeMillis()); + LOGGER.debug("Started measuring the cluster topic deletion process."); + } + + /** + * + * @param millis time in milliseconds in long data type + */ + void setTopicCreationStartTimeMs(long millis) { + _topicCreationStartTimeMs = millis; + } + + /** + * + * @param millis time in milli-seconds as a long data type + */ + void setTopicDeletionStartTimeMs(long millis) { + _topicDeletionStartTimeMs = millis; + } + + /** 
+ * + */ + public void finishTopicCreationMeasurement() { + long completedMs = System.currentTimeMillis(); + long startMs = this.topicCreationStartTimeMs(); + this._topicCreationSensor.record(completedMs - startMs); + + LOGGER.debug("Finished measuring topic creation."); + } + + public void finishTopicDeletionMeasurement() { + long completeMs = System.currentTimeMillis(); + long startMs = this.topicDeletionStartTimeMs(); + this._topicDeletionSensor.record(completeMs - startMs); + + LOGGER.debug("Finished measuring topic deletion"); + } + + /** + * + * @return the _topicCreationStartTimeMs as a long data type + */ + private long topicCreationStartTimeMs() { + return _topicCreationStartTimeMs; + } + + private long topicDeletionStartTimeMs() { + return _topicDeletionStartTimeMs; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} + + diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/metrics/CommitAvailabilityMetrics.java b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/CommitAvailabilityMetrics.java new file mode 100644 index 00000000..9643a8d0 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/CommitAvailabilityMetrics.java @@ -0,0 +1,61 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services.metrics; + +import java.util.Map; +import org.apache.kafka.common.MetricName; +import org.apache.kafka.common.metrics.MetricConfig; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.metrics.Sensor; +import org.apache.kafka.common.metrics.stats.CumulativeSum; +import org.apache.kafka.common.metrics.stats.Rate; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class CommitAvailabilityMetrics { + + private static final String METRIC_GROUP_NAME = "commit-availability-service"; + private static final Logger LOG = LoggerFactory.getLogger(CommitAvailabilityMetrics.class); + public final Sensor _offsetsCommitted; + public final Sensor _failedCommitOffsets; + + /** + * Metrics for Calculating the offset commit availability of a consumer. 
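Callers are expected to bracket each cluster operation with the start/finish pair so the sensors capture end-to-end propagation time; a minimal usage sketch in which the topic-creation step is a hypothetical helper:

    ClusterTopicManipulationMetrics manipulationMetrics = new ClusterTopicManipulationMetrics(metrics, tags);
    manipulationMetrics.startTopicCreationMeasurement();
    createTopicAndAwaitPropagation();                      // hypothetical: create the topic and wait for metadata on all brokers
    manipulationMetrics.finishTopicCreationMeasurement();  // records the elapsed ms into the avg/max sensors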
+ * @param metrics the commit offset metrics + * @param tags the tags associated, i.e) kmf.services:name=single-cluster-monitor + */ + public CommitAvailabilityMetrics(final Metrics metrics, final Map tags) { + LOG.info("{} called.", this.getClass().getSimpleName()); + _offsetsCommitted = metrics.sensor("offsets-committed"); + _offsetsCommitted.add(new MetricName("offsets-committed-total", METRIC_GROUP_NAME, + "The total number of offsets per second that are committed.", tags), new CumulativeSum()); + + _failedCommitOffsets = metrics.sensor("failed-commit-offsets"); + _failedCommitOffsets.add(new MetricName("failed-commit-offsets-avg", METRIC_GROUP_NAME, + "The average number of offsets per second that have failed.", tags), new Rate()); + _failedCommitOffsets.add(new MetricName("failed-commit-offsets-total", METRIC_GROUP_NAME, + "The total number of offsets per second that have failed.", tags), new CumulativeSum()); + + metrics.addMetric(new MetricName("offsets-committed-avg", METRIC_GROUP_NAME, "The average offset commits availability.", tags), + (MetricConfig config, long now) -> { + Object offsetCommitTotal = metrics.metrics().get(metrics.metricName("offsets-committed-total", METRIC_GROUP_NAME, tags)).metricValue(); + Object offsetCommitFailTotal = metrics.metrics().get(metrics.metricName("failed-commit-offsets-total", METRIC_GROUP_NAME, tags)).metricValue(); + if (offsetCommitTotal != null && offsetCommitFailTotal != null) { + double offsetsCommittedCount = (double) offsetCommitTotal; + double offsetsCommittedErrorCount = (double) offsetCommitFailTotal; + return offsetsCommittedCount / (offsetsCommittedCount + offsetsCommittedErrorCount); + } else { + return 0; + } + }); + } +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/metrics/CommitLatencyMetrics.java b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/CommitLatencyMetrics.java new file mode 100644 index 00000000..495ea545 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/CommitLatencyMetrics.java @@ -0,0 +1,105 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services.metrics; + +import java.util.Map; +import org.apache.kafka.common.MetricName; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.metrics.Sensor; +import org.apache.kafka.common.metrics.stats.Avg; +import org.apache.kafka.common.metrics.stats.Max; +import org.apache.kafka.common.metrics.stats.Percentile; +import org.apache.kafka.common.metrics.stats.Percentiles; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * The CommitLatencyMetrics class contains methods that measures and + * determines the latency of Kafka consumer offset commit(). 
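The derived offsets-committed-avg gauge above reduces to committed / (committed + failed); a worked example with assumed totals:

    double offsetsCommittedCount = 950.0;       // assumed value of offsets-committed-total
    double offsetsCommittedErrorCount = 50.0;   // assumed value of failed-commit-offsets-total
    double availability = offsetsCommittedCount / (offsetsCommittedCount + offsetsCommittedErrorCount);
    // -> 0.95: 95% of offset commits succeeded over the metrics window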
+ */ +public class CommitLatencyMetrics { + private static final String METRIC_GROUP_NAME = "commit-latency-service"; + private static final Logger LOG = LoggerFactory.getLogger(CommitLatencyMetrics.class); + private final Sensor _commitOffsetLatency; + private long _commitStartTimeMs; + private volatile boolean _inProgressCommit; + + /** + * Metrics for Calculating the offset commit latency of a consumer. + * @param metrics the commit offset metrics + * @param tags the tags associated, i.e) kmf.services:name=single-cluster-monitor + */ + public CommitLatencyMetrics(Metrics metrics, Map tags, int latencyPercentileMaxMs, + int latencyPercentileGranularityMs) { + _inProgressCommit = false; + _commitOffsetLatency = metrics.sensor("commit-offset-latency"); + _commitOffsetLatency.add(new MetricName("commit-offset-latency-ms-avg", METRIC_GROUP_NAME, "The average latency in ms of committing offset", tags), new Avg()); + _commitOffsetLatency.add(new MetricName("commit-offset-latency-ms-max", METRIC_GROUP_NAME, "The maximum latency in ms of committing offset", tags), new Max()); + + if (latencyPercentileGranularityMs == 0) { + throw new IllegalArgumentException("The latency percentile granularity was incorrectly passed a zero value."); + } + + // 2 extra buckets exist which are respectively designated for values which are less than 0.0 or larger than max. + int bucketNum = latencyPercentileMaxMs / latencyPercentileGranularityMs + 2; + int sizeInBytes = bucketNum * 4; + _commitOffsetLatency.add(new Percentiles(sizeInBytes, latencyPercentileMaxMs, Percentiles.BucketSizing.CONSTANT, + new Percentile(new MetricName("commit-offset-latency-ms-99th", METRIC_GROUP_NAME, "The 99th percentile latency of committing offset", tags), 99.0), + new Percentile(new MetricName("commit-offset-latency-ms-999th", METRIC_GROUP_NAME, "The 99.9th percentile latency of committing offset", tags), 99.9), + new Percentile(new MetricName("commit-offset-latency-ms-9999th", METRIC_GROUP_NAME, "The 99.99th percentile latency of committing offset", tags), 99.99))); + LOG.info("{} was constructed successfully.", this.getClass().getSimpleName()); + } + + /** + * start the recording of consumer offset commit + */ + public void recordCommitStart() { + if (!_inProgressCommit) { + this.setCommitStartTimeMs(System.currentTimeMillis()); + _inProgressCommit = true; + } else { + // inProgressCommit is already set to TRUE; + LOG.debug("Offset commit is already in progress."); + } + } + + /** + * finish the recording of consumer offset commit + */ + public void recordCommitComplete() { + if (_inProgressCommit) { + long commitCompletedMs = System.currentTimeMillis(); + long commitStartMs = this.commitStartTimeMs(); + this._commitOffsetLatency.record(commitCompletedMs - commitStartMs); + _inProgressCommit = false; + } else { + // inProgressCommit is already set to FALSE; + LOG.debug("Offset commit is not in progress. 
CommitLatencyMetrics shouldn't completing a record commit here."); + } + } + + /** + * set in milliseconds the start time of consumer offset commit + * @param time commit start time in ms + */ + public void setCommitStartTimeMs(long time) { + _commitStartTimeMs = time; + } + + /** + * retrieve the start time of consumer offset commit + * @return _commitStartTimeMs + */ + public long commitStartTimeMs() { + return _commitStartTimeMs; + } +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/metrics/ConsumeMetrics.java b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/ConsumeMetrics.java new file mode 100644 index 00000000..82d902e9 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/ConsumeMetrics.java @@ -0,0 +1,92 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services.metrics; + +import java.util.Map; +import org.apache.kafka.common.MetricName; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.metrics.Sensor; +import org.apache.kafka.common.metrics.stats.Avg; +import org.apache.kafka.common.metrics.stats.CumulativeSum; +import org.apache.kafka.common.metrics.stats.Max; +import org.apache.kafka.common.metrics.stats.Percentile; +import org.apache.kafka.common.metrics.stats.Percentiles; +import org.apache.kafka.common.metrics.stats.Rate; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class ConsumeMetrics { + public final Sensor _consumeError; + public final Sensor _bytesConsumed; + public final Sensor _recordsConsumed; + public final Sensor _recordsDuplicated; + public final Sensor _recordsLost; + public final Sensor _recordsDelay; + public final Sensor _recordsDelayed; + private static final String METRIC_GROUP_NAME = "consume-service"; + private static final Logger LOG = LoggerFactory.getLogger(ConsumeMetrics.class); + + public ConsumeMetrics(final Metrics metrics, Map tags, int latencyPercentileMaxMs, + int latencyPercentileGranularityMs) { + + _bytesConsumed = metrics.sensor("bytes-consumed"); + _bytesConsumed.add(new MetricName("bytes-consumed-rate", METRIC_GROUP_NAME, "The average number of bytes per second that are consumed", tags), new Rate()); + + _consumeError = metrics.sensor("consume-error"); + _consumeError.add(new MetricName("consume-error-rate", METRIC_GROUP_NAME, "The average number of errors per second", tags), new Rate()); + _consumeError.add(new MetricName("consume-error-total", METRIC_GROUP_NAME, "The total number of errors", tags), new CumulativeSum()); + + _recordsConsumed = metrics.sensor("records-consumed"); + _recordsConsumed.add(new MetricName("records-consumed-rate", METRIC_GROUP_NAME, "The average number of records per second that are consumed", tags), new Rate()); + _recordsConsumed.add(new MetricName("records-consumed-total", METRIC_GROUP_NAME, "The total number of records that are consumed", tags), new CumulativeSum()); + + _recordsDuplicated = metrics.sensor("records-duplicated"); + _recordsDuplicated.add(new MetricName("records-duplicated-rate", METRIC_GROUP_NAME, "The average 
number of records per second that are duplicated", tags), new Rate()); + _recordsDuplicated.add(new MetricName("records-duplicated-total", METRIC_GROUP_NAME, "The total number of records that are duplicated", tags), new CumulativeSum()); + + _recordsLost = metrics.sensor("records-lost"); + _recordsLost.add(new MetricName("records-lost-rate", METRIC_GROUP_NAME, "The average number of records per second that are lost", tags), new Rate()); + _recordsLost.add(new MetricName("records-lost-total", METRIC_GROUP_NAME, "The total number of records that are lost", tags), new CumulativeSum()); + + _recordsDelayed = metrics.sensor("records-delayed"); + _recordsDelayed.add(new MetricName("records-delayed-rate", METRIC_GROUP_NAME, "The average number of records per second that are either lost or arrive after maximum allowed latency under SLA", tags), new Rate()); + _recordsDelayed.add(new MetricName("records-delayed-total", METRIC_GROUP_NAME, "The total number of records that are either lost or arrive after maximum allowed latency under SLA", tags), new CumulativeSum()); + + _recordsDelay = metrics.sensor("records-delay"); + _recordsDelay.add(new MetricName("records-delay-ms-avg", METRIC_GROUP_NAME, "The average latency of records from producer to consumer", tags), new Avg()); + _recordsDelay.add(new MetricName("records-delay-ms-max", METRIC_GROUP_NAME, "The maximum latency of records from producer to consumer", tags), new Max()); + + // There are 2 extra buckets use for values smaller than 0.0 or larger than max, respectively. + int bucketNum = latencyPercentileMaxMs / latencyPercentileGranularityMs + 2; + int sizeInBytes = 4 * bucketNum; + _recordsDelay.add(new Percentiles(sizeInBytes, latencyPercentileMaxMs, Percentiles.BucketSizing.CONSTANT, + new Percentile(new MetricName("records-delay-ms-99th", METRIC_GROUP_NAME, "The 99th percentile latency of records from producer to consumer", tags), 99.0), + new Percentile(new MetricName("records-delay-ms-999th", METRIC_GROUP_NAME, "The 99.9th percentile latency of records from producer to consumer", tags), 99.9), + new Percentile(new MetricName("records-delay-ms-9999th", METRIC_GROUP_NAME, "The 99.99th percentile latency of records from producer to consumer", tags), 99.99))); + + metrics.addMetric(new MetricName("consume-availability-avg", METRIC_GROUP_NAME, "The average consume availability", tags), + (config, now) -> { + double recordsConsumedRate = (double) metrics.metrics().get(metrics.metricName("records-consumed-rate", METRIC_GROUP_NAME, tags)).metricValue(); + double recordsLostRate = (double) metrics.metrics().get(metrics.metricName("records-lost-rate", METRIC_GROUP_NAME, tags)).metricValue(); + double recordsDelayedRate = (double) metrics.metrics().get(metrics.metricName("records-delayed-rate", METRIC_GROUP_NAME, tags)).metricValue(); + + if (new Double(recordsLostRate).isNaN()) + recordsLostRate = 0; + if (new Double(recordsDelayedRate).isNaN()) + recordsDelayedRate = 0; + + return recordsConsumedRate + recordsLostRate > 0 + ? (recordsConsumedRate - recordsDelayedRate) / (recordsConsumedRate + recordsLostRate) : 0; + } + ); + } +} diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/metrics/OffsetCommitServiceMetrics.java b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/OffsetCommitServiceMetrics.java new file mode 100644 index 00000000..b6a6e753 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/OffsetCommitServiceMetrics.java @@ -0,0 +1,113 @@ +/** + * Copyright 2020 LinkedIn Corp. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services.metrics; + +import java.util.Map; +import org.apache.kafka.common.MetricName; +import org.apache.kafka.common.metrics.Measurable; +import org.apache.kafka.common.metrics.MetricConfig; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.metrics.Sensor; +import org.apache.kafka.common.metrics.stats.Avg; +import org.apache.kafka.common.metrics.stats.CumulativeSum; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class OffsetCommitServiceMetrics extends XinfraMonitorMetrics { + + private final Sensor _offsetCommittedSensor; + private final Sensor _offsetCommitFailSensor; + private static final Logger LOGGER = LoggerFactory.getLogger(OffsetCommitServiceMetrics.class); + private static final String METRIC_GROUP_NAME = "offset-commit-service"; + private static final String SUCCESS_SENSOR_NAME = "offset-commit-service-success"; + private static final String SUCCESS_RATE_METRIC = "offset-commit-service-success-rate"; + private static final String SUCCESS_METRIC_TOTAL = "offset-commit-service-success-total"; + private static final String FAILURE_SENSOR_NAME = "offset-commit-service-failure"; + private static final String FAILURE_RATE_METRIC = "offset-commit-service-failure-rate"; + private static final String FAILURE_METRIC_TOTAL = "offset-commit-service-failure-total"; + + /** + * + * @param metrics a named, numerical measurement. + * Sensor is a handle to record numerical measurements as they occur. + * @param tags metrics/sensor's tags + */ + public OffsetCommitServiceMetrics(final Metrics metrics, final Map tags) { + super(metrics, tags); + _offsetCommittedSensor = metrics.sensor(SUCCESS_SENSOR_NAME); + _offsetCommittedSensor.add(new MetricName(SUCCESS_RATE_METRIC, METRIC_GROUP_NAME, + "The success rate of group coordinator accepting consumer offset commit requests.", tags), new Avg()); + _offsetCommittedSensor.add(new MetricName(SUCCESS_METRIC_TOTAL, METRIC_GROUP_NAME, + "The total count of group coordinator successfully accepting consumer offset commit requests.", tags), + new CumulativeSum()); + + _offsetCommitFailSensor = metrics.sensor(FAILURE_SENSOR_NAME); + /* NaN will persist as long as no record is submitted to the failure sensor. + we'll continue with NaN for now since we'd rather that the Sensor itself is a true and unaltered record of what values it recorded. 
*/ + _offsetCommitFailSensor.add(new MetricName(FAILURE_RATE_METRIC, METRIC_GROUP_NAME, + "The failure rate of group coordinator accepting consumer offset commit requests.", tags), new Avg()); + _offsetCommitFailSensor.add(new MetricName(FAILURE_METRIC_TOTAL, METRIC_GROUP_NAME, + "The total count of group coordinator unsuccessfully receiving consumer offset commit requests.", tags), + new CumulativeSum()); + + Measurable measurable = new Measurable() { + @Override + public double measure(MetricConfig config, long now) { + double offsetCommitSuccessRate = (double) metrics.metrics() + .get(metrics.metricName(SUCCESS_RATE_METRIC, METRIC_GROUP_NAME, tags)) + .metricValue(); + double offsetCommitFailureRate = (double) metrics.metrics() + .get(metrics.metricName(FAILURE_RATE_METRIC, METRIC_GROUP_NAME, tags)) + .metricValue(); + + if (new Double(offsetCommitSuccessRate).isNaN()) { + offsetCommitSuccessRate = 0; + } + + if (new Double(offsetCommitFailureRate).isNaN()) { + offsetCommitFailureRate = 0; + } + + return offsetCommitSuccessRate + offsetCommitFailureRate > 0 ? offsetCommitSuccessRate / ( + offsetCommitSuccessRate + offsetCommitFailureRate) : 0; + } + }; + + metrics.addMetric(new MetricName("offset-commit-availability-avg", METRIC_GROUP_NAME, + "The average offset commit availability with respect to the group coordinator.", tags), measurable); + } + + /** + * start measuring and its RPC (remote programmable client) + */ + public void recordSuccessful() { + _offsetCommittedSensor.record(); + LOGGER.debug("recorded successful."); + } + + public void recordFailed() { + _offsetCommitFailSensor.record(); + LOGGER.error("The offset commit failed due to the response future failing and the future NOT being retriable."); + } + + public void recordUnavailable() { + _offsetCommitFailSensor.record(); + LOGGER.error("The offset commit failed due to coordinator being unavailable."); + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} + + diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/metrics/ProduceMetrics.java b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/ProduceMetrics.java new file mode 100644 index 00000000..1ce7202e --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/ProduceMetrics.java @@ -0,0 +1,139 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
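These record methods are meant to be driven from the consumer's offset commit callback; a hedged sketch of that wiring (the callback shape is the standard KafkaConsumer commitAsync API, but how the service itself hooks in is not shown in this patch):

    consumer.commitAsync((offsets, exception) -> {
      if (exception == null) {
        offsetCommitServiceMetrics.recordSuccessful();
      } else {
        offsetCommitServiceMetrics.recordFailed();
      }
    });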
+ */ + +package com.linkedin.xinfra.monitor.services.metrics; + +import com.linkedin.xinfra.monitor.XinfraMonitorConstants; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.kafka.common.MetricName; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.metrics.Sensor; +import org.apache.kafka.common.metrics.stats.Avg; +import org.apache.kafka.common.metrics.stats.CumulativeSum; +import org.apache.kafka.common.metrics.stats.Max; +import org.apache.kafka.common.metrics.stats.Percentile; +import org.apache.kafka.common.metrics.stats.Percentiles; +import org.apache.kafka.common.metrics.stats.Rate; + + +public class ProduceMetrics { + + public final Metrics _metrics; + public final Sensor _recordsProduced; + public final Sensor _produceError; + public final Sensor _produceDelay; + public final ConcurrentMap<Integer, Sensor> _recordsProducedPerPartition; + public final ConcurrentMap<Integer, Sensor> _produceErrorPerPartition; + public final ConcurrentMap<Integer, Boolean> _produceErrorInLastSendPerPartition; + private final Map<String, String> _tags; + + public ProduceMetrics(final Metrics metrics, final Map<String, String> tags, int latencyPercentileGranularityMs, + int latencyPercentileMaxMs, AtomicInteger partitionNumber, boolean treatZeroThroughputAsUnavailable) { + _metrics = metrics; + _tags = tags; + + _recordsProducedPerPartition = new ConcurrentHashMap<>(); + _produceErrorPerPartition = new ConcurrentHashMap<>(); + _produceErrorInLastSendPerPartition = new ConcurrentHashMap<>(); + + _recordsProduced = metrics.sensor("records-produced"); + _recordsProduced.add( + new MetricName("records-produced-rate", XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, + "The average number of records per second that are produced", tags), new Rate()); + _recordsProduced.add( + new MetricName("records-produced-total", XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, + "The total number of records that are produced", tags), new CumulativeSum()); + + _produceError = metrics.sensor("produce-error"); + _produceError.add(new MetricName("produce-error-rate", XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, + "The average number of errors per second", tags), new Rate()); + _produceError.add(new MetricName("produce-error-total", XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, + "The total number of errors", tags), new CumulativeSum()); + + _produceDelay = metrics.sensor("produce-delay"); + _produceDelay.add(new MetricName("produce-delay-ms-avg", XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, + "The average delay in ms for produce requests", tags), new Avg()); + _produceDelay.add(new MetricName("produce-delay-ms-max", XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, + "The maximum delay in ms for produce requests", tags), new Max()); + + // There are 2 extra buckets used for values smaller than 0.0 or larger than the max, respectively.
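+ // For example, with the values assumed in ConsumeServiceTest later in this patch (latencyPercentileMaxMs = 5000,
+ // latencyPercentileGranularityMs = 1), bucketNum = 5000 / 1 + 2 = 5002 and the histogram is allocated
+ // sizeInBytes = 4 * 5002 = 20008 bytes, i.e. 4 bytes per bucket.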
+ int bucketNum = latencyPercentileMaxMs / latencyPercentileGranularityMs + 2; + int sizeInBytes = 4 * bucketNum; + _produceDelay.add(new Percentiles(sizeInBytes, latencyPercentileMaxMs, Percentiles.BucketSizing.CONSTANT, + new Percentile(new MetricName("produce-delay-ms-99th", XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, + "The 99th percentile delay in ms for produce requests", tags), 99.0), new Percentile( + new MetricName("produce-delay-ms-999th", XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, + "The 99.9th percentile delay in ms for produce requests", tags), 99.9), new Percentile( + new MetricName("produce-delay-ms-9999th", XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, + "The 99.99th percentile delay in ms for produce requests", tags), 99.99))); + + metrics.addMetric( + new MetricName("produce-availability-avg", XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, + "The average produce availability", tags), (config, now) -> { + double availabilitySum = 0.0; + int partitionNum = partitionNumber.get(); + for (int partition = 0; partition < partitionNum; partition++) { + double recordsProduced = (double) metrics.metrics() + .get(metrics.metricName("records-produced-rate-partition-" + partition, + XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, tags)) + .metricValue(); + double produceError = (double) metrics.metrics() + .get(metrics.metricName("produce-error-rate-partition-" + partition, + XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, tags)) + .metricValue(); + // If there is no error, the error rate sensor may expire and the value may be NaN. Treat NaN as 0 for the error rate. + if (Double.isNaN(produceError) || Double.isInfinite(produceError)) { + produceError = 0; + } + // If there was any produce (successful or failed) to a partition, use its success fraction as its availability. + if (recordsProduced + produceError > 0) { + availabilitySum += recordsProduced / (recordsProduced + produceError); + } else if (!treatZeroThroughputAsUnavailable) { + // If the user configures treatZeroThroughputAsUnavailable to be false, a partition's availability + // is 1.0 as long as no exception is thrown from the producer. + // This allows Kafka admins to monitor exactly the availability experienced by Kafka users, whose producers + // will block and retry for a certain amount of time based on their configuration (e.g. retries, retry.backoff.ms). + // Note that if it takes a long time for messages to be retried and sent, the latency in the ConsumeService + // will increase, and that will reduce ConsumeAvailability if the latency exceeds consume.latency.sla.ms. + // If the timeout is set to more than 60 seconds (the current samples window duration), + // the error sample might expire before the next error can be produced. + // In order to detect an offline partition with a high producer timeout config, the error status during the last + // send is also checked before declaring 1.0 availability for the partition.
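+ // Worked example: if records-produced-rate-partition-0 is 9.5 records/s and produce-error-rate-partition-0 is
+ // 0.5 errors/s, partition 0 contributes 9.5 / (9.5 + 0.5) = 0.95 to availabilitySum; with partitionNum = 4
+ // partitions, the gauge reports availabilitySum / 4.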
+ Boolean lastSendError = _produceErrorInLastSendPerPartition.get(partition); + if (lastSendError == null || !lastSendError) { + availabilitySum += 1.0; + } + } + } + + // Assign equal weight to per-partition availability when calculating overall availability + return availabilitySum / partitionNum; + } + ); + } + + public void addPartitionSensors(int partition) { + Sensor recordsProducedSensor = _metrics.sensor("records-produced-partition-" + partition); + recordsProducedSensor.add(new MetricName("records-produced-rate-partition-" + partition, + XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, + "The average number of records per second that are produced to this partition", _tags), new Rate()); + _recordsProducedPerPartition.put(partition, recordsProducedSensor); + + Sensor errorsSensor = _metrics.sensor("produce-error-partition-" + partition); + errorsSensor.add(new MetricName("produce-error-rate-partition-" + partition, + XinfraMonitorConstants.METRIC_GROUP_NAME_PRODUCE_SERVICE, + "The average number of errors per second when producing to this partition", _tags), new Rate()); + _produceErrorPerPartition.put(partition, errorsSensor); + } +} + diff --git a/src/main/java/com/linkedin/xinfra/monitor/services/metrics/XinfraMonitorMetrics.java b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/XinfraMonitorMetrics.java new file mode 100644 index 00000000..a6cc8cee --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/services/metrics/XinfraMonitorMetrics.java @@ -0,0 +1,35 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services.metrics; + +import java.util.Map; +import org.apache.kafka.common.metrics.Metrics; + + +/** + * Parent class that the per-service metrics classes extend. + */ +class XinfraMonitorMetrics { + + final Metrics _metrics; + final Map<String, String> _tags; + + /** + * @param metrics the Metrics registry that sensors and metrics are registered with; a Sensor is a handle + * for recording numerical measurements as they occur. + * @param tags the tags attached to every sensor and metric + */ + XinfraMonitorMetrics(Metrics metrics, Map<String, String> tags) { + _metrics = metrics; + _tags = tags; + } + +} diff --git a/src/main/java/com/linkedin/kmf/tests/BasicEndToEndTest.java b/src/main/java/com/linkedin/xinfra/monitor/tests/BasicEndToEndTest.java similarity index 63% rename from src/main/java/com/linkedin/kmf/tests/BasicEndToEndTest.java rename to src/main/java/com/linkedin/xinfra/monitor/tests/BasicEndToEndTest.java index bd1ff8d1..4a8729e6 100644 --- a/src/main/java/com/linkedin/kmf/tests/BasicEndToEndTest.java +++ b/src/main/java/com/linkedin/xinfra/monitor/tests/BasicEndToEndTest.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,15 +7,19 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
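To illustrate the XinfraMonitorMetrics parent class above: a subclass only needs to call super(metrics, tags) and register its own sensors against the shared _metrics and _tags fields, as OffsetCommitServiceMetrics does. A hypothetical minimal sketch (ExampleServiceMetrics, its sensor, and its metric names are illustrative only and not part of this patch; it assumes the same imports as OffsetCommitServiceMetrics):

class ExampleServiceMetrics extends XinfraMonitorMetrics {
  private final Sensor _exampleSensor;

  ExampleServiceMetrics(Metrics metrics, Map<String, String> tags) {
    super(metrics, tags);
    // Register one sensor with a cumulative-count metric, mirroring the pattern used above.
    _exampleSensor = _metrics.sensor("example-events");
    _exampleSensor.add(new MetricName("example-events-total", "example-service",
        "The total count of example events observed.", _tags), new CumulativeSum());
  }

  void recordEvent() {
    _exampleSensor.record();
  }
}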
*/ -package com.linkedin.kmf.tests; -import com.linkedin.kmf.apps.SingleClusterMonitor; -import com.linkedin.kmf.services.TopicManagementService; -import com.linkedin.kmf.services.ConsumeService; -import com.linkedin.kmf.services.ProduceService; +package com.linkedin.xinfra.monitor.tests; + +import com.linkedin.xinfra.monitor.apps.SingleClusterMonitor; +import com.linkedin.xinfra.monitor.services.ConsumeService; +import com.linkedin.xinfra.monitor.services.ConsumerFactoryImpl; +import com.linkedin.xinfra.monitor.services.ProduceService; +import com.linkedin.xinfra.monitor.services.TopicManagementService; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Map; /* @@ -43,16 +47,24 @@ public class BasicEndToEndTest implements Test { public BasicEndToEndTest(Map props, String name) throws Exception { _name = name; _topicManagementService = new TopicManagementService(props, name); + CompletableFuture topicPartitionReady = _topicManagementService.topicPartitionResult(); _produceService = new ProduceService(props, name); - _consumeService = new ConsumeService(props, name); + ConsumerFactoryImpl consumerFactory = new ConsumerFactoryImpl(props); + _consumeService = new ConsumeService(name, topicPartitionReady, consumerFactory); } @Override public void start() { _topicManagementService.start(); - _produceService.start(); - _consumeService.start(); - LOG.info(_name + "/BasicEndToEndTest started"); + CompletableFuture topicPartitionResult = _topicManagementService.topicPartitionResult(); + topicPartitionResult.thenRun(() -> { + try { + _produceService.start(); + _consumeService.start(); + } finally { + LOG.info("{} /BasicEndToEndTest started.", _name); + } + }); } @Override @@ -60,7 +72,7 @@ public void stop() { _topicManagementService.stop(); _produceService.stop(); _consumeService.stop(); - LOG.info(_name + "/BasicEndToEndTest stopped"); + LOG.info("{} /BasicEndToEndTest stopped.", _name); } @Override @@ -70,9 +82,9 @@ public boolean isRunning() { @Override public void awaitShutdown() { - _topicManagementService.awaitShutdown(); - _produceService.awaitShutdown(); - _consumeService.awaitShutdown(); + _topicManagementService.awaitShutdown(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); + _produceService.awaitShutdown(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); + _consumeService.awaitShutdown(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); } public static void main(String[] args) throws Exception { diff --git a/src/main/java/com/linkedin/kmf/tests/Test.java b/src/main/java/com/linkedin/xinfra/monitor/tests/Test.java similarity index 79% rename from src/main/java/com/linkedin/kmf/tests/Test.java rename to src/main/java/com/linkedin/xinfra/monitor/tests/Test.java index 09e6a10a..6da22e66 100644 --- a/src/main/java/com/linkedin/kmf/tests/Test.java +++ b/src/main/java/com/linkedin/xinfra/monitor/tests/Test.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. 
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,9 +7,9 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -package com.linkedin.kmf.tests; +package com.linkedin.xinfra.monitor.tests; -import com.linkedin.kmf.apps.App; +import com.linkedin.xinfra.monitor.apps.App; /** * @deprecated This interface has been deprecated and will be removed in a future release. Please use com.linkedin.kmf.apps.App instead. diff --git a/src/main/java/com/linkedin/xinfra/monitor/topicfactory/DefaultTopicFactory.java b/src/main/java/com/linkedin/xinfra/monitor/topicfactory/DefaultTopicFactory.java new file mode 100644 index 00000000..8b2ddef7 --- /dev/null +++ b/src/main/java/com/linkedin/xinfra/monitor/topicfactory/DefaultTopicFactory.java @@ -0,0 +1,38 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.topicfactory; + +import com.linkedin.xinfra.monitor.common.Utils; +import java.util.Collections; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import org.apache.kafka.clients.admin.AdminClient; + + +public class DefaultTopicFactory implements TopicFactory { + + /** This constructor is required by TopicFactory but does nothing. */ + public DefaultTopicFactory(Map config) { + } + + @Override + public int createTopicIfNotExist(String topic, short replicationFactor, double partitionToBrokerRatio, Properties topicConfig, AdminClient adminClient) + throws ExecutionException, InterruptedException { + return Utils.createTopicIfNotExists(topic, replicationFactor, partitionToBrokerRatio, 1, topicConfig, adminClient); + } + + @Override + public Set getExcludedBrokers(AdminClient adminClient) { + return Collections.emptySet(); + } +} diff --git a/src/main/java/com/linkedin/kmf/topicfactory/TopicFactory.java b/src/main/java/com/linkedin/xinfra/monitor/topicfactory/TopicFactory.java similarity index 61% rename from src/main/java/com/linkedin/kmf/topicfactory/TopicFactory.java rename to src/main/java/com/linkedin/xinfra/monitor/topicfactory/TopicFactory.java index 3f4c59f3..b3d0a706 100644 --- a/src/main/java/com/linkedin/kmf/topicfactory/TopicFactory.java +++ b/src/main/java/com/linkedin/xinfra/monitor/topicfactory/TopicFactory.java @@ -1,5 +1,5 @@ /** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this * file except in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -7,15 +7,19 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
*/ -package com.linkedin.kmf.topicfactory; + +package com.linkedin.xinfra.monitor.topicfactory; import java.util.Properties; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import org.apache.kafka.clients.admin.AdminClient; /** - * Constructs the monitor topic if it does not exist. + * Constructs the monitor topic if it does not exist, and provides excluded-broker info for the topic management service. * - * Implementations of this class should have a public constructor with the following signature:
+ * Implementations of this class should have a public constructor with the following signature: * Constructor(Map<String, ?> config) where config are additional configuration parameters passed in from the Kafka * Monitor configuration. */ @@ -23,7 +27,6 @@ public interface TopicFactory { /** * Creates the specified topic if it does not exist. - * @param zkUrl zookeeper connection url * @param topic topic name * @param replicationFactor the replication factor for the topic * @param partitionToBrokerRatio This is multiplied by the number brokers to compute the number of partitions in the topic. @@ -32,6 +35,13 @@ public interface TopicFactory { * @return The number of partitions for the specified topic. */ - int createTopicIfNotExist(String zkUrl, String topic, int replicationFactor, double partitionToBrokerRatio, Properties topicProperties); + int createTopicIfNotExist(String topic, short replicationFactor, double partitionToBrokerRatio, Properties topicProperties, AdminClient adminClient) + throws ExecutionException, InterruptedException; + + /** + * @param adminClient AdminClient object + * @return A set of brokers that don't take new partitions or reassigned partitions for topics. + */ + Set getExcludedBrokers(AdminClient adminClient); } diff --git a/src/test/java/com/linkedin/kmf/KafkaMonitorTest.java b/src/test/java/com/linkedin/kmf/KafkaMonitorTest.java deleted file mode 100644 index f1232d51..00000000 --- a/src/test/java/com/linkedin/kmf/KafkaMonitorTest.java +++ /dev/null @@ -1,137 +0,0 @@ -/** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this - * file except in compliance with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - */ -package com.linkedin.kmf; - -import com.linkedin.kmf.services.Service; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; - -import java.util.concurrent.atomic.AtomicReference; -import org.testng.annotations.Test; - - -@Test -public class KafkaMonitorTest { - - @Test - public void lifecycleTest() throws Exception { - KafkaMonitor kafkaMonitor = kafkaMonitor(); - - // Nothing should be started - assertEquals(FakeService.startCount.get(), 0); - assertEquals(FakeService.stopCount.get(), 0); - - // Should accept but ignore start because start has not been called - kafkaMonitor.stop(); - assertEquals(FakeService.stopCount.get(), 0); - - // Should start - kafkaMonitor.start(); - assertEquals(FakeService.startCount.get(), 1); - - // Should allow start to be called more than once - kafkaMonitor.stop(); - kafkaMonitor.stop(); - assertEquals(FakeService.startCount.get(), 1); - assertEquals(FakeService.stopCount.get(), 1); - - // Should be allowed to shutdown more than once. 
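To illustrate the TopicFactory contract described above: an implementation might exclude a fixed, configured set of brokers from receiving partitions of the monitor topic. A hypothetical sketch (StaticExclusionTopicFactory and the excluded.broker.ids key are illustrative only; it assumes the same imports as DefaultTopicFactory plus java.util.HashSet):

public class StaticExclusionTopicFactory implements TopicFactory {
  private final Set<Integer> _excludedBrokerIds = new HashSet<>();

  public StaticExclusionTopicFactory(Map<String, ?> config) {
    // "excluded.broker.ids" is a hypothetical key holding a comma-separated list of broker ids.
    Object ids = config.get("excluded.broker.ids");
    if (ids != null) {
      for (String id : ids.toString().split(",")) {
        _excludedBrokerIds.add(Integer.parseInt(id.trim()));
      }
    }
  }

  @Override
  public int createTopicIfNotExist(String topic, short replicationFactor, double partitionToBrokerRatio,
      Properties topicConfig, AdminClient adminClient) throws ExecutionException, InterruptedException {
    // Delegate creation to the same helper that DefaultTopicFactory uses.
    return Utils.createTopicIfNotExists(topic, replicationFactor, partitionToBrokerRatio, 1, topicConfig, adminClient);
  }

  @Override
  public Set<Integer> getExcludedBrokers(AdminClient adminClient) {
    return Collections.unmodifiableSet(_excludedBrokerIds);
  }
}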
- kafkaMonitor.awaitShutdown(); - kafkaMonitor.awaitShutdown(); - } - - @Test - public void awaitShutdownOtherThread() throws Exception { - final KafkaMonitor kafkaMonitor = kafkaMonitor(); - final AtomicReference error = new AtomicReference<>(); - - Thread t = new Thread("test awaitshutdown thread") { - @Override - public void run() { - try { - kafkaMonitor.awaitShutdown(); - } catch (Throwable t) { - error.set(t); - } - } - }; - - t.start(); - kafkaMonitor.start(); - Thread.sleep(100); - kafkaMonitor.stop(); - t.join(500); - assertFalse(t.isAlive()); - assertEquals(error.get(), null); - } - - private KafkaMonitor kafkaMonitor() throws Exception { - FakeService.clearCounters(); - Map config = new HashMap<>(); - Map fakeServiceConfig = new HashMap<>(); - config.put("fake-service", fakeServiceConfig); - fakeServiceConfig.put(KafkaMonitor.CLASS_NAME_CONFIG, FakeService.class.getName()); - return new KafkaMonitor(config); - } - - - static final class FakeService implements Service { - - private static AtomicInteger startCount = new AtomicInteger(); - private static AtomicInteger stopCount = new AtomicInteger(); - private final AtomicBoolean _isRunning = new AtomicBoolean(); - - /** required */ - public FakeService(Map config, String serviceInstanceName) { - - } - - private static void clearCounters() { - startCount.set(0); - stopCount.set(0); - } - - @Override - public void start() { - _isRunning.compareAndSet(false, true); - startCount.incrementAndGet(); - } - - @Override - public synchronized void stop() { - _isRunning.compareAndSet(true, false); - stopCount.incrementAndGet(); - this.notifyAll(); - } - - @Override - public boolean isRunning() { - return _isRunning.get(); - } - - @Override - public synchronized void awaitShutdown() { - try { - if (stopCount.get() == 0) { - wait(3_000); - if (stopCount.get() == 0) { - throw new IllegalStateException("Never notified."); - } - } - } catch (InterruptedException e) { - throw new IllegalStateException(e); - } - } - } -} diff --git a/src/test/java/com/linkedin/kmf/services/TopicManagementServiceTest.java b/src/test/java/com/linkedin/kmf/services/TopicManagementServiceTest.java deleted file mode 100644 index 2e997c24..00000000 --- a/src/test/java/com/linkedin/kmf/services/TopicManagementServiceTest.java +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Copyright 2016 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this - * file except in compliance with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- */ -package com.linkedin.kmf.services; - -import java.util.ArrayList; -import java.util.List; -import kafka.cluster.Broker; -import org.apache.kafka.common.Node; -import org.apache.kafka.common.PartitionInfo; -import org.testng.Assert; -import org.testng.annotations.Test; -import com.linkedin.kmf.services.MultiClusterTopicManagementService.TopicManagementHelper; - -@Test -public class TopicManagementServiceTest { - - private static final String TOPIC = "kmf-unit-test-topic"; - - private List brokers(int brokerCount) { - List brokers = new ArrayList<>(); - for (int i = 0; i < brokerCount; i++) { - brokers.add(new Broker(i, "", -1, null)); - } - return brokers; - } - - private Node[] nodes(int brokerCount) { - Node[] nodes = new Node[brokerCount]; - for (int i = 0; i < brokerCount; i++) { - nodes[i] = new Node(i, "", -1); - } - return nodes; - } - - @Test - public void noDetection() { - List partitions = new ArrayList<>(); - Node[] node = nodes(2); - partitions.add(new PartitionInfo(TOPIC, 0, node[0], new Node[] {node[0], node[1]}, null)); - partitions.add(new PartitionInfo(TOPIC, 1, node[0], new Node[] {node[0], node[1]}, null)); - partitions.add(new PartitionInfo(TOPIC, 2, node[1], new Node[] {node[1], node[0]}, null)); - partitions.add(new PartitionInfo(TOPIC, 3, node[1], new Node[] {node[1], node[0]}, null)); - - Assert.assertFalse(TopicManagementHelper.someBrokerNotPreferredLeader(partitions, brokers(2))); - Assert.assertFalse(TopicManagementHelper.someBrokerNotElectedLeader(partitions, brokers(2))); - } - - @Test - public void detectLowTotalNumberOfPartitions() { - List partitions = new ArrayList<>(); - Node[] node = nodes(3); - partitions.add(new PartitionInfo(TOPIC, 0, node[0], new Node[] {node[0], node[1]}, null)); - partitions.add(new PartitionInfo(TOPIC, 1, node[1], new Node[] {node[1], node[0]}, null)); - partitions.add(new PartitionInfo(TOPIC, 2, node[2], new Node[] {node[2], node[0]}, null)); - - Assert.assertFalse(TopicManagementHelper.someBrokerNotPreferredLeader(partitions, brokers(3))); - Assert.assertFalse(TopicManagementHelper.someBrokerNotElectedLeader(partitions, brokers(3))); - Assert.assertEquals(TopicManagementHelper.getReplicationFactor(partitions), 2); - } - - - @Test - public void detectBrokerWithoutLeader() { - List partitions = new ArrayList<>(); - Node[] node = nodes(3); - partitions.add(new PartitionInfo(TOPIC, 0, node[0], new Node[] {node[0], node[1]}, null)); - partitions.add(new PartitionInfo(TOPIC, 1, node[0], new Node[] {node[0], node[1]}, null)); - partitions.add(new PartitionInfo(TOPIC, 2, node[1], new Node[] {node[1], node[0]}, null)); - partitions.add(new PartitionInfo(TOPIC, 3, node[1], new Node[] {node[2], node[1]}, null)); - partitions.add(new PartitionInfo(TOPIC, 4, node[1], new Node[] {node[2], node[0]}, null)); - - Assert.assertFalse(TopicManagementHelper.someBrokerNotPreferredLeader(partitions, brokers(3))); - Assert.assertTrue(TopicManagementHelper.someBrokerNotElectedLeader(partitions, brokers(3))); - } - - @Test - public void detectBrokerWithoutPreferredLeader() { - List partitions = new ArrayList<>(); - Node[] node = nodes(3); - partitions.add(new PartitionInfo(TOPIC, 0, node[0], new Node[] {node[0], node[1]}, null)); - partitions.add(new PartitionInfo(TOPIC, 1, node[0], new Node[] {node[0], node[1]}, null)); - partitions.add(new PartitionInfo(TOPIC, 2, node[1], new Node[] {node[0], node[0]}, null)); - partitions.add(new PartitionInfo(TOPIC, 3, node[1], new Node[] {node[2], node[1]}, null)); - partitions.add(new PartitionInfo(TOPIC, 4, 
node[1], new Node[] {node[2], node[0]}, null)); - - Assert.assertTrue(TopicManagementHelper.someBrokerNotPreferredLeader(partitions, brokers(3))); - Assert.assertTrue(TopicManagementHelper.someBrokerNotElectedLeader(partitions, brokers(3))); - } -} diff --git a/src/test/java/com/linkedin/xinfra/monitor/XinfraMonitorTest.java b/src/test/java/com/linkedin/xinfra/monitor/XinfraMonitorTest.java new file mode 100644 index 00000000..9867718d --- /dev/null +++ b/src/test/java/com/linkedin/xinfra/monitor/XinfraMonitorTest.java @@ -0,0 +1,164 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor; + +import com.linkedin.xinfra.monitor.services.ServiceFactory; +import com.linkedin.xinfra.monitor.services.Service; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import org.testng.annotations.Test; + + +@Test +public class XinfraMonitorTest { + + @Test + public void lifecycleTest() throws Exception { + XinfraMonitor xinfraMonitor = xinfraMonitor(); + + /* Nothing should be started */ + org.testng.Assert.assertEquals(FakeService.START_COUNT.get(), 0); + org.testng.Assert.assertEquals(FakeService.STOP_COUNT.get(), 0); + + /* Should accept but ignore start because start has not been called */ + + xinfraMonitor.stop(); + org.testng.Assert.assertEquals(FakeService.STOP_COUNT.get(), 0); + + /* Should start */ + xinfraMonitor.start(); + org.testng.Assert.assertEquals(FakeService.START_COUNT.get(), 1); + + /* Should allow start to be called more than once */ + xinfraMonitor.stop(); + xinfraMonitor.stop(); + org.testng.Assert.assertEquals(FakeService.START_COUNT.get(), 1); + org.testng.Assert.assertEquals(FakeService.STOP_COUNT.get(), 1); + + + /* Should be allowed to shutdown more than once. */ + xinfraMonitor.awaitShutdown(); + xinfraMonitor.awaitShutdown(); + } + + @Test + public void awaitShutdownOtherThread() throws Exception { + final XinfraMonitor xinfraMonitor = xinfraMonitor(); + final AtomicReference error = new AtomicReference<>(); + + Thread t = new Thread("test awaitshutdown thread") { + @Override + public void run() { + try { + xinfraMonitor.awaitShutdown(); + } catch (Throwable t) { + error.set(t); + } + } + }; + + t.start(); + xinfraMonitor.start(); + Thread.sleep(100); + xinfraMonitor.stop(); + t.join(500); + org.testng.Assert.assertFalse(t.isAlive()); + org.testng.Assert.assertEquals(error.get(), null); + } + + private XinfraMonitor xinfraMonitor() throws Exception { + FakeService.clearCounters(); + Map config = new HashMap<>(); + Map fakeServiceConfig = new HashMap<>(); + + fakeServiceConfig.put(XinfraMonitorConstants.CLASS_NAME_CONFIG, FakeService.class.getName()); + config.put("fake-service", fakeServiceConfig); + return new XinfraMonitor(config); + + } + + /** + * Factory class which instantiates a new FakeService service object. 
+ */ + @SuppressWarnings("rawtypes") + static final class FakeServiceFactory implements ServiceFactory { + + private final Map _config; + private final String _serviceInstanceName; + + public FakeServiceFactory(Map config, String serviceInstanceName) { + + this._config = config; + this._serviceInstanceName = serviceInstanceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + + return new XinfraMonitorTest.FakeService(_config, _serviceInstanceName); + + } + } + + static final class FakeService implements Service { + + private static final AtomicInteger START_COUNT = new AtomicInteger(); + private static final AtomicInteger STOP_COUNT = new AtomicInteger(); + private final AtomicBoolean _isRunning = new AtomicBoolean(); + + /** required */ + public FakeService(Map config, String serviceInstanceName) { + + } + + private static void clearCounters() { + START_COUNT.set(0); + STOP_COUNT.set(0); + } + + @Override + public void start() { + _isRunning.compareAndSet(false, true); + START_COUNT.incrementAndGet(); + } + + @Override + public synchronized void stop() { + _isRunning.compareAndSet(true, false); + STOP_COUNT.incrementAndGet(); + notifyAll(); + } + + @Override + public boolean isRunning() { + return _isRunning.get(); + } + + @Override + public synchronized void awaitShutdown(long timeout, TimeUnit timeUnit) { + try { + if (STOP_COUNT.get() == 0) { + wait(3_000); + if (STOP_COUNT.get() == 0) { + throw new IllegalStateException("Never notified."); + } + } + } catch (InterruptedException e) { + throw new IllegalStateException(e); + } + } + } +} diff --git a/src/test/java/com/linkedin/xinfra/monitor/consumer/NewConsumerTest.java b/src/test/java/com/linkedin/xinfra/monitor/consumer/NewConsumerTest.java new file mode 100644 index 00000000..da93a175 --- /dev/null +++ b/src/test/java/com/linkedin/xinfra/monitor/consumer/NewConsumerTest.java @@ -0,0 +1,103 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +package com.linkedin.xinfra.monitor.consumer; + +import com.linkedin.xinfra.monitor.common.ConsumerGroupCoordinatorUtils; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.ExecutionException; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.DescribeTopicsResult; +import org.apache.kafka.clients.admin.TopicDescription; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.common.internals.KafkaFutureImpl; +import org.apache.kafka.common.internals.Topic; +import org.mockito.Mockito; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + + +@Test +public class NewConsumerTest { + private static final int NUM_OFFSETS_TOPIC_PARTITIONS = 5; + private static final String TARGET_CONSUMER_GROUP_ID = "target-group-id"; + + @BeforeMethod + public void beforeMethod() { + System.out.println("Running beforeMethod of " + this.getClass()); + } + + @AfterMethod + public void afterMethod() { + System.out.println("Finished running testConsumerGroupCoordinatorHashing() of " + this.getClass()); + } + + @SuppressWarnings("unchecked") + @Test + public void testConsumerGroupCoordinatorHashing() throws ExecutionException, InterruptedException { + Properties consumerProperties = new Properties(); + + AdminClient adminClient = Mockito.mock(AdminClient.class); + + /* + * Mock the behavior of AdminClient only. + */ + Mockito.when(adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME))) + .thenReturn(Mockito.mock(DescribeTopicsResult.class)); + Mockito.when(adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME)).values()) + .thenReturn(Mockito.mock(Map.class)); + Mockito.when(adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME)) + .values() + .get(Topic.GROUP_METADATA_TOPIC_NAME)).thenReturn(Mockito.mock(KafkaFutureImpl.class)); + + Mockito.when(adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME)) + .values() + .get(Topic.GROUP_METADATA_TOPIC_NAME) + .get()).thenReturn(Mockito.mock(TopicDescription.class)); + + Mockito.when(adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME)) + .values() + .get(Topic.GROUP_METADATA_TOPIC_NAME) + .get() + .partitions()).thenReturn(Mockito.mock(List.class)); + + Mockito.when(adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME)) + .values() + .get(Topic.GROUP_METADATA_TOPIC_NAME) + .get() + .partitions() + .size()).thenReturn(NUM_OFFSETS_TOPIC_PARTITIONS); + + consumerProperties.put(ConsumerConfig.GROUP_ID_CONFIG, + NewConsumer.configureGroupId(TARGET_CONSUMER_GROUP_ID, adminClient)); + System.out.println("Consumer properties after configuration: " + consumerProperties); + Assert.assertNotNull(consumerProperties.get(ConsumerConfig.GROUP_ID_CONFIG)); + + // Testing I: run partitionsFor() on the result to make sure they are the same + int hashedResult = + ConsumerGroupCoordinatorUtils.partitionFor(consumerProperties.get(ConsumerConfig.GROUP_ID_CONFIG).toString(), + NUM_OFFSETS_TOPIC_PARTITIONS); + int hashedResult2 = + ConsumerGroupCoordinatorUtils.partitionFor(TARGET_CONSUMER_GROUP_ID, NUM_OFFSETS_TOPIC_PARTITIONS); + + Assert.assertEquals(hashedResult, hashedResult2); + System.out.println("Modulo result as an absolute value: " + hashedResult); + 
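+ // For reference, the mapping exercised by this test is, in essence, abs(groupId.hashCode()) modulo the number of
+ // __consumer_offsets partitions; the leader of the resulting partition serves as the group's coordinator.
+ // ConsumerGroupCoordinatorUtils.partitionFor presumably reduces to something like:
+ //   int hash = groupId.hashCode();
+ //   int partition = (hash == Integer.MIN_VALUE ? 0 : Math.abs(hash)) % numOffsetsTopicPartitions;
+ // so a generated group id whose hash lands on the same partition as the target group id shares its coordinator.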
System.out.println("Modulo result as an absolute value: " + hashedResult2); + + // Testing II: Also test that the groupIds are different. + Assert.assertNotEquals(TARGET_CONSUMER_GROUP_ID, consumerProperties.get(ConsumerConfig.GROUP_ID_CONFIG)); + + } +} diff --git a/src/test/java/com/linkedin/xinfra/monitor/services/ClusterTopicManipulationServiceTest.java b/src/test/java/com/linkedin/xinfra/monitor/services/ClusterTopicManipulationServiceTest.java new file mode 100644 index 00000000..b3aec5c9 --- /dev/null +++ b/src/test/java/com/linkedin/xinfra/monitor/services/ClusterTopicManipulationServiceTest.java @@ -0,0 +1,120 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectWriter; +import com.linkedin.xinfra.monitor.XinfraMonitorConstants; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.kafka.common.Node; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.requests.DescribeLogDirsResponse; +import org.mockito.Mockito; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + + +/** + * Testing methods for the Xinfra Monitor class of ClusterTopicManipulationService. 
+ */ +@Test +public class ClusterTopicManipulationServiceTest { + + private static final String SERVICE_TEST_TOPIC = "xinfra-monitor-topic-manipulation-test-topic"; + + @BeforeMethod + private void startTest() { + System.out.println("Started " + this.getClass().getSimpleName().toLowerCase() + "."); + } + + @AfterMethod + private void finishTest() { + System.out.println("Finished " + this.getClass().getCanonicalName().toLowerCase() + "."); + } + + @Test(invocationCount = 2) + void serviceStartTest() throws JsonProcessingException { + ClusterTopicManipulationService clusterTopicManipulationService = + Mockito.mock(ClusterTopicManipulationService.class); + + Mockito.doCallRealMethod() + .when(clusterTopicManipulationService) + .processBroker(Mockito.anyMap(), Mockito.any(), Mockito.anyString()); + + Mockito.doCallRealMethod() + .when(clusterTopicManipulationService) + .setExpectedPartitionsCount(Mockito.anyInt()); + + Mockito.doCallRealMethod() + .when(clusterTopicManipulationService) + .expectedPartitionsCount(); + + List brokers = new ArrayList<>(); + for (int id = 1; id < 3; id++) { + brokers.add(new Node(id, "kafka-broker-host", 8000)); + } + + Map> logDirectoriesResponseMap1 = new HashMap<>(); + Map> logDirectoriesResponseMap2 = new HashMap<>(); + + Map>> brokerMapHashMap = new HashMap<>(); + brokerMapHashMap.putIfAbsent(brokers.get(0), logDirectoriesResponseMap1); + brokerMapHashMap.putIfAbsent(brokers.get(1), logDirectoriesResponseMap2); + + Map logDirInfoMap1 = new HashMap<>(); + Map logDirInfoMap2 = new HashMap<>(); + + logDirectoriesResponseMap1.put(brokers.get(0).id(), logDirInfoMap1); + logDirectoriesResponseMap2.put(brokers.get(1).id(), logDirInfoMap2); + + Map replicaInfos1 = new HashMap<>(); + Map replicaInfos2 = new HashMap<>(); + + for (int topicPartition = 0; topicPartition < 3; topicPartition++) { + replicaInfos1.put(new TopicPartition(SERVICE_TEST_TOPIC, topicPartition), + new DescribeLogDirsResponse.ReplicaInfo(235, 0, false)); + + replicaInfos2.put(new TopicPartition(SERVICE_TEST_TOPIC, topicPartition), + new DescribeLogDirsResponse.ReplicaInfo(235, 0, false)); + } + + int totalPartitions = brokers.size() * replicaInfos1.size(); + System.out.println(totalPartitions); + clusterTopicManipulationService.setExpectedPartitionsCount(totalPartitions); + System.out.println(clusterTopicManipulationService.expectedPartitionsCount()); + + logDirInfoMap1.put(XinfraMonitorConstants.KAFKA_LOG_DIRECTORY + "-1", + new DescribeLogDirsResponse.LogDirInfo(null, replicaInfos1)); + logDirInfoMap2.put(XinfraMonitorConstants.KAFKA_LOG_DIRECTORY + "-2", + new DescribeLogDirsResponse.LogDirInfo(null, replicaInfos2)); + + ObjectMapper objectMapper = new ObjectMapper(); + ObjectWriter objectWriter = objectMapper.writerWithDefaultPrettyPrinter(); + + for (Map.Entry>> nodeMapEntry : brokerMapHashMap.entrySet()) { + System.out.println(objectWriter.writeValueAsString(nodeMapEntry.getValue())); + } + + for (Node broker : brokers) { + clusterTopicManipulationService.processBroker(brokerMapHashMap.get(broker), broker, SERVICE_TEST_TOPIC); + } + + Assert.assertEquals(totalPartitions, clusterTopicManipulationService.expectedPartitionsCount()); + System.out.println(); + } +} + diff --git a/src/test/java/com/linkedin/xinfra/monitor/services/ConsumeServiceTest.java b/src/test/java/com/linkedin/xinfra/monitor/services/ConsumeServiceTest.java new file mode 100644 index 00000000..5cb9282c --- /dev/null +++ b/src/test/java/com/linkedin/xinfra/monitor/services/ConsumeServiceTest.java @@ -0,0 +1,242 @@ +/** + * 
Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import com.linkedin.xinfra.monitor.common.Utils; +import com.linkedin.xinfra.monitor.consumer.BaseConsumerRecord; +import com.linkedin.xinfra.monitor.consumer.KMBaseConsumer; +import com.linkedin.xinfra.monitor.services.metrics.CommitLatencyMetrics; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.clients.consumer.OffsetCommitCallback; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.metrics.Metrics; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + + +/** + * This public class is a Unit Testing class for the Consume Service Class. + * Also tests for Kafka Monitor Consumer offset commits. + */ +public class ConsumeServiceTest { + private static final String TOPIC = "kafka-monitor-topic-testing"; + private static final Logger LOG = LoggerFactory.getLogger(ConsumeServiceTest.class); + private static final String TAGS_NAME = "name"; + private static final String METRIC_GROUP_NAME = "commit-availability-service"; + /* thread start delay in seconds */ + private static final long THREAD_START_DELAY_SECONDS = 4; + private static final String TAG_NAME_VALUE = "name"; + private static final long MOCK_LAST_COMMITTED_OFFSET = System.currentTimeMillis(); + private static final int PARTITION = 2; + private static final long FIRST_OFFSET = 2; + private static final long SECOND_OFFSET = 3; + private static Map tags; + + @Test + public void lifecycleTest() throws Exception { + ConsumeService consumeService = consumeService(); + + /* Nothing should be started */ + Assert.assertFalse(consumeService.isRunning()); + Assert.assertNotNull(consumeService.getServiceName()); + + /* Should accept but ignore start because start has not been called */ + consumeService.stop(); + Assert.assertFalse(consumeService.isRunning()); + + /* Should start */ + consumeService.startConsumeThreadForTesting(); + Assert.assertTrue(consumeService.isRunning()); + + shutdownConsumeService(consumeService); + } + + @Test + public void commitAvailabilityTest() throws Exception { + ConsumeService consumeService = consumeService(); + Metrics metrics = consumeServiceMetrics(consumeService); + + Assert.assertNotNull(metrics.metrics().get(metrics.metricName("offsets-committed-total", METRIC_GROUP_NAME, tags)).metricValue()); + Assert.assertEquals(metrics.metrics().get(metrics.metricName("offsets-committed-total", METRIC_GROUP_NAME, tags)).metricValue(), 0.0); + + /* Should start */ + consumeService.startConsumeThreadForTesting(); + Assert.assertTrue(consumeService.isRunning()); + + /* in 
milliseconds */ + long threadStartDelay = TimeUnit.SECONDS.toMillis(THREAD_START_DELAY_SECONDS); + + /* Thread.sleep safe to do here instead of ScheduledExecutorService + * We want to sleep current thread so that consumeService can start running for enough seconds. */ + Thread.sleep(threadStartDelay); + Assert.assertNotNull(metrics.metrics().get(metrics.metricName("offsets-committed-total", METRIC_GROUP_NAME, tags)).metricValue()); + Assert.assertNotNull(metrics.metrics().get(metrics.metricName("failed-commit-offsets-total", METRIC_GROUP_NAME, + tags)).metricValue()); + Assert.assertEquals(metrics.metrics().get(metrics.metricName("failed-commit-offsets-total", METRIC_GROUP_NAME, tags)).metricValue(), 0.0); + Assert.assertNotEquals(metrics.metrics().get(metrics.metricName("offsets-committed-total", METRIC_GROUP_NAME, tags)).metricValue(), 0.0); + shutdownConsumeService(consumeService); + } + + @Test + public void commitLatencyTest() throws Exception { + CommitLatencyMetrics commitLatencyMetrics = Mockito.mock(CommitLatencyMetrics.class); + Assert.assertNotNull(commitLatencyMetrics); + + ConsumeService consumeService = consumeService(); + Metrics metrics = consumeServiceMetrics(consumeService); + + Assert.assertNull(metrics.metrics().get(metrics.metricName("commit-offset-latency-ms-avg", METRIC_GROUP_NAME, tags))); + Assert.assertNull(metrics.metrics().get(metrics.metricName("commit-offset-latency-ms-max", METRIC_GROUP_NAME, tags))); + + /* Should start */ + consumeService.startConsumeThreadForTesting(); + Assert.assertTrue(consumeService.isRunning()); + + /* in milliseconds */ + long threadStartDelay = TimeUnit.SECONDS.toMillis(THREAD_START_DELAY_SECONDS); + + /* Thread.sleep safe to do here instead of ScheduledExecutorService + * We want to sleep current thread so that consumeService can start running for enough seconds. */ + Thread.sleep(threadStartDelay); + + shutdownConsumeService(consumeService); + } + + /** + * Sample ConsumeService instance for unit testing + * @return Sample ConsumeService object. 
+ * @throws Exception should the ConsumeService creation fail or throws an error / exception + */ + private ConsumeService consumeService() throws Exception { + LOG.info("Creating an instance of Consume Service for testing.."); + + ConsumerFactory consumerFactory = Mockito.mock(ConsumerFactory.class); + AdminClient adminClient = Mockito.mock(AdminClient.class); + KMBaseConsumer kmBaseConsumer = Mockito.mock(KMBaseConsumer.class); + + Mockito.when(consumerFactory.adminClient()).thenReturn(adminClient); + Mockito.when(consumerFactory.latencySlaMs()).thenReturn(20000); + Mockito.when(consumerFactory.baseConsumer()).thenReturn(kmBaseConsumer); + Mockito.when(consumerFactory.topic()).thenReturn(TOPIC); + + /* LATENCY_PERCENTILE_MAX_MS_CONFIG, */ + Mockito.when(consumerFactory.latencyPercentileMaxMs()).thenReturn(5000); + + /* LATENCY_PERCENTILE_GRANULARITY_MS_CONFIG */ + Mockito.when(consumerFactory.latencyPercentileGranularityMs()).thenReturn(1); + + /* define return value */ + Mockito.when(kmBaseConsumer.lastCommitted()).thenReturn(MOCK_LAST_COMMITTED_OFFSET); + Mockito.when(kmBaseConsumer.committed(Mockito.any())).thenReturn(new OffsetAndMetadata(FIRST_OFFSET)); + Mockito.doAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocationOnMock) { + OffsetCommitCallback callback = invocationOnMock.getArgument(0); + Map committedOffsets = new HashMap<>(); + committedOffsets.put(new TopicPartition(TOPIC, PARTITION), new OffsetAndMetadata(FIRST_OFFSET)); + callback.onComplete(committedOffsets, null); + + return null; + } + }).when(kmBaseConsumer).commitAsync(Mockito.any(OffsetCommitCallback.class)); + + + /* avro record to KmBaseConsumer record */ + Mockito.when(kmBaseConsumer.receive()).thenReturn( + new BaseConsumerRecord(TOPIC, PARTITION, SECOND_OFFSET, "key", + Utils.jsonFromFields(TOPIC, 2, 6000, "producerId", 2))); + + CompletableFuture topicPartitionResult = new CompletableFuture<>(); + topicPartitionResult.complete(null); + + return new ConsumeService(TAG_NAME_VALUE, topicPartitionResult, consumerFactory); + } + + @Test + public void awaitShutdownOtherThread() throws Exception { + final ConsumeService consumeService = consumeService(); + final AtomicReference error = new AtomicReference<>(); + + Thread thread = new Thread("test awaitshutdown thread") { + @Override + public void run() { + try { + consumeService.awaitShutdown(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); + } catch (Throwable t) { + error.set(t); + } + } + }; + + thread.start(); + consumeService.startConsumeThreadForTesting(); + Thread.sleep(100); + + consumeService.stop(); + thread.join(5000); + + Assert.assertFalse(thread.isAlive()); + Assert.assertEquals(error.get(), null); + + } + + /** + * return consume service metrics. + * @param consumeService ConsumeService object + * @return consume service metrics + */ + private Metrics consumeServiceMetrics(ConsumeService consumeService) { + setup(); + Metrics metrics = consumeService.metrics(); + return metrics; + } + + /** + * set up the tags for the metrics + */ + @BeforeMethod + public void setup() { + tags = new HashMap<>(); + tags.put(TAGS_NAME, TAG_NAME_VALUE); + } + + /** + * shutdown the consume service. + * @param consumeService object of ConsumeService + */ + private void shutdownConsumeService(ConsumeService consumeService) { + /* + intentionally attempt stopping twice as such executions shouldn't throw any exceptions. 
+ Should allow start to be called more than once + */ + consumeService.stop(); + consumeService.stop(); + Assert.assertFalse(consumeService.isRunning()); + + /* Should be allowed to shutdown more than once. */ + consumeService.awaitShutdown(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); + consumeService.awaitShutdown(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); + Assert.assertFalse(consumeService.isRunning()); + } + +} diff --git a/src/test/java/com/linkedin/xinfra/monitor/services/MultiClusterTopicManagementServiceTest.java b/src/test/java/com/linkedin/xinfra/monitor/services/MultiClusterTopicManagementServiceTest.java new file mode 100644 index 00000000..3a7deccc --- /dev/null +++ b/src/test/java/com/linkedin/xinfra/monitor/services/MultiClusterTopicManagementServiceTest.java @@ -0,0 +1,167 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.xinfra.monitor.services; + +import com.linkedin.xinfra.monitor.topicfactory.TopicFactory; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import kafka.admin.BrokerMetadata; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.CreateTopicsResult; +import org.apache.kafka.clients.admin.DescribeClusterResult; +import org.apache.kafka.clients.admin.DescribeTopicsResult; +import org.apache.kafka.clients.admin.TopicDescription; +import org.apache.kafka.common.KafkaFuture; +import org.apache.kafka.common.Node; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; +import scala.Option; + + +/** + * Testing methods for the Xinfra Monitor class of MultiClusterTopicManagementService. 
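+ * For maybeAddPartitionsTest below: newPartitionAssignments is asked to grow the topic from partitionNum = 5 to
+ * minPartitionNum = 14 partitions with replication factor 4, so it is expected to return 14 - 5 = 9 new
+ * assignments, each naming 4 brokers drawn from the 10-broker metadata set.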
+ */ +@SuppressWarnings("unchecked") +@Test +public class MultiClusterTopicManagementServiceTest { + + private static final String SERVICE_TEST_TOPIC = "xinfra-monitor-Multi-Cluster-Topic-Management-Service-Test-topic"; + private static Set nodeSet; + private MultiClusterTopicManagementService.TopicManagementHelper _topicManagementHelper; + private CreateTopicsResult _createTopicsResult; + private Map> _kafkaFutureMap; + private KafkaFuture _kafkaFuture; + + @BeforeMethod + private void startTest() { + _createTopicsResult = Mockito.mock(CreateTopicsResult.class); + _kafkaFutureMap = Mockito.mock(Map.class); + _kafkaFuture = Mockito.mock(KafkaFuture.class); + + nodeSet = new LinkedHashSet<>(); + nodeSet.add(new Node(1, "host-1", 2132)); + nodeSet.add(new Node(2, "host-2", 2133)); + nodeSet.add(new Node(3, "host-3", 2134)); + nodeSet.add(new Node(4, "host-4", 2135)); + nodeSet.add(new Node(5, "host-5", 2136)); + nodeSet.add(new Node(6, "host-5", 2137)); + nodeSet.add(new Node(7, "host-5", 2138)); + nodeSet.add(new Node(8, "host-5", 2139)); + nodeSet.add(new Node(9, "host-5", 2140)); + nodeSet.add(new Node(10, "host-5", 2141)); + + _topicManagementHelper = Mockito.mock(MultiClusterTopicManagementService.TopicManagementHelper.class); + _topicManagementHelper._topic = SERVICE_TEST_TOPIC; + _topicManagementHelper._adminClient = Mockito.mock(AdminClient.class); + _topicManagementHelper._topicFactory = Mockito.mock(TopicFactory.class); + _topicManagementHelper._topicCreationEnabled = true; + _topicManagementHelper._topicAddPartitionEnabled = true; + _topicManagementHelper._topicReassignPartitionAndElectLeaderEnabled = true; + } + + @AfterMethod + private void finishTest() { + System.out.println("Finished " + this.getClass().getCanonicalName().toLowerCase() + "."); + } + + @Test(invocationCount = 2) + protected void maybeAddPartitionsTest() { + Set brokerMetadataSet = new LinkedHashSet<>(); + for (Node broker : nodeSet) { + brokerMetadataSet.add(new BrokerMetadata(broker.id(), Option.apply(broker.rack()))); + } + + int minPartitionNum = 14; + int partitionNum = 5; + int rf = 4; + + List> newPartitionAssignments = + MultiClusterTopicManagementService.TopicManagementHelper.newPartitionAssignments(minPartitionNum, partitionNum, brokerMetadataSet, rf); + Assert.assertNotNull(newPartitionAssignments); + + System.out.println(newPartitionAssignments); + Assert.assertEquals(newPartitionAssignments.size(), minPartitionNum - partitionNum); + Assert.assertEquals(newPartitionAssignments.get(0).size(), rf); + } + + @Test + protected void MultiClusterTopicManagementServiceTopicCreationTest() throws Exception { + + Mockito.doCallRealMethod().when(_topicManagementHelper).maybeCreateTopic(); + + Mockito.when(_topicManagementHelper._adminClient.describeCluster()) + .thenReturn(Mockito.mock(DescribeClusterResult.class)); + Mockito.when(_topicManagementHelper._adminClient.describeCluster().nodes()) + .thenReturn(Mockito.mock(KafkaFuture.class)); + Mockito.when(_topicManagementHelper._adminClient.describeCluster().nodes().get()).thenReturn(nodeSet); + + Mockito.when(_topicManagementHelper._adminClient.createTopics(Mockito.anyCollection())) + .thenReturn(_createTopicsResult); + Mockito.when(_topicManagementHelper._adminClient.createTopics(Mockito.anyCollection()).values()) + .thenReturn(_kafkaFutureMap); + Mockito.when( + _topicManagementHelper._adminClient.createTopics(Mockito.anyCollection()).values().get(SERVICE_TEST_TOPIC)) + .thenReturn(_kafkaFuture); + + Answer createKafkaTopicFutureAnswer = new Answer() { + /** + 
* @param invocation the invocation on the mocked TopicManagementHelper. + * @return NULL value. + * @throws Throwable the throwable to be thrown when Exception occurs. + */ + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + + Mockito.when(_topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC))) + .thenReturn(Mockito.mock(DescribeTopicsResult.class)); + Mockito.when( + _topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC)).values()) + .thenReturn(Mockito.mock(Map.class)); + Mockito.when(_topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC)) + .values() + .get(SERVICE_TEST_TOPIC)).thenReturn(Mockito.mock(KafkaFuture.class)); + Mockito.when(_topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC)) + .values() + .get(SERVICE_TEST_TOPIC) + .get()).thenReturn(Mockito.mock(TopicDescription.class)); + Mockito.when(_topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC)) + .values() + .get(SERVICE_TEST_TOPIC) + .get() + .name()).thenReturn(SERVICE_TEST_TOPIC); + return null; + } + }; + + Mockito.when(_topicManagementHelper._topicFactory.createTopicIfNotExist(Mockito.anyString(), Mockito.anyShort(), + Mockito.anyDouble(), Mockito.any(), Mockito.any())).thenAnswer(createKafkaTopicFutureAnswer); + + _topicManagementHelper.maybeCreateTopic(); + + Assert.assertNotNull(_topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC)) + .values() + .get(SERVICE_TEST_TOPIC) + .get()); + Assert.assertEquals(_topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC)) + .values() + .get(SERVICE_TEST_TOPIC) + .get() + .name(), SERVICE_TEST_TOPIC); + } +} diff --git a/src/test/java/com/linkedin/xinfra/monitor/services/OffsetCommitServiceTest.java b/src/test/java/com/linkedin/xinfra/monitor/services/OffsetCommitServiceTest.java new file mode 100644 index 00000000..8b543774 --- /dev/null +++ b/src/test/java/com/linkedin/xinfra/monitor/services/OffsetCommitServiceTest.java @@ -0,0 +1,38 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+
+  }
+}
+
diff --git a/src/test/java/com/linkedin/xinfra/monitor/services/TopicManagementServiceTest.java b/src/test/java/com/linkedin/xinfra/monitor/services/TopicManagementServiceTest.java
new file mode 100644
index 00000000..ca8d77b1
--- /dev/null
+++ b/src/test/java/com/linkedin/xinfra/monitor/services/TopicManagementServiceTest.java
@@ -0,0 +1,96 @@
+/**
+ * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
+ * file except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+package com.linkedin.xinfra.monitor.services;
+
+import com.linkedin.xinfra.monitor.services.MultiClusterTopicManagementService.TopicManagementHelper;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.kafka.common.Node;
+import org.apache.kafka.common.TopicPartitionInfo;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+@Test
+public class TopicManagementServiceTest {
+
+  private static final String TOPIC = "kmf-unit-test-topic";
+
+  private List<Node> brokers(int brokerCount) {
+    List<Node> brokers = new ArrayList<>();
+    for (int i = 0; i < brokerCount; i++) {
+      brokers.add(new Node(i, "", -1));
+    }
+    return brokers;
+  }
+
+  private Node[] nodes(int brokerCount) {
+    Node[] nodes = new Node[brokerCount];
+    for (int i = 0; i < brokerCount; i++) {
+      nodes[i] = new Node(i, "", -1);
+    }
+    return nodes;
+  }
+
+  @Test
+  public void noDetection() {
+    List<TopicPartitionInfo> partitions = new ArrayList<>();
+    Node[] node = nodes(2);
+    partitions.add(new TopicPartitionInfo(0, node[0], new ArrayList<>(Arrays.asList(node[0], node[1])), new ArrayList<>()));
+    partitions.add(new TopicPartitionInfo(1, node[0], new ArrayList<>(Arrays.asList(node[0], node[1])), new ArrayList<>()));
+    partitions.add(new TopicPartitionInfo(2, node[1], new ArrayList<>(Arrays.asList(node[1], node[0])), new ArrayList<>()));
+    partitions.add(new TopicPartitionInfo(3, node[1], new ArrayList<>(Arrays.asList(node[1], node[0])), new ArrayList<>()));
+
+    Assert.assertFalse(TopicManagementHelper.someBrokerNotPreferredLeader(partitions, brokers(2)));
+    Assert.assertFalse(TopicManagementHelper.someBrokerNotElectedLeader(partitions, brokers(2)));
+  }
+
+  @Test
+  public void detectLowTotalNumberOfPartitions() {
+    List<TopicPartitionInfo> partitions = new ArrayList<>();
+    Node[] node = nodes(3);
+    partitions.add(new TopicPartitionInfo(0, node[0], new ArrayList<>(Arrays.asList(node[0], node[1])), new ArrayList<>()));
+    partitions.add(new TopicPartitionInfo(1, node[1], new ArrayList<>(Arrays.asList(node[1], node[0])), new ArrayList<>()));
+    partitions.add(new TopicPartitionInfo(2, node[2], new ArrayList<>(Arrays.asList(node[2], node[0])), new ArrayList<>()));
+    Assert.assertFalse(TopicManagementHelper.someBrokerNotPreferredLeader(partitions, brokers(3)));
+    Assert.assertFalse(TopicManagementHelper.someBrokerNotElectedLeader(partitions, brokers(3)));
+    Assert.assertEquals(TopicManagementHelper.getReplicationFactor(partitions), 2);
+  }
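+
+  // Reading these fixtures: TopicPartitionInfo(partition, leader, replicas, isr),
+  // and a partition's preferred leader is the first entry in its replica list.
+  // In the scenario below, node[2] appears in replica lists but is never the
+  // elected leader of any partition, which someBrokerNotElectedLeader() is
+  // expected to flag.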
+
+  @Test
+  public void detectBrokerWithoutLeader() {
+    List<TopicPartitionInfo> partitions = new ArrayList<>();
+    Node[] node = nodes(3);
+    partitions.add(new TopicPartitionInfo(0, node[0], new ArrayList<>(Arrays.asList(node[0], node[1])), new ArrayList<>()));
+    partitions.add(new TopicPartitionInfo(1, node[0], new ArrayList<>(Arrays.asList(node[0], node[1])), new ArrayList<>()));
+    partitions.add(new TopicPartitionInfo(2, node[1], new ArrayList<>(Arrays.asList(node[1], node[0])), new ArrayList<>()));
+    partitions.add(new TopicPartitionInfo(3, node[1], new ArrayList<>(Arrays.asList(node[2], node[1])), new ArrayList<>()));
+    partitions.add(new TopicPartitionInfo(4, node[1], new ArrayList<>(Arrays.asList(node[2], node[0])), new ArrayList<>()));
+
+    Assert.assertFalse(TopicManagementHelper.someBrokerNotPreferredLeader(partitions, brokers(3)));
+    Assert.assertTrue(TopicManagementHelper.someBrokerNotElectedLeader(partitions, brokers(3)));
+  }
+
+  @Test
+  public void detectBrokerWithoutPreferredLeader() {
+    List<TopicPartitionInfo> partitions = new ArrayList<>();
+    Node[] node = nodes(3);
+    partitions.add(new TopicPartitionInfo(0, node[0], new ArrayList<>(Arrays.asList(node[0], node[1])), new ArrayList<>()));
+    partitions.add(new TopicPartitionInfo(1, node[0], new ArrayList<>(Arrays.asList(node[0], node[1])), new ArrayList<>()));
+    partitions.add(new TopicPartitionInfo(2, node[1], new ArrayList<>(Arrays.asList(node[0], node[0])), new ArrayList<>()));
+    partitions.add(new TopicPartitionInfo(3, node[1], new ArrayList<>(Arrays.asList(node[2], node[1])), new ArrayList<>()));
+    partitions.add(new TopicPartitionInfo(4, node[1], new ArrayList<>(Arrays.asList(node[2], node[0])), new ArrayList<>()));
+
+    Assert.assertTrue(TopicManagementHelper.someBrokerNotPreferredLeader(partitions, brokers(3)));
+    Assert.assertTrue(TopicManagementHelper.someBrokerNotElectedLeader(partitions, brokers(3)));
+  }
+}
diff --git a/webapp/index.html b/webapp/index.html
index c27cfaa2..03c22fb8 100644
--- a/webapp/index.html
+++ b/webapp/index.html
@@ -1,5 +1,5 @@