From b6d5802cca8e139e5cf8d1207e3933c8cb272c38 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Thu, 14 May 2020 21:39:40 -0700 Subject: [PATCH 01/89] addition of CircleCI Signed-off-by: Andrew Choi --- .circleci/config.yml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 00000000..996268f9 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,8 @@ +version: 2.1 +jobs: + build: + docker: + - image: circleci/node:4.8.2 # the primary container, where your job's commands are run + steps: + - checkout # check out the code in the project directory + - run: echo "Kafka Monitor CircleCI" From d10f7df4f08a914a00f42c6ae0a2686f9ec37d31 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Fri, 15 May 2020 14:18:58 -0700 Subject: [PATCH 02/89] Get topicPartitionsWithRetry and a minor spelling error in services/MultiClusterTopicManagementService (#243) 1 - minor spelling error in services/MultiClusterTopicManagementService `satisifies` 2 - Get topicPartitionsWithRetry after Creation of Topic 3 - LOGGER renaming 4 - Mark CircleCI as TODO --- .circleci/config.yml | 19 ++++-- .../MultiClusterTopicManagementService.java | 64 ++++++++++--------- 2 files changed, 47 insertions(+), 36 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 996268f9..85f9a2a1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,8 +1,13 @@ -version: 2.1 -jobs: - build: - docker: - - image: circleci/node:4.8.2 # the primary container, where your job's commands are run +version: 2 # use CircleCI 2.0 + +jobs: # a collection of steps + build: # runs not using Workflows must have a `build` job as entry point + + docker: # run the steps with Docker + - image: circleci/openjdk:8-jdk-stretch # with this image as the primary container; this is where all `steps` will run. 
+ steps: - - checkout # check out the code in the project directory - - run: echo "Kafka Monitor CircleCI" + # TODO: populate the necessary steps for CircleCI + - run: echo "Kafka Monitor CircleCI on GitHub" + + # See https://circleci.com/docs/2.0/deployment-integrations/ for deploy examples \ No newline at end of file diff --git a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java index ec5cf2c8..4dd0c9e0 100644 --- a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java +++ b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java @@ -73,7 +73,7 @@ */ @SuppressWarnings({"rawtypes", "unchecked"}) public class MultiClusterTopicManagementService implements Service { - private static final Logger LOG = LoggerFactory.getLogger(MultiClusterTopicManagementService.class); + private static final Logger LOGGER = LoggerFactory.getLogger(MultiClusterTopicManagementService.class); private static final String METRIC_GROUP_NAME = "topic-management-service"; private final CompletableFuture _topicPartitionResult = new CompletableFuture<>(); private final AtomicBoolean _isRunning = new AtomicBoolean(false); @@ -126,7 +126,7 @@ public synchronized void start() { Runnable pleRunnable = new PreferredLeaderElectionRunnable(); _executor.scheduleWithFixedDelay(pleRunnable, _preferredLeaderElectionIntervalMs, _preferredLeaderElectionIntervalMs, TimeUnit.MILLISECONDS); - LOG.info("{}/MultiClusterTopicManagementService started.", _serviceName); + LOGGER.info("{}/MultiClusterTopicManagementService started.", _serviceName); } } @@ -134,7 +134,7 @@ public synchronized void start() { public synchronized void stop() { if (_isRunning.compareAndSet(true, false)) { _executor.shutdown(); - LOG.info("{}/MultiClusterTopicManagementService stopped.", _serviceName); + LOGGER.info("{}/MultiClusterTopicManagementService stopped.", _serviceName); 
} } @@ -148,9 +148,9 @@ public void awaitShutdown() { try { _executor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { - LOG.info("Thread interrupted when waiting for {}/MultiClusterTopicManagementService to shutdown", _serviceName); + LOGGER.info("Thread interrupted when waiting for {}/MultiClusterTopicManagementService to shutdown", _serviceName); } - LOG.info("{}/MultiClusterTopicManagementService shutdown completed", _serviceName); + LOGGER.info("{}/MultiClusterTopicManagementService shutdown completed", _serviceName); } @@ -165,7 +165,7 @@ public void run() { } /* - * The partition number of the monitor topics should be the minimum partition number that satisifies the following conditions: + * The partition number of the monitor topics should be the minimum partition number that satisfies the following conditions: * - partition number of the monitor topics across all monitored clusters should be the same * - partitionNum / brokerNum >= user-configured partitionsToBrokersRatio. 
* - partitionNum >= user-configured minPartitionNum @@ -185,13 +185,13 @@ public void run() { try { helper.maybeReassignPartitionAndElectLeader(); } catch (IOException | KafkaException e) { - LOG.warn(_serviceName + "/MultiClusterTopicManagementService will retry later in cluster " + clusterName, e); + LOGGER.warn(_serviceName + "/MultiClusterTopicManagementService will retry later in cluster " + clusterName, e); } } } catch (Throwable t) { // Need to catch throwable because there is scala API that can throw NoSuchMethodError in runtime // and such error is not caught by compilation - LOG.error(_serviceName + "/MultiClusterTopicManagementService will stop due to error.", t); + LOGGER.error(_serviceName + "/MultiClusterTopicManagementService will stop due to error.", t); stop(); } } @@ -211,18 +211,20 @@ public void run() { try { helper.maybeElectLeader(); } catch (IOException | KafkaException e) { - LOG.warn(_serviceName + "/MultiClusterTopicManagementService will retry later in cluster " + clusterName, e); + LOGGER.warn(_serviceName + "/MultiClusterTopicManagementService will retry later in cluster " + clusterName, e); } } } catch (Throwable t) { /* Need to catch throwable because there is scala API that can throw NoSuchMethodError in runtime and such error is not caught by compilation. 
*/ - LOG.error(_serviceName + "/MultiClusterTopicManagementService will stop due to error.", t); + LOGGER.error(_serviceName + + "/MultiClusterTopicManagementService/PreferredLeaderElectionRunnable will stop due to an error.", t); stop(); } } } + @SuppressWarnings("FieldCanBeLocal") static class TopicManagementHelper { private final boolean _topicCreationEnabled; private final String _topic; @@ -233,9 +235,9 @@ static class TopicManagementHelper { private final TopicFactory _topicFactory; private final Properties _topicProperties; private boolean _preferredLeaderElectionRequested; - private int _requestTimeoutMs; - private List _bootstrapServers; - private final AdminClient _adminClient; + private final int _requestTimeoutMs; + private final List _bootstrapServers; + AdminClient _adminClient; @SuppressWarnings("unchecked") @@ -263,7 +265,7 @@ static class TopicManagementHelper { _topicFactory = (TopicFactory) Class.forName(topicFactoryClassName).getConstructor(Map.class).newInstance(topicFactoryConfig); _adminClient = constructAdminClient(props); - LOG.info("{} configs: {}", _adminClient.getClass().getSimpleName(), props); + LOGGER.info("{} configs: {}", _adminClient.getClass().getSimpleName(), props); } @SuppressWarnings("unchecked") @@ -274,7 +276,10 @@ void maybeCreateTopic() throws Exception { NewTopic newTopic = new NewTopic(_topic, numPartitions, (short) _replicationFactor); newTopic.configs((Map) _topicProperties); CreateTopicsResult createTopicsResult = _adminClient.createTopics(Collections.singletonList(newTopic)); - LOG.info("CreateTopicsResult: {}.", createTopicsResult.values()); + + // waits for this topic creation future to complete, and then returns its result. 
+ createTopicsResult.values().get(_topic).get(); + LOGGER.info("CreateTopicsResult: {}.", createTopicsResult.values()); } } @@ -288,13 +293,14 @@ int minPartitionNum() throws InterruptedException, ExecutionException { } void maybeAddPartitions(int minPartitionNum) throws ExecutionException, InterruptedException { - Collection topicNames = _adminClient.listTopics().names().get(); - Map> kafkaFutureMap = _adminClient.describeTopics(topicNames).values(); + Map> kafkaFutureMap = + _adminClient.describeTopics(Collections.singleton(_topic)).values(); KafkaFuture topicDescriptions = kafkaFutureMap.get(_topic); List partitions = topicDescriptions.get().partitions(); + int partitionNum = partitions.size(); if (partitionNum < minPartitionNum) { - LOG.info("{} will increase partition of the topic {} in the cluster from {}" + LOGGER.info("{} will increase partition of the topic {} in the cluster from {}" + " to {}.", this.getClass().toString(), _topic, partitionNum, minPartitionNum); Set blackListedBrokers = _topicFactory.getBlackListedBrokers(_zkConnect); List> replicaAssignment = new ArrayList<>(new ArrayList<>()); @@ -339,13 +345,13 @@ void maybeReassignPartitionAndElectLeader() throws Exception { int expectedReplicationFactor = Math.max(currentReplicationFactor, _replicationFactor); if (_replicationFactor < currentReplicationFactor) - LOG.debug( + LOGGER.debug( "Configured replication factor {} is smaller than the current replication factor {} of the topic {} in cluster.", _replicationFactor, currentReplicationFactor, _topic); if (expectedReplicationFactor > currentReplicationFactor && !zkClient .reassignPartitionsInProgress()) { - LOG.info( + LOGGER.info( "MultiClusterTopicManagementService will increase the replication factor of the topic {} in cluster" + "from {} to {}", _topic, currentReplicationFactor, expectedReplicationFactor); reassignPartitions(zkClient, brokers, _topic, partitionInfoList.size(), @@ -362,7 +368,7 @@ void maybeReassignPartitionAndElectLeader() 
throws Exception { expectedProperties.put(key, _topicProperties.get(key)); if (!currentProperties.equals(expectedProperties)) { - LOG.info("MultiClusterTopicManagementService will overwrite properties of the topic {} " + LOGGER.info("MultiClusterTopicManagementService will overwrite properties of the topic {} " + "in cluster from {} to {}.", _topic, currentProperties, expectedProperties); zkClient.setOrCreateEntityConfigs(ConfigType.Topic(), _topic, expectedProperties); } @@ -370,7 +376,7 @@ void maybeReassignPartitionAndElectLeader() throws Exception { if (partitionInfoList.size() >= brokers.size() && someBrokerNotPreferredLeader(partitionInfoList, brokers) && !zkClient .reassignPartitionsInProgress()) { - LOG.info("{} will reassign partitions of the topic {} in cluster.", + LOGGER.info("{} will reassign partitions of the topic {} in cluster.", this.getClass().toString(), _topic); reassignPartitions(zkClient, brokers, _topic, partitionInfoList.size(), expectedReplicationFactor); @@ -380,7 +386,7 @@ void maybeReassignPartitionAndElectLeader() throws Exception { if (partitionInfoList.size() >= brokers.size() && someBrokerNotElectedLeader(partitionInfoList, brokers)) { if (!partitionReassigned || !zkClient.reassignPartitionsInProgress()) { - LOG.info( + LOGGER.info( "MultiClusterTopicManagementService will trigger preferred leader election for the topic {} in " + "cluster.", _topic ); @@ -403,7 +409,7 @@ void maybeElectLeader() throws Exception { if (!zkClient.reassignPartitionsInProgress()) { List partitionInfoList = _adminClient .describeTopics(Collections.singleton(_topic)).all().get().get(_topic).partitions(); - LOG.info( + LOGGER.info( "MultiClusterTopicManagementService will trigger requested preferred leader election for the" + " topic {} in cluster.", _topic); triggerPreferredLeaderElection(partitionInfoList, _topic); @@ -424,7 +430,7 @@ private void triggerPreferredLeaderElection(List partitionIn Set topicPartitions = new HashSet<>(partitions); 
ElectLeadersResult electLeadersResult = _adminClient.electLeaders(electionType, topicPartitions, newOptions); - LOG.info("{}: triggerPreferredLeaderElection - {}", this.getClass().toString(), electLeadersResult.all().get()); + LOGGER.info("{}: triggerPreferredLeaderElection - {}", this.getClass().toString(), electLeadersResult.all().get()); } private static void reassignPartitions(KafkaZkClient zkClient, Collection brokers, String topic, int partitionCount, int replicationFactor) { @@ -448,9 +454,9 @@ private static void reassignPartitions(KafkaZkClient zkClient, Collection String currentAssignmentJson = formatAsReassignmentJson(topic, currentAssignment); String newAssignmentJson = formatAsReassignmentJson(topic, assignedReplicas); - LOG.info("Reassign partitions for topic " + topic); - LOG.info("Current partition replica assignment " + currentAssignmentJson); - LOG.info("New partition replica assignment " + newAssignmentJson); + LOGGER.info("Reassign partitions for topic " + topic); + LOGGER.info("Current partition replica assignment " + currentAssignmentJson); + LOGGER.info("New partition replica assignment " + newAssignmentJson); zkClient.createPartitionReassignment(newAssignment); } @@ -461,7 +467,7 @@ static int getReplicationFactor(List partitionInfoList) { int replicationFactor = partitionInfoList.get(0).replicas().size(); for (TopicPartitionInfo partitionInfo : partitionInfoList) { if (replicationFactor != partitionInfo.replicas().size()) { - LOG.warn("Partitions of the topic have different replication factor."); + LOGGER.warn("Partitions of the topic have different replication factor."); return -1; } } From dd5bd65fa24ac1632e83406acbc6b88ec8eba218 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Mon, 18 May 2020 11:06:03 -0700 Subject: [PATCH 03/89] Addition of KMF/services/MultiClusterTopicManagementServiceTest.java (#244) Addition of KMF/services/MultiClusterTopicManagementServiceTest.java Signed-off-by: Andrew Choi --- 
.../MultiClusterTopicManagementService.java | 20 ++- ...ultiClusterTopicManagementServiceTest.java | 135 ++++++++++++++++++ 2 files changed, 153 insertions(+), 2 deletions(-) create mode 100644 src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java diff --git a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java index 4dd0c9e0..68a7060f 100644 --- a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java +++ b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java @@ -226,8 +226,6 @@ public void run() { @SuppressWarnings("FieldCanBeLocal") static class TopicManagementHelper { - private final boolean _topicCreationEnabled; - private final String _topic; private final String _zkConnect; private final int _replicationFactor; private final double _minPartitionsToBrokersRatio; @@ -237,7 +235,11 @@ static class TopicManagementHelper { private boolean _preferredLeaderElectionRequested; private final int _requestTimeoutMs; private final List _bootstrapServers; + + // package private for unit testing + boolean _topicCreationEnabled; AdminClient _adminClient; + String _topic; @SuppressWarnings("unchecked") @@ -322,6 +324,20 @@ void maybeAddPartitions(int minPartitionNum) throws ExecutionException, Interrup } } + /** + * Exposed package-private access for testing. Get the total number of partitions for a Kafka topic. + * @return total number of topic partitions + * @throws InterruptedException when a thread is waiting, sleeping and the thread is interrupted, either before / during the activity. + * @throws ExecutionException when attempting to retrieve the result of a task that aborted by throwing an exception. + */ + int numPartitions() throws InterruptedException, ExecutionException { + + // TODO (andrewchoi5): connect this to unit testing method for testing maybeAddPartitions! 
+ + return _adminClient.describeTopics(Collections.singleton(_topic)).values().get(_topic).get().partitions().size(); + } + + private Set getAvailableBrokers() throws ExecutionException, InterruptedException { Set brokers = new HashSet<>(_adminClient.describeCluster().nodes().get()); Set blackListedBrokers = _topicFactory.getBlackListedBrokers(_zkConnect); diff --git a/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java b/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java new file mode 100644 index 00000000..7aaaf8d3 --- /dev/null +++ b/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java @@ -0,0 +1,135 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +package com.linkedin.kmf.services; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.CreateTopicsResult; +import org.apache.kafka.clients.admin.DescribeClusterResult; +import org.apache.kafka.clients.admin.DescribeTopicsResult; +import org.apache.kafka.clients.admin.TopicDescription; +import org.apache.kafka.common.KafkaFuture; +import org.apache.kafka.common.Node; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + + +/** + * Testing methods for the Xinfra Monitor class of MultiClusterTopicManagementService. + */ +@SuppressWarnings("unchecked") +@Test +public class MultiClusterTopicManagementServiceTest { + private static final String SERVICE_TEST_TOPIC = "xinfra-monitor-Multi-Cluster-Topic-Management-Service-Test-topic"; + private static Set nodeSet; + private MultiClusterTopicManagementService.TopicManagementHelper _topicManagementHelper; + private CreateTopicsResult _createTopicsResult; + private Map> _kafkaFutureMap; + private KafkaFuture _kafkaFuture; + + @BeforeMethod + private void startTest() { + _createTopicsResult = Mockito.mock(CreateTopicsResult.class); + _kafkaFutureMap = Mockito.mock(Map.class); + _kafkaFuture = Mockito.mock(KafkaFuture.class); + + nodeSet = new HashSet<>(); + nodeSet.add(new Node(1, "host-1", 2132)); + nodeSet.add(new Node(2, "host-2", 2133)); + nodeSet.add(new Node(3, "host-3", 2134)); + nodeSet.add(new Node(4, "host-4", 2135)); + nodeSet.add(new Node(5, "host-5", 2136)); + + _topicManagementHelper = Mockito.mock(MultiClusterTopicManagementService.TopicManagementHelper.class); + _topicManagementHelper._topic = SERVICE_TEST_TOPIC; + 
_topicManagementHelper._adminClient = Mockito.mock(AdminClient.class); + _topicManagementHelper._topicCreationEnabled = true; + } + + @AfterMethod + private void finishTest() { + System.out.println("Finished " + this.getClass().getCanonicalName().toLowerCase() + "."); + } + + @Test + protected void MultiClusterTopicManagementServiceTopicCreationTest() throws Exception { + + Mockito.doCallRealMethod().when(_topicManagementHelper).maybeCreateTopic(); + + Mockito.when(_topicManagementHelper._adminClient.describeCluster()) + .thenReturn(Mockito.mock(DescribeClusterResult.class)); + Mockito.when(_topicManagementHelper._adminClient.describeCluster().nodes()) + .thenReturn(Mockito.mock(KafkaFuture.class)); + Mockito.when(_topicManagementHelper._adminClient.describeCluster().nodes().get()).thenReturn(nodeSet); + + Mockito.when(_topicManagementHelper._adminClient.createTopics(Mockito.anyCollection())) + .thenReturn(_createTopicsResult); + Mockito.when(_topicManagementHelper._adminClient.createTopics(Mockito.anyCollection()).values()) + .thenReturn(_kafkaFutureMap); + Mockito.when( + _topicManagementHelper._adminClient.createTopics(Mockito.anyCollection()).values().get(SERVICE_TEST_TOPIC)) + .thenReturn(_kafkaFuture); + + Answer createKafkaTopicFutureAnswer = new Answer() { + /** + * @param invocation the invocation on the mocked TopicManagementHelper. + * @return NULL value. + * @throws Throwable the throwable to be thrown when Exception occurs. 
+ */ + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + Mockito.when(_topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC))) + .thenReturn(Mockito.mock(DescribeTopicsResult.class)); + Mockito.when( + _topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC)).values()) + .thenReturn(Mockito.mock(Map.class)); + Mockito.when(_topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC)) + .values() + .get(SERVICE_TEST_TOPIC)).thenReturn(Mockito.mock(KafkaFuture.class)); + Mockito.when(_topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC)) + .values() + .get(SERVICE_TEST_TOPIC) + .get()).thenReturn(Mockito.mock(TopicDescription.class)); + Mockito.when(_topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC)) + .values() + .get(SERVICE_TEST_TOPIC) + .get() + .name()).thenReturn(SERVICE_TEST_TOPIC); + return null; + } + }; + + Mockito.when(_topicManagementHelper._adminClient.createTopics(Mockito.anyCollection()) + .values() + .get(SERVICE_TEST_TOPIC) + .get()).thenAnswer(createKafkaTopicFutureAnswer); + + _topicManagementHelper.maybeCreateTopic(); + + Assert.assertNotNull(_topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC)) + .values() + .get(SERVICE_TEST_TOPIC) + .get()); + Assert.assertEquals(_topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC)) + .values() + .get(SERVICE_TEST_TOPIC) + .get() + .name(), SERVICE_TEST_TOPIC); + } +} From a502ed66d6a4429001a27a9ad98f683404cf19df Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Tue, 26 May 2020 16:10:32 -0700 Subject: [PATCH 04/89] Add topic log in consume produce service (#247) Add topic log in consume produce service (#247) Signed-off-by: Andrew Choi --- src/main/java/com/linkedin/kmf/services/ConsumeService.java | 2 +- 
src/main/java/com/linkedin/kmf/services/ProduceService.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/linkedin/kmf/services/ConsumeService.java b/src/main/java/com/linkedin/kmf/services/ConsumeService.java index b505bb61..fed2c6d0 100644 --- a/src/main/java/com/linkedin/kmf/services/ConsumeService.java +++ b/src/main/java/com/linkedin/kmf/services/ConsumeService.java @@ -230,7 +230,7 @@ public synchronized void start() { try { topicDescription = topicDescriptionKafkaFuture.get(); } catch (InterruptedException | ExecutionException e) { - LOG.error("Exception occurred while getting the topicDescriptionKafkaFuture", e); + LOG.error("Exception occurred while getting the topicDescriptionKafkaFuture for topic: {}", _topic, e); } double partitionCount = topicDescription.partitions().size(); topicPartitionCount.add( diff --git a/src/main/java/com/linkedin/kmf/services/ProduceService.java b/src/main/java/com/linkedin/kmf/services/ProduceService.java index 0094b280..23d71326 100644 --- a/src/main/java/com/linkedin/kmf/services/ProduceService.java +++ b/src/main/java/com/linkedin/kmf/services/ProduceService.java @@ -171,7 +171,7 @@ public synchronized void start() { _handleNewPartitionsExecutor.scheduleWithFixedDelay(new NewPartitionHandler(), 1, 30, TimeUnit.SECONDS); LOG.info("{}/ProduceService started", _name); } catch (InterruptedException | UnknownTopicOrPartitionException | ExecutionException e) { - LOG.error("Exception occurred while starting produce service: ", e); + LOG.error("Exception occurred while starting produce service for topic: {}", _topic, e); } } } From ffa7d591ffd8664a599e52cde6006236039ce933 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Tue, 26 May 2020 16:31:25 -0700 Subject: [PATCH 05/89] MulticlustertopicmanagementService should block until topic creation is fully complete (#246) * Xinfra Monitor MulticlustertopicmanagementService should block until topic creation is fully complete. 
Signed-off-by: Andrew Choi * wip Signed-off-by: Andrew Choi --- .../java/com/linkedin/kmf/apps/SingleClusterMonitor.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/com/linkedin/kmf/apps/SingleClusterMonitor.java b/src/main/java/com/linkedin/kmf/apps/SingleClusterMonitor.java index c0ec1778..de88d2f5 100644 --- a/src/main/java/com/linkedin/kmf/apps/SingleClusterMonitor.java +++ b/src/main/java/com/linkedin/kmf/apps/SingleClusterMonitor.java @@ -62,12 +62,17 @@ public SingleClusterMonitor(Map props, String name) throws Excep _name = name; _topicManagementService = new TopicManagementService(props, name); CompletableFuture topicPartitionResult = _topicManagementService.topicPartitionResult(); + + // block on the MultiClusterTopicManagementService to complete. + topicPartitionResult.get(); + _produceService = new ProduceService(props, name); _consumeService = new ConsumeService(name, topicPartitionResult, consumerFactory); _allServices = new ArrayList<>(SERVICES_INITIAL_CAPACITY); _allServices.add(_topicManagementService); _allServices.add(_produceService); _allServices.add(_consumeService); + } @Override From 2ae902a05a4c19f7520c85d604edb6ff5aaaddf1 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Wed, 27 May 2020 10:23:50 -0700 Subject: [PATCH 06/89] Use createTopicIfNotExist instead of AdminClient (#248) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use createTopicIfNotExist instead of AdminClient connects the _topicFactory interface’s createTopicIfNotExist. 
🙏🏻 Signed-off-by: Andrew Choi --- src/main/java/com/linkedin/kmf/common/Utils.java | 8 +++++++- .../services/MultiClusterTopicManagementService.java | 10 +++------- .../MultiClusterTopicManagementServiceTest.java | 9 +++++---- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/linkedin/kmf/common/Utils.java b/src/main/java/com/linkedin/kmf/common/Utils.java index 5fcbf55e..0d2c2fd5 100644 --- a/src/main/java/com/linkedin/kmf/common/Utils.java +++ b/src/main/java/com/linkedin/kmf/common/Utils.java @@ -31,6 +31,7 @@ import org.apache.avro.io.Encoder; import org.apache.avro.io.JsonEncoder; import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.CreateTopicsResult; import org.apache.kafka.clients.admin.NewTopic; import org.apache.kafka.common.errors.TopicExistsException; import org.json.JSONObject; @@ -98,7 +99,12 @@ public static int createTopicIfNotExists(String topic, short replicationFactor, List topics = new ArrayList<>(); topics.add(newTopic); - adminClient.createTopics(topics); + CreateTopicsResult result = adminClient.createTopics(topics); + + // waits for this topic creation future to complete, and then returns its result. + result.values().get(topic).get(); + LOG.info("CreateTopicsResult: {}.", result.values()); + } catch (TopicExistsException e) { /* There is a race condition with the consumer. 
*/ LOG.debug("Monitoring topic " + topic + " already exists in the cluster.", e); diff --git a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java index 68a7060f..bc6f923c 100644 --- a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java +++ b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java @@ -36,7 +36,6 @@ import kafka.zk.KafkaZkClient; import org.apache.kafka.clients.admin.AdminClient; import org.apache.kafka.clients.admin.AdminClientConfig; -import org.apache.kafka.clients.admin.CreateTopicsResult; import org.apache.kafka.clients.admin.ElectLeadersOptions; import org.apache.kafka.clients.admin.ElectLeadersResult; import org.apache.kafka.clients.admin.NewPartitions; @@ -230,7 +229,6 @@ static class TopicManagementHelper { private final int _replicationFactor; private final double _minPartitionsToBrokersRatio; private final int _minPartitionNum; - private final TopicFactory _topicFactory; private final Properties _topicProperties; private boolean _preferredLeaderElectionRequested; private final int _requestTimeoutMs; @@ -240,6 +238,7 @@ static class TopicManagementHelper { boolean _topicCreationEnabled; AdminClient _adminClient; String _topic; + TopicFactory _topicFactory; @SuppressWarnings("unchecked") @@ -277,11 +276,8 @@ void maybeCreateTopic() throws Exception { int numPartitions = Math.max((int) Math.ceil(brokerCount * _minPartitionsToBrokersRatio), minPartitionNum()); NewTopic newTopic = new NewTopic(_topic, numPartitions, (short) _replicationFactor); newTopic.configs((Map) _topicProperties); - CreateTopicsResult createTopicsResult = _adminClient.createTopics(Collections.singletonList(newTopic)); - - // waits for this topic creation future to complete, and then returns its result. 
- createTopicsResult.values().get(_topic).get(); - LOGGER.info("CreateTopicsResult: {}.", createTopicsResult.values()); + _topicFactory.createTopicIfNotExist(_topic, (short) _replicationFactor, _minPartitionsToBrokersRatio, + _topicProperties, _adminClient); } } diff --git a/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java b/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java index 7aaaf8d3..da172d06 100644 --- a/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java +++ b/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java @@ -10,6 +10,7 @@ package com.linkedin.kmf.services; +import com.linkedin.kmf.topicfactory.TopicFactory; import java.util.Collections; import java.util.HashSet; import java.util.Map; @@ -59,6 +60,7 @@ private void startTest() { _topicManagementHelper = Mockito.mock(MultiClusterTopicManagementService.TopicManagementHelper.class); _topicManagementHelper._topic = SERVICE_TEST_TOPIC; _topicManagementHelper._adminClient = Mockito.mock(AdminClient.class); + _topicManagementHelper._topicFactory = Mockito.mock(TopicFactory.class); _topicManagementHelper._topicCreationEnabled = true; } @@ -94,6 +96,7 @@ protected void MultiClusterTopicManagementServiceTopicCreationTest() throws Exce */ @Override public Void answer(InvocationOnMock invocation) throws Throwable { + Mockito.when(_topicManagementHelper._adminClient.describeTopics(Collections.singleton(SERVICE_TEST_TOPIC))) .thenReturn(Mockito.mock(DescribeTopicsResult.class)); Mockito.when( @@ -115,10 +118,8 @@ public Void answer(InvocationOnMock invocation) throws Throwable { } }; - Mockito.when(_topicManagementHelper._adminClient.createTopics(Mockito.anyCollection()) - .values() - .get(SERVICE_TEST_TOPIC) - .get()).thenAnswer(createKafkaTopicFutureAnswer); + Mockito.when(_topicManagementHelper._topicFactory.createTopicIfNotExist(Mockito.anyString(), Mockito.anyShort(), + 
Mockito.anyDouble(), Mockito.any(), Mockito.any())).thenAnswer(createKafkaTopicFutureAnswer); _topicManagementHelper.maybeCreateTopic(); From c63d44123d09ee6698106fa4dfe002e364fbc1ec Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Wed, 27 May 2020 16:29:30 -0700 Subject: [PATCH 07/89] .gitignore for random unit tests (#250) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .gitignore for random unit tests (#250)🙏🏻 Signed-off-by: Andrew Choi --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 848e31f0..a5b7096f 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,7 @@ logs/ .settings/ + + + +src/test/java/com/linkedin/kmf/RandomTests.java From 7417f2a51809685f4fbfa6703ffbf15ce714d816 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Wed, 27 May 2020 23:35:00 -0700 Subject: [PATCH 08/89] Xinfra Monitor Consumer Group Coordinator Assignment (#245) Xinfra Monitor Consumer Group Coordinator Assignment. 1. Implement Consumer Group Coordinator Assignment. 2. 
Bump up dependencies on Kafka and Kafka Client Signed-off-by: Andrew Choi li_andchoi@microsoft.com --- build.gradle | 4 +- .../common/ConsumerGroupCoordinatorUtils.java | 85 +++++++++++++++ .../linkedin/kmf/consumer/NewConsumer.java | 32 +++++- .../kmf/consumer/NewConsumerConfig.java | 41 +++++++ .../linkedin/kmf/services/ConsumeService.java | 19 +++- .../kmf/services/ConsumerFactoryImpl.java | 9 +- .../MultiClusterTopicManagementService.java | 57 ++++++++-- .../kmf/consumer/NewConsumerTest.java | 103 ++++++++++++++++++ 8 files changed, 322 insertions(+), 28 deletions(-) create mode 100644 src/main/java/com/linkedin/kmf/common/ConsumerGroupCoordinatorUtils.java create mode 100644 src/main/java/com/linkedin/kmf/consumer/NewConsumerConfig.java create mode 100644 src/test/java/com/linkedin/kmf/consumer/NewConsumerTest.java diff --git a/build.gradle b/build.gradle index d9c6aebb..c4c8d60c 100644 --- a/build.gradle +++ b/build.gradle @@ -38,8 +38,8 @@ allprojects { compile 'net.savantly:graphite-client:1.1.0-RELEASE' compile 'com.timgroup:java-statsd-client:3.0.1' compile 'com.signalfx.public:signalfx-codahale:0.0.47' - compile group: 'org.apache.kafka', name: 'kafka_2.12', version: '2.3.1' - compile group: 'org.apache.kafka', name: 'kafka-clients', version: '2.4.0' + compile group: 'org.apache.kafka', name: 'kafka_2.13', version: '2.5.0' + compile group: 'org.apache.kafka', name: 'kafka-clients', version: '2.5.0' testCompile 'org.mockito:mockito-core:2.24.0' testCompile 'org.testng:testng:6.8.8' } diff --git a/src/main/java/com/linkedin/kmf/common/ConsumerGroupCoordinatorUtils.java b/src/main/java/com/linkedin/kmf/common/ConsumerGroupCoordinatorUtils.java new file mode 100644 index 00000000..b2edc01d --- /dev/null +++ b/src/main/java/com/linkedin/kmf/common/ConsumerGroupCoordinatorUtils.java @@ -0,0 +1,85 @@ +/** + * Copyright 2020 LinkedIn Corp. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.kmf.common; + +import com.linkedin.kmf.consumer.NewConsumer; +import java.util.Collections; +import java.util.concurrent.ExecutionException; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.common.internals.Topic; +import org.apache.kafka.common.utils.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class ConsumerGroupCoordinatorUtils { + private static final Logger LOGGER = LoggerFactory.getLogger(NewConsumer.class); + private static final String CONSUMER_GROUP_PREFIX_CANDIDATE = "__shadow_consumer_group-"; + + /** + * https://github.com/apache/kafka/blob/trunk/core/src/main/scala/kafka/coordinator/group/GroupMetadataManager.scala#L189 + * The consumer group string's hash code is used for this modulo operation. + * @param groupId kafka consumer group ID + * @param consumerOffsetsTopicPartitions number of partitions in the __consumer_offsets topic. + * @return hashed integer which represents a number, the Kafka's Utils.abs() value of which is the broker + * ID of the group coordinator, or the leader of the offsets topic partition. 
+ */ + public static int partitionFor(String groupId, int consumerOffsetsTopicPartitions) { + + LOGGER.debug("Hashed and modulo output: {}", groupId.hashCode()); + return Utils.abs(groupId.hashCode()) % consumerOffsetsTopicPartitions; + } + + /** + * Instead of making targetGroupId an instance variable and then assigning it some value which this then looks up + * it can just be a parameter to a method + * hash(group.id) % (number of __consumer_offsets topic partitions). + * The partition's leader is the group coordinator + * Choose B s.t hash(A) % (number of __consumer_offsets topic partitions) == hash(B) % (number of __consumer_offsets topic partitions) + * @param targetGroupId the identifier of the target consumer group + * @param adminClient an Admin Client object + */ + public static String findCollision(String targetGroupId, AdminClient adminClient) + throws ExecutionException, InterruptedException { + if (targetGroupId.equals("")) { + throw new IllegalArgumentException("The target consumer group identifier cannot be empty: " + targetGroupId); + } + + int numOffsetsTopicPartitions = adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME)) + .values() + .get(Topic.GROUP_METADATA_TOPIC_NAME) + .get() + .partitions() + .size(); + + // Extract invariant from loop + int targetConsumerOffsetsPartition = partitionFor(targetGroupId, numOffsetsTopicPartitions); + + // This doesn't need to be an instance variable because we throw this out this value at the end of computation + int groupSuffix = 0; + + // Extract return value so it's not computed twice, this reduces the possibility of bugs + String newConsumerGroup; + + // Use while(true) otherwise halting condition is hard to read. 
+ while (true) { + // TODO: could play fancy StringBuilder games here to make this generate less garbage + newConsumerGroup = CONSUMER_GROUP_PREFIX_CANDIDATE + groupSuffix++; + int newGroupNamePartition = ConsumerGroupCoordinatorUtils.partitionFor(newConsumerGroup, numOffsetsTopicPartitions); + if (newGroupNamePartition == targetConsumerOffsetsPartition) { + break; + } + } + + return newConsumerGroup; + } +} + diff --git a/src/main/java/com/linkedin/kmf/consumer/NewConsumer.java b/src/main/java/com/linkedin/kmf/consumer/NewConsumer.java index fac8fcf0..e1ddf56c 100644 --- a/src/main/java/com/linkedin/kmf/consumer/NewConsumer.java +++ b/src/main/java/com/linkedin/kmf/consumer/NewConsumer.java @@ -10,11 +10,15 @@ package com.linkedin.kmf.consumer; +import com.linkedin.kmf.common.ConsumerGroupCoordinatorUtils; import java.time.Duration; import java.util.Collections; import java.util.Iterator; import java.util.Map; import java.util.Properties; +import java.util.concurrent.ExecutionException; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.clients.consumer.OffsetAndMetadata; @@ -23,25 +27,42 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/* - * Wrap around the new consumer from Apache Kafka and implement the #KMBaseConsumer interface + +/** + * Wraps around the new consumer from Apache Kafka and implements the #KMBaseConsumer interface */ public class NewConsumer implements KMBaseConsumer { private final KafkaConsumer _consumer; private Iterator> _recordIter; - private static final Logger LOG = LoggerFactory.getLogger(NewConsumer.class); + private static final Logger LOGGER = LoggerFactory.getLogger(NewConsumer.class); private static long lastCommitted; - public NewConsumer(String topic, Properties consumerProperties) { + public NewConsumer(String topic, 
Properties consumerProperties, AdminClient adminClient) + throws ExecutionException, InterruptedException { + LOGGER.info("{} is being instantiated in the constructor..", this.getClass().getSimpleName()); + + NewConsumerConfig newConsumerConfig = new NewConsumerConfig(consumerProperties); + String targetConsumerGroupId = newConsumerConfig.getString(NewConsumerConfig.TARGET_CONSUMER_GROUP_ID_CONFIG); + + if (targetConsumerGroupId != null) { + consumerProperties.put(ConsumerConfig.GROUP_ID_CONFIG, configureGroupId(targetConsumerGroupId, adminClient)); + } _consumer = new KafkaConsumer<>(consumerProperties); _consumer.subscribe(Collections.singletonList(topic)); } + static String configureGroupId(String targetConsumerGroupId, AdminClient adminClient) + throws ExecutionException, InterruptedException { + + return ConsumerGroupCoordinatorUtils.findCollision(targetConsumerGroupId, adminClient); + } + @Override public BaseConsumerRecord receive() { - if (_recordIter == null || !_recordIter.hasNext()) + if (_recordIter == null || !_recordIter.hasNext()) { _recordIter = _consumer.poll(Duration.ofMillis(Long.MAX_VALUE)).iterator(); + } ConsumerRecord record = _recordIter.next(); return new BaseConsumerRecord(record.topic(), record.partition(), record.offset(), record.key(), record.value()); @@ -81,5 +102,4 @@ public long lastCommitted() { public void updateLastCommit() { lastCommitted = System.currentTimeMillis(); } - } diff --git a/src/main/java/com/linkedin/kmf/consumer/NewConsumerConfig.java b/src/main/java/com/linkedin/kmf/consumer/NewConsumerConfig.java new file mode 100644 index 00000000..269a5068 --- /dev/null +++ b/src/main/java/com/linkedin/kmf/consumer/NewConsumerConfig.java @@ -0,0 +1,41 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ +package com.linkedin.kmf.consumer; + +import java.util.Map; +import org.apache.kafka.common.config.AbstractConfig; +import org.apache.kafka.common.config.ConfigDef; + + +/** + * Configuration for Xinfra Monitor New Consumer + */ +public class NewConsumerConfig extends AbstractConfig { + + private static final ConfigDef CONFIG_DEF; + + public static final String TARGET_CONSUMER_GROUP_ID_CONFIG = "target.consumer.group.id"; + public static final String TARGET_CONSUMER_GROUP_ID_CONFIG_DOC = + "When defined a consumer group is chosen such that it maps to the same group coordinator as the specified " + + "group coordinator."; + + static { + CONFIG_DEF = new ConfigDef().define(TARGET_CONSUMER_GROUP_ID_CONFIG, + ConfigDef.Type.STRING, + null, + ConfigDef.Importance.MEDIUM, + TARGET_CONSUMER_GROUP_ID_CONFIG_DOC); + } + + public NewConsumerConfig(Map props) { + super(CONFIG_DEF, props); + } +} + diff --git a/src/main/java/com/linkedin/kmf/services/ConsumeService.java b/src/main/java/com/linkedin/kmf/services/ConsumeService.java index fed2c6d0..1a6535d8 100644 --- a/src/main/java/com/linkedin/kmf/services/ConsumeService.java +++ b/src/main/java/com/linkedin/kmf/services/ConsumeService.java @@ -57,7 +57,7 @@ public class ConsumeService implements Service { private CommitAvailabilityMetrics _commitAvailabilityMetrics; private CommitLatencyMetrics _commitLatencyMetrics; private String _topic; - private String _name; + private final String _name; private static final String METRIC_GROUP_NAME = "consume-service"; private static Map tags; @@ -73,8 +73,8 @@ public class ConsumeService implements Service { * @param name Name of the Monitor instance * @param 
topicPartitionResult The completable future for topic partition * @param consumerFactory Consumer Factory object. - * @throws ExecutionException - * @throws InterruptedException + * @throws ExecutionException when attempting to retrieve the result of a task that aborted by throwing an exception + * @throws InterruptedException when a thread is waiting, sleeping, or otherwise occupied and the thread is interrupted */ public ConsumeService(String name, CompletableFuture topicPartitionResult, @@ -86,6 +86,8 @@ public ConsumeService(String name, _adminClient = consumerFactory.adminClient(); _running = new AtomicBoolean(false); + // Returns a new CompletionStage (topicPartitionFuture) which + // executes the given action - code inside run() - when this stage (topicPartitionResult) completes normally,. CompletableFuture topicPartitionFuture = topicPartitionResult.thenRun(() -> { MetricConfig metricConfig = new MetricConfig().samples(60).timeWindow(1000, TimeUnit.MILLISECONDS); List reporters = new ArrayList<>(); @@ -94,8 +96,10 @@ public ConsumeService(String name, tags = new HashMap<>(); tags.put(TAGS_NAME, name); _topic = consumerFactory.topic(); - _sensors = new ConsumeMetrics(metrics, tags, consumerFactory.latencyPercentileMaxMs(), consumerFactory.latencyPercentileGranularityMs()); - _commitLatencyMetrics = new CommitLatencyMetrics(metrics, tags, consumerFactory.latencyPercentileMaxMs(), consumerFactory.latencyPercentileGranularityMs()); + _sensors = new ConsumeMetrics(metrics, tags, consumerFactory.latencyPercentileMaxMs(), + consumerFactory.latencyPercentileGranularityMs()); + _commitLatencyMetrics = new CommitLatencyMetrics(metrics, tags, consumerFactory.latencyPercentileMaxMs(), + consumerFactory.latencyPercentileGranularityMs()); _commitAvailabilityMetrics = new CommitAvailabilityMetrics(metrics, tags); _consumeThread = new Thread(() -> { try { @@ -107,6 +111,7 @@ public ConsumeService(String name, _consumeThread.setDaemon(true); }); + // In a blocking fashion, 
waits for this topicPartitionFuture to complete, and then returns its result. topicPartitionFuture.get(); } @@ -124,6 +129,7 @@ record = _baseConsumer.receive(); _sensors._consumeError.record(); LOG.warn(_name + "/ConsumeService failed to receive record", e); /* Avoid busy while loop */ + //noinspection BusyWait Thread.sleep(CONSUME_THREAD_SLEEP_MS); continue; } @@ -134,7 +140,7 @@ record = _baseConsumer.receive(); try { avroRecord = Utils.genericRecordFromJson(record.value()); } catch (Exception exception) { - LOG.error("exception occurred while getting avro record.", exception); + LOG.error("An exception occurred while getting avro record.", exception); } if (avroRecord == null) { @@ -232,6 +238,7 @@ public synchronized void start() { } catch (InterruptedException | ExecutionException e) { LOG.error("Exception occurred while getting the topicDescriptionKafkaFuture for topic: {}", _topic, e); } + @SuppressWarnings("ConstantConditions") double partitionCount = topicDescription.partitions().size(); topicPartitionCount.add( new MetricName("topic-partitions-count", METRIC_GROUP_NAME, "The total number of partitions for the topic.", tags), diff --git a/src/main/java/com/linkedin/kmf/services/ConsumerFactoryImpl.java b/src/main/java/com/linkedin/kmf/services/ConsumerFactoryImpl.java index 49626551..f08e2472 100644 --- a/src/main/java/com/linkedin/kmf/services/ConsumerFactoryImpl.java +++ b/src/main/java/com/linkedin/kmf/services/ConsumerFactoryImpl.java @@ -28,16 +28,17 @@ public class ConsumerFactoryImpl implements ConsumerFactory { private final KMBaseConsumer _baseConsumer; - private String _topic; + private final String _topic; private static final String FALSE = "false"; private final int _latencyPercentileMaxMs; private final int _latencyPercentileGranularityMs; private static final String[] NON_OVERRIDABLE_PROPERTIES = new String[] {ConsumeServiceConfig.BOOTSTRAP_SERVERS_CONFIG, ConsumeServiceConfig.ZOOKEEPER_CONNECT_CONFIG}; - private int _latencySlaMs; + 
private final int _latencySlaMs; private static AdminClient adminClient; private static final Logger LOG = LoggerFactory.getLogger(ConsumerFactoryImpl.class); + @SuppressWarnings("rawtypes") public ConsumerFactoryImpl(Map props) throws Exception { LOG.info("Creating AdminClient."); adminClient = AdminClient.create(props); @@ -80,7 +81,9 @@ public ConsumerFactoryImpl(Map props) throws Exception { props.forEach(consumerProps::putIfAbsent); } - _baseConsumer = (KMBaseConsumer) Class.forName(consumerClassName).getConstructor(String.class, Properties.class).newInstance(_topic, consumerProps); + _baseConsumer = (KMBaseConsumer) Class.forName(consumerClassName) + .getConstructor(String.class, Properties.class, AdminClient.class) + .newInstance(_topic, consumerProps, adminClient()); } diff --git a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java index bc6f923c..088e5f98 100644 --- a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java +++ b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java @@ -32,6 +32,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import kafka.admin.AdminUtils; import kafka.admin.BrokerMetadata; +import kafka.controller.ReplicaAssignment; import kafka.server.ConfigType; import kafka.zk.KafkaZkClient; import org.apache.kafka.clients.admin.AdminClient; @@ -342,8 +343,8 @@ private Set getAvailableBrokers() throws ExecutionException, InterruptedEx } void maybeReassignPartitionAndElectLeader() throws Exception { - try (KafkaZkClient zkClient = KafkaZkClient.apply(_zkConnect, JaasUtils.isZkSecurityEnabled(), com.linkedin.kmf.common.Utils.ZK_SESSION_TIMEOUT_MS, - com.linkedin.kmf.common.Utils.ZK_CONNECTION_TIMEOUT_MS, Integer.MAX_VALUE, Time.SYSTEM, METRIC_GROUP_NAME, "SessionExpireListener", null)) { + try (KafkaZkClient zkClient = KafkaZkClient.apply(_zkConnect, 
JaasUtils.isZkSaslEnabled(), com.linkedin.kmf.common.Utils.ZK_SESSION_TIMEOUT_MS, + com.linkedin.kmf.common.Utils.ZK_CONNECTION_TIMEOUT_MS, Integer.MAX_VALUE, Time.SYSTEM, METRIC_GROUP_NAME, "SessionExpireListener", null, null)) { List partitionInfoList = _adminClient .describeTopics(Collections.singleton(_topic)).all().get().get(_topic).partitions(); @@ -416,8 +417,8 @@ void maybeElectLeader() throws Exception { return; } - try (KafkaZkClient zkClient = KafkaZkClient.apply(_zkConnect, JaasUtils.isZkSecurityEnabled(), com.linkedin.kmf.common.Utils.ZK_SESSION_TIMEOUT_MS, - com.linkedin.kmf.common.Utils.ZK_CONNECTION_TIMEOUT_MS, Integer.MAX_VALUE, Time.SYSTEM, METRIC_GROUP_NAME, "SessionExpireListener", null)) { + try (KafkaZkClient zkClient = KafkaZkClient.apply(_zkConnect, JaasUtils.isZkSaslEnabled(), com.linkedin.kmf.common.Utils.ZK_SESSION_TIMEOUT_MS, + com.linkedin.kmf.common.Utils.ZK_CONNECTION_TIMEOUT_MS, Integer.MAX_VALUE, Time.SYSTEM, METRIC_GROUP_NAME, "SessionExpireListener", null, null)) { if (!zkClient.reassignPartitionsInProgress()) { List partitionInfoList = _adminClient .describeTopics(Collections.singleton(_topic)).all().get().get(_topic).partitions(); @@ -462,9 +463,10 @@ private static void reassignPartitions(KafkaZkClient zkClient, Collection } scala.collection.immutable.Set topicList = new scala.collection.immutable.Set.Set1<>(topic); - scala.collection.Map> currentAssignment = zkClient.getPartitionAssignmentForTopics(topicList).apply(topic); - String currentAssignmentJson = formatAsReassignmentJson(topic, currentAssignment); - String newAssignmentJson = formatAsReassignmentJson(topic, assignedReplicas); + scala.collection.Map + currentAssignment = zkClient.getPartitionAssignmentForTopics(topicList).apply(topic); + String currentAssignmentJson = formatAsOldAssignmentJson(topic, currentAssignment); + String newAssignmentJson = formatAsNewReassignmentJson(topic, assignedReplicas); LOGGER.info("Reassign partitions for topic " + topic); 
LOGGER.info("Current partition replica assignment " + currentAssignmentJson); @@ -521,14 +523,14 @@ static boolean someBrokerNotElectedLeader(List partitionInfo * {"topic":"kmf-topic","partition":0,"replicas":[2,0]}]} * */ - private static String formatAsReassignmentJson(String topic, scala.collection.Map> partitionsToBeReassigned) { + private static String formatAsOldAssignmentJson(String topic, scala.collection.Map partitionsToBeReassigned) { StringBuilder bldr = new StringBuilder(); bldr.append("{\"version\":1,\"partitions\":[\n"); for (int partition = 0; partition < partitionsToBeReassigned.size(); partition++) { bldr.append(" {\"topic\":\"").append(topic).append("\",\"partition\":").append(partition).append(",\"replicas\":["); - scala.collection.Seq replicas = partitionsToBeReassigned.apply(partition); - for (int replicaIndex = 0; replicaIndex < replicas.size(); replicaIndex++) { - Object replica = replicas.apply(replicaIndex); + ReplicaAssignment replicas = partitionsToBeReassigned.apply(partition); + for (int replicaIndex = 0; replicaIndex < replicas.replicas().size(); replicaIndex++) { + Object replica = replicas.replicas().apply(replicaIndex); bldr.append(replica).append(","); } bldr.setLength(bldr.length() - 1); @@ -538,5 +540,38 @@ private static String formatAsReassignmentJson(String topic, scala.collection.Ma bldr.append("]}"); return bldr.toString(); } + + /** + * @param topic Kafka topic + * @param partitionsToReassign a map from partition (int) to new replica list (int seq) + * + * @return a json string with the same format as output of kafka.utils.ZkUtils.formatAsReassignmentJson + * + * Example: + *
+     *   {"version":1,"partitions":[
+     *     {"topic":"kmf-topic","partition":1,"replicas":[0,1]},
+     *     {"topic":"kmf-topic","partition":2,"replicas":[1,2]},
+     *     {"topic":"kmf-topic","partition":0,"replicas":[2,0]}]}
+     * 
+ */ + private static String formatAsNewReassignmentJson(String topic, scala.collection.Map> partitionsToReassign) { + StringBuilder builder = new StringBuilder(); + builder.append("{\"version\":1,\"partitions\":[\n"); + for (int partition = 0; partition < partitionsToReassign.size(); partition++) { + builder.append(" {\"topic\":\"").append(topic).append("\",\"partition\":").append(partition).append(",\"replicas\":["); + Seq replicas = partitionsToReassign.apply(partition); + for (int replicaIndex = 0; replicaIndex < replicas.size(); replicaIndex++) { + Object replica = replicas.apply(replicaIndex); + builder.append(replica).append(","); + } + builder.setLength(builder.length() - 1); + builder.append("]},\n"); + } + builder.setLength(builder.length() - 2); + builder.append("]}"); + return builder.toString(); + } + } } diff --git a/src/test/java/com/linkedin/kmf/consumer/NewConsumerTest.java b/src/test/java/com/linkedin/kmf/consumer/NewConsumerTest.java new file mode 100644 index 00000000..b7e8b4cc --- /dev/null +++ b/src/test/java/com/linkedin/kmf/consumer/NewConsumerTest.java @@ -0,0 +1,103 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +package com.linkedin.kmf.consumer; + +import com.linkedin.kmf.common.ConsumerGroupCoordinatorUtils; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.ExecutionException; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.DescribeTopicsResult; +import org.apache.kafka.clients.admin.TopicDescription; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.common.internals.KafkaFutureImpl; +import org.apache.kafka.common.internals.Topic; +import org.mockito.Mockito; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + + +@Test +public class NewConsumerTest { + private static final int NUM_OFFSETS_TOPIC_PARTITIONS = 5; + private static final String TARGET_CONSUMER_GROUP_ID = "target-group-id"; + + @BeforeMethod + public void beforeMethod() { + System.out.println("Running beforeMethod of " + this.getClass()); + } + + @AfterMethod + public void afterMethod() { + System.out.println("Finished running testConsumerGroupCoordinatorHashing() of " + this.getClass()); + } + + @SuppressWarnings("unchecked") + @Test + public void testConsumerGroupCoordinatorHashing() throws ExecutionException, InterruptedException { + Properties consumerProperties = new Properties(); + + AdminClient adminClient = Mockito.mock(AdminClient.class); + + /* + * Mock the behavior of AdminClient only. 
+ */ + Mockito.when(adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME))) + .thenReturn(Mockito.mock(DescribeTopicsResult.class)); + Mockito.when(adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME)).values()) + .thenReturn(Mockito.mock(Map.class)); + Mockito.when(adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME)) + .values() + .get(Topic.GROUP_METADATA_TOPIC_NAME)).thenReturn(Mockito.mock(KafkaFutureImpl.class)); + + Mockito.when(adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME)) + .values() + .get(Topic.GROUP_METADATA_TOPIC_NAME) + .get()).thenReturn(Mockito.mock(TopicDescription.class)); + + Mockito.when(adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME)) + .values() + .get(Topic.GROUP_METADATA_TOPIC_NAME) + .get() + .partitions()).thenReturn(Mockito.mock(List.class)); + + Mockito.when(adminClient.describeTopics(Collections.singleton(Topic.GROUP_METADATA_TOPIC_NAME)) + .values() + .get(Topic.GROUP_METADATA_TOPIC_NAME) + .get() + .partitions() + .size()).thenReturn(NUM_OFFSETS_TOPIC_PARTITIONS); + + consumerProperties.put(ConsumerConfig.GROUP_ID_CONFIG, + NewConsumer.configureGroupId(TARGET_CONSUMER_GROUP_ID, adminClient)); + System.out.println("Consumer properties after configuration: " + consumerProperties); + Assert.assertNotNull(consumerProperties.get(ConsumerConfig.GROUP_ID_CONFIG)); + + // Testing I: run partitionsFor() on the result to make sure they are the same + int hashedResult = + ConsumerGroupCoordinatorUtils.partitionFor(consumerProperties.get(ConsumerConfig.GROUP_ID_CONFIG).toString(), + NUM_OFFSETS_TOPIC_PARTITIONS); + int hashedResult2 = + ConsumerGroupCoordinatorUtils.partitionFor(TARGET_CONSUMER_GROUP_ID, NUM_OFFSETS_TOPIC_PARTITIONS); + + Assert.assertEquals(hashedResult, hashedResult2); + System.out.println("Modulo result as an absolute value: " + hashedResult); + System.out.println("Modulo 
result as an absolute value: " + hashedResult2); + + // Testing II: Also test that the groupIds are different. + Assert.assertNotEquals(TARGET_CONSUMER_GROUP_ID, consumerProperties.get(ConsumerConfig.GROUP_ID_CONFIG)); + + } +} From d037baf9150bec502db36bfb16eb54e8b5b94870 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Thu, 28 May 2020 22:35:45 -0700 Subject: [PATCH 09/89] Partition Assignments when adding new topic partitions (#249) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Random assignment of partition assignments when adding partitions periodically Justifications: regardless of the partition/replica assignments here, maybeReassignPartitionAndElectLeader() will reassign the partition as needed periodically. 🚣🏻 Signed-off-by: Andrew Choi --- .../MultiClusterTopicManagementService.java | 54 +++++++++++++++++-- ...ultiClusterTopicManagementServiceTest.java | 30 ++++++++++- 2 files changed, 77 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java index 088e5f98..ce7ee6f2 100644 --- a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java +++ b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java @@ -37,6 +37,7 @@ import kafka.zk.KafkaZkClient; import org.apache.kafka.clients.admin.AdminClient; import org.apache.kafka.clients.admin.AdminClientConfig; +import org.apache.kafka.clients.admin.CreatePartitionsResult; import org.apache.kafka.clients.admin.ElectLeadersOptions; import org.apache.kafka.clients.admin.ElectLeadersResult; import org.apache.kafka.clients.admin.NewPartitions; @@ -302,7 +303,6 @@ void maybeAddPartitions(int minPartitionNum) throws ExecutionException, Interrup LOGGER.info("{} will increase partition of the topic {} in the cluster from {}" + " to {}.", this.getClass().toString(), _topic, partitionNum, 
minPartitionNum); Set blackListedBrokers = _topicFactory.getBlackListedBrokers(_zkConnect); - List> replicaAssignment = new ArrayList<>(new ArrayList<>()); Set brokers = new HashSet<>(); for (Node broker : _adminClient.describeCluster().nodes().get()) { BrokerMetadata brokerMetadata = new BrokerMetadata( @@ -314,11 +314,57 @@ void maybeAddPartitions(int minPartitionNum) throws ExecutionException, Interrup if (!blackListedBrokers.isEmpty()) { brokers.removeIf(broker -> blackListedBrokers.contains(broker.id())); } + + List> newPartitionAssignments = newPartitionAssignments(minPartitionNum, partitionNum, brokers, _replicationFactor); + + NewPartitions newPartitions = NewPartitions.increaseTo(minPartitionNum, newPartitionAssignments); + Map newPartitionsMap = new HashMap<>(); - NewPartitions newPartitions = NewPartitions.increaseTo(minPartitionNum, replicaAssignment); newPartitionsMap.put(_topic, newPartitions); - _adminClient.createPartitions(newPartitionsMap); + CreatePartitionsResult createPartitionsResult = _adminClient.createPartitions(newPartitionsMap); + + createPartitionsResult.all().get(); + } + } + + static List> newPartitionAssignments(int minPartitionNum, int partitionNum, + Set brokers, int rf) { + + // The replica assignments for the new partitions, and not the old partitions. + // .increaseTo(6, asList(asList(1, 2), + // asList(2, 3), + // asList(3, 1))) + // partition 3's preferred leader will be broker 1, + // partition 4's preferred leader will be broker 2 and + // partition 5's preferred leader will be broker 3. 
+ List> newPartitionAssignments = new ArrayList<>(new ArrayList<>()); + int partitionDifference = minPartitionNum - partitionNum; + + // leader assignments - + for (BrokerMetadata brokerMetadata : brokers) { + List replicas = new ArrayList<>(); + // leader replica/broker - + replicas.add(brokerMetadata.id()); + newPartitionAssignments.add(replicas); + if (newPartitionAssignments.size() == partitionDifference) { + break; + } } + + // follower assignments - + // Regardless of the partition/replica assignments here, maybeReassignPartitionAndElectLeader() + // will reassign the partition as needed periodically. + for (List replicas : newPartitionAssignments) { + for (BrokerMetadata broker : brokers) { + if (!replicas.contains(broker.id())) { + replicas.add(broker.id()); + } + if (replicas.size() == rf) { + break; + } + } + } + return newPartitionAssignments; } /** @@ -329,8 +375,6 @@ void maybeAddPartitions(int minPartitionNum) throws ExecutionException, Interrup */ int numPartitions() throws InterruptedException, ExecutionException { - // TODO (andrewchoi5): connect this to unit testing method for testing maybeAddPartitions! 
- return _adminClient.describeTopics(Collections.singleton(_topic)).values().get(_topic).get().partitions().size(); } diff --git a/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java b/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java index da172d06..e97bb407 100644 --- a/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java +++ b/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java @@ -12,9 +12,11 @@ import com.linkedin.kmf.topicfactory.TopicFactory; import java.util.Collections; -import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; import java.util.Map; import java.util.Set; +import kafka.admin.BrokerMetadata; import org.apache.kafka.clients.admin.AdminClient; import org.apache.kafka.clients.admin.CreateTopicsResult; import org.apache.kafka.clients.admin.DescribeClusterResult; @@ -29,6 +31,7 @@ import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import scala.Option; /** @@ -37,6 +40,7 @@ @SuppressWarnings("unchecked") @Test public class MultiClusterTopicManagementServiceTest { + private static final String SERVICE_TEST_TOPIC = "xinfra-monitor-Multi-Cluster-Topic-Management-Service-Test-topic"; private static Set nodeSet; private MultiClusterTopicManagementService.TopicManagementHelper _topicManagementHelper; @@ -50,12 +54,15 @@ private void startTest() { _kafkaFutureMap = Mockito.mock(Map.class); _kafkaFuture = Mockito.mock(KafkaFuture.class); - nodeSet = new HashSet<>(); + nodeSet = new LinkedHashSet<>(); nodeSet.add(new Node(1, "host-1", 2132)); nodeSet.add(new Node(2, "host-2", 2133)); nodeSet.add(new Node(3, "host-3", 2134)); nodeSet.add(new Node(4, "host-4", 2135)); nodeSet.add(new Node(5, "host-5", 2136)); + nodeSet.add(new Node(6, "host-5", 2136)); + nodeSet.add(new Node(7, "host-5", 2136)); + nodeSet.add(new Node(8, "host-5", 
2136)); _topicManagementHelper = Mockito.mock(MultiClusterTopicManagementService.TopicManagementHelper.class); _topicManagementHelper._topic = SERVICE_TEST_TOPIC; @@ -69,6 +76,25 @@ private void finishTest() { System.out.println("Finished " + this.getClass().getCanonicalName().toLowerCase() + "."); } + @Test(invocationCount = 2) + protected void maybeAddPartitionsTest() { + Set brokerMetadataSet = new LinkedHashSet<>(); + for (Node broker : nodeSet) { + brokerMetadataSet.add(new BrokerMetadata(broker.id(), Option.apply(broker.rack()))); + } + List> newPartitionAssignments = + MultiClusterTopicManagementService.TopicManagementHelper.newPartitionAssignments(11, 5, brokerMetadataSet, 4); + Assert.assertNotNull(newPartitionAssignments); + + System.out.println(newPartitionAssignments); + Assert.assertEquals(newPartitionAssignments.get(0).get(0).intValue(), 1); + Assert.assertEquals(newPartitionAssignments.get(1).get(0).intValue(), 2); + Assert.assertEquals(newPartitionAssignments.get(2).get(0).intValue(), 3); + Assert.assertEquals(newPartitionAssignments.get(3).get(0).intValue(), 4); + Assert.assertEquals(newPartitionAssignments.get(4).get(0).intValue(), 5); + Assert.assertEquals(newPartitionAssignments.get(5).get(0).intValue(), 6); + } + @Test protected void MultiClusterTopicManagementServiceTopicCreationTest() throws Exception { From 0877cde68d89a3fd9f64f703f47ba1394b4c8a9d Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Fri, 29 May 2020 00:49:42 -0700 Subject: [PATCH 10/89] optional Signed-off-by: Andrew Choi --- src/main/java/com/linkedin/kmf/common/Utils.java | 5 +++-- .../kmf/services/MultiClusterTopicManagementService.java | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/linkedin/kmf/common/Utils.java b/src/main/java/com/linkedin/kmf/common/Utils.java index 0d2c2fd5..8aa0287f 100644 --- a/src/main/java/com/linkedin/kmf/common/Utils.java +++ b/src/main/java/com/linkedin/kmf/common/Utils.java @@ -88,6 +88,7 @@ public 
static int createTopicIfNotExists(String topic, short replicationFactor, throws ExecutionException, InterruptedException { try { if (adminClient.listTopics().names().get().contains(topic)) { + LOG.info("AdminClient indicates that {} already exists in the cluster. Topic config: {}", topic, topicConfig); return getPartitionNumForTopic(adminClient, topic); } int brokerCount = Utils.getBrokerCount(adminClient); @@ -107,7 +108,7 @@ public static int createTopicIfNotExists(String topic, short replicationFactor, } catch (TopicExistsException e) { /* There is a race condition with the consumer. */ - LOG.debug("Monitoring topic " + topic + " already exists in the cluster.", e); + LOG.info("Monitoring topic " + topic + " already exists in the cluster.", e); return getPartitionNumForTopic(adminClient, topic); } LOG.info("Created monitoring topic {} in cluster with {} partitions and replication factor of {}.", topic, @@ -115,7 +116,7 @@ public static int createTopicIfNotExists(String topic, short replicationFactor, return partitionCount; } finally { - LOG.info("Completed the topic creation if it doesn't exist for {}", topic); + LOG.info("Completed the topic creation if it doesn't exist for {}.", topic); } } diff --git a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java index ce7ee6f2..ee8f5d47 100644 --- a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java +++ b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java @@ -54,6 +54,7 @@ import org.apache.kafka.common.utils.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import scala.Option; import scala.Option$; import scala.collection.Seq; @@ -387,8 +388,9 @@ private Set getAvailableBrokers() throws ExecutionException, InterruptedEx } void maybeReassignPartitionAndElectLeader() throws Exception { - try (KafkaZkClient zkClient = 
KafkaZkClient.apply(_zkConnect, JaasUtils.isZkSaslEnabled(), com.linkedin.kmf.common.Utils.ZK_SESSION_TIMEOUT_MS, - com.linkedin.kmf.common.Utils.ZK_CONNECTION_TIMEOUT_MS, Integer.MAX_VALUE, Time.SYSTEM, METRIC_GROUP_NAME, "SessionExpireListener", null, null)) { + try (KafkaZkClient zkClient = KafkaZkClient.apply(_zkConnect, JaasUtils.isZkSaslEnabled(), + com.linkedin.kmf.common.Utils.ZK_SESSION_TIMEOUT_MS, com.linkedin.kmf.common.Utils.ZK_CONNECTION_TIMEOUT_MS, + Integer.MAX_VALUE, Time.SYSTEM, METRIC_GROUP_NAME, "SessionExpireListener", null, Option.apply(null))) { List partitionInfoList = _adminClient .describeTopics(Collections.singleton(_topic)).all().get().get(_topic).partitions(); From 8793d3234eb196f2668daa137d576510e16e7c5d Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Fri, 29 May 2020 16:54:51 -0700 Subject: [PATCH 11/89] apache kafka scala version 2.13 with code version 2.4.1 (#251) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit apache.kafka use version : '2.4.1' justification: the internal kmf multiproduct currently employs scala version _2.11. Moreover, Anasi (or ELR tool) doesn't allow for intaking scala _2.11 with apache kafka code version 2.5.0 because the maven service doesn't have, and thus does not release, the scala _2.11 version 2.5.0 . we cannot bump up to scala version _2.13 in the kafka-monitoring multiproduct because there are many, many multiproduct dependencies on the scala version _2.11 that have not been ELR.ed yet. Justifications for merging this: the PR won’t break any dependencies because the bump to 2.5.0 happened very recently in this repository 2 - internal MP (xinfra monitor) can be bumped up to scala kafka_2.12 but even that requires modifications to several kmf-depended multiproducts, including trackingrest, container, etc.. which could easily take a long time to all complete. 
Bumping up the kmf internal mp to scala 2.12 requires these changes, which could take some time to code-complete, merge, and get released. What went wrong: Execution failed for task ':likafka-monitoring:kafka-monitoring-impl_2.12:compileJava'. Could not resolve all files for configuration ':likafka-monitoring:kafka-monitoring-impl_2.12:compileClasspath'. Could not find com.linkedin.linkedin-kafka-clients:linkedin-kafka-clients-message-processor_2.12:8.0.204. Required by: project :likafka-monitoring:kafka-monitoring-impl_2.12 > com.linkedin.kafka-rest:tracking-rest-impl:0.3.150 Could not find com.linkedin.container:tracker-consumer-internal_2.12:36.12.6. Required by: project :likafka-monitoring:kafka-monitoring-impl_2.12 > com.linkedin.kafka-rest:tracking-rest-impl:0.3.150 > com.linkedin.container:tracker-consumer-api:37.1.63 Could not find com.linkedin.likafka-cruise-control:likafka-cruise-control-impl_2.12:1.0.17. Required by: project :likafka-monitoring:kafka-monitoring-impl_2.12 > com.linkedin.kafka-rest:tracking-rest-impl:0.3.150 > com.linkedin.kafka-server:kafka-impl:2.0.0.71 Let's have the kmf mp stay on 2.11 for now. 
--- build.gradle | 4 ++-- .../kmf/services/MultiClusterTopicManagementService.java | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/build.gradle b/build.gradle index c4c8d60c..7a58826a 100644 --- a/build.gradle +++ b/build.gradle @@ -38,8 +38,8 @@ allprojects { compile 'net.savantly:graphite-client:1.1.0-RELEASE' compile 'com.timgroup:java-statsd-client:3.0.1' compile 'com.signalfx.public:signalfx-codahale:0.0.47' - compile group: 'org.apache.kafka', name: 'kafka_2.13', version: '2.5.0' - compile group: 'org.apache.kafka', name: 'kafka-clients', version: '2.5.0' + compile group: 'org.apache.kafka', name: 'kafka_2.13', version: '2.4.1' + compile group: 'org.apache.kafka', name: 'kafka-clients', version: '2.4.1' testCompile 'org.mockito:mockito-core:2.24.0' testCompile 'org.testng:testng:6.8.8' } diff --git a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java index ee8f5d47..9b7ad805 100644 --- a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java +++ b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java @@ -54,7 +54,6 @@ import org.apache.kafka.common.utils.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import scala.Option; import scala.Option$; import scala.collection.Seq; @@ -388,9 +387,9 @@ private Set getAvailableBrokers() throws ExecutionException, InterruptedEx } void maybeReassignPartitionAndElectLeader() throws Exception { - try (KafkaZkClient zkClient = KafkaZkClient.apply(_zkConnect, JaasUtils.isZkSaslEnabled(), + try (KafkaZkClient zkClient = KafkaZkClient.apply(_zkConnect, JaasUtils.isZkSecurityEnabled(), com.linkedin.kmf.common.Utils.ZK_SESSION_TIMEOUT_MS, com.linkedin.kmf.common.Utils.ZK_CONNECTION_TIMEOUT_MS, - Integer.MAX_VALUE, Time.SYSTEM, METRIC_GROUP_NAME, "SessionExpireListener", null, Option.apply(null))) { + Integer.MAX_VALUE, 
Time.SYSTEM, METRIC_GROUP_NAME, "SessionExpireListener", null)) { List partitionInfoList = _adminClient .describeTopics(Collections.singleton(_topic)).all().get().get(_topic).partitions(); @@ -463,8 +462,8 @@ void maybeElectLeader() throws Exception { return; } - try (KafkaZkClient zkClient = KafkaZkClient.apply(_zkConnect, JaasUtils.isZkSaslEnabled(), com.linkedin.kmf.common.Utils.ZK_SESSION_TIMEOUT_MS, - com.linkedin.kmf.common.Utils.ZK_CONNECTION_TIMEOUT_MS, Integer.MAX_VALUE, Time.SYSTEM, METRIC_GROUP_NAME, "SessionExpireListener", null, null)) { + try (KafkaZkClient zkClient = KafkaZkClient.apply(_zkConnect, JaasUtils.isZkSecurityEnabled(), com.linkedin.kmf.common.Utils.ZK_SESSION_TIMEOUT_MS, + com.linkedin.kmf.common.Utils.ZK_CONNECTION_TIMEOUT_MS, Integer.MAX_VALUE, Time.SYSTEM, METRIC_GROUP_NAME, "SessionExpireListener", null)) { if (!zkClient.reassignPartitionsInProgress()) { List partitionInfoList = _adminClient .describeTopics(Collections.singleton(_topic)).all().get().get(_topic).partitions(); From 31efd33d2a704523cd94bf588a92fefb7f6bb28c Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Sat, 30 May 2020 19:52:55 -0700 Subject: [PATCH 12/89] scala 2.21 (#254) Signed-off-by: Andrew Choi --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 7a58826a..ec106864 100644 --- a/build.gradle +++ b/build.gradle @@ -38,7 +38,7 @@ allprojects { compile 'net.savantly:graphite-client:1.1.0-RELEASE' compile 'com.timgroup:java-statsd-client:3.0.1' compile 'com.signalfx.public:signalfx-codahale:0.0.47' - compile group: 'org.apache.kafka', name: 'kafka_2.13', version: '2.4.1' + compile group: 'org.apache.kafka', name: 'kafka_2.12', version: '2.4.1' compile group: 'org.apache.kafka', name: 'kafka-clients', version: '2.4.1' testCompile 'org.mockito:mockito-core:2.24.0' testCompile 'org.testng:testng:6.8.8' From b4f9d9e8bede94f22ecfe5729f7f89f93cf0fae4 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Sun, 31 May 
2020 01:17:29 -0700 Subject: [PATCH 13/89] Use of org.apache.kafka - version: 2.3.1 #255 Use of org.apache.kafka - version: 2.3.1 Justification: Current linkedin kafka repository, "kafka": "com.linkedin.kafka:kafka_2.12:2.3.0.20", which internal kmf mp depends on, still uses version 2.3.0.20. This linkedin kafka version uses def getPartitionAssignmentForTopics(topics: Set[String]): Map[String, Map[Int, Seq[Int]]] as its signature for the method getPartitionAssignmentForTopics. However, the compile group: 'org.apache.kafka', name: 'kafka_2.12', version: '2.4.1' uses def getPartitionAssignmentForTopics(topics: Set[String]): Map[String, Map[Int, ReplicaAssignment]] Thus, apache kafka 2.4.1 is not backward compatible with internal kmf MP and linkedin kafka inside kmf MP. -> there is a discrepancy in the method definition of linkedin kafka versus apache kafka. Solution: With the older version of apache kafka 2.3.1, there is no discrepancy between the two products. I have checked that there are no breaking changes when reverting back to apache kafka version 2.3.1. I have added a todo item (andrewchoi5) to uncomment the related method when Xinfra Monitor is upgraded to 'org.apache.kafka' 'kafka_2.12' version '2.4.1' later, at which point we expect the linkedin kafka to have the signature of getPartitionAssignmentForTopics updated. /** * Gets partition the assignments for the given topics. * @param topics the topics whose partitions we wish to get the assignments for. * @return the partition assignment for each partition from the given topics. 
*/ def getPartitionAssignmentForTopics(topics: Set[String]): Map[String, Map[Int, Seq[Int]]] = { val getDataRequests = topics.map(topic => GetDataRequest(TopicZNode.path(topic), ctx = Some(topic))) val getDataResponses = retryRequestsUntilConnected(getDataRequests.toSeq) getDataResponses.flatMap { getDataResponse => val topic = getDataResponse.ctx.get.asInstanceOf[String] if (getDataResponse.resultCode == Code.OK) { val partitionMap = TopicZNode.decode(topic, getDataResponse.data).map { case (k, v) => (k.partition, v) } Map(topic -> partitionMap) } else if (getDataResponse.resultCode == Code.NONODE) { Map.empty[String, Map[Int, Seq[Int]]] } else { throw getDataResponse.resultException.get } }.toMap } Signed-off-by: Andrew Choi --- build.gradle | 2 +- .../MultiClusterTopicManagementService.java | 43 ++++++++++--------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/build.gradle b/build.gradle index ec106864..d1410b36 100644 --- a/build.gradle +++ b/build.gradle @@ -38,7 +38,7 @@ allprojects { compile 'net.savantly:graphite-client:1.1.0-RELEASE' compile 'com.timgroup:java-statsd-client:3.0.1' compile 'com.signalfx.public:signalfx-codahale:0.0.47' - compile group: 'org.apache.kafka', name: 'kafka_2.12', version: '2.4.1' + compile group: 'org.apache.kafka', name: 'kafka_2.12', version: '2.3.1' compile group: 'org.apache.kafka', name: 'kafka-clients', version: '2.4.1' testCompile 'org.mockito:mockito-core:2.24.0' testCompile 'org.testng:testng:6.8.8' diff --git a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java index 9b7ad805..81349ec3 100644 --- a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java +++ b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java @@ -32,7 +32,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import kafka.admin.AdminUtils; import kafka.admin.BrokerMetadata; 
-import kafka.controller.ReplicaAssignment; import kafka.server.ConfigType; import kafka.zk.KafkaZkClient; import org.apache.kafka.clients.admin.AdminClient; @@ -508,9 +507,9 @@ private static void reassignPartitions(KafkaZkClient zkClient, Collection } scala.collection.immutable.Set topicList = new scala.collection.immutable.Set.Set1<>(topic); - scala.collection.Map - currentAssignment = zkClient.getPartitionAssignmentForTopics(topicList).apply(topic); - String currentAssignmentJson = formatAsOldAssignmentJson(topic, currentAssignment); + scala.collection.Map> currentAssignment = + zkClient.getPartitionAssignmentForTopics(topicList).apply(topic); + String currentAssignmentJson = formatAsNewReassignmentJson(topic, currentAssignment); String newAssignmentJson = formatAsNewReassignmentJson(topic, assignedReplicas); LOGGER.info("Reassign partitions for topic " + topic); @@ -568,23 +567,25 @@ static boolean someBrokerNotElectedLeader(List partitionInfo * {"topic":"kmf-topic","partition":0,"replicas":[2,0]}]} * */ - private static String formatAsOldAssignmentJson(String topic, scala.collection.Map partitionsToBeReassigned) { - StringBuilder bldr = new StringBuilder(); - bldr.append("{\"version\":1,\"partitions\":[\n"); - for (int partition = 0; partition < partitionsToBeReassigned.size(); partition++) { - bldr.append(" {\"topic\":\"").append(topic).append("\",\"partition\":").append(partition).append(",\"replicas\":["); - ReplicaAssignment replicas = partitionsToBeReassigned.apply(partition); - for (int replicaIndex = 0; replicaIndex < replicas.replicas().size(); replicaIndex++) { - Object replica = replicas.replicas().apply(replicaIndex); - bldr.append(replica).append(","); - } - bldr.setLength(bldr.length() - 1); - bldr.append("]},\n"); - } - bldr.setLength(bldr.length() - 2); - bldr.append("]}"); - return bldr.toString(); - } + + // TODO (andrewchoi5): uncomment this method when Xinfra Monitor is upgraded to 'org.apache.kafka' 'kafka_2.12' version '2.4.1' +// private 
static String formatAsOldAssignmentJson(String topic, scala.collection.Map partitionsToBeReassigned) { +// StringBuilder bldr = new StringBuilder(); +// bldr.append("{\"version\":1,\"partitions\":[\n"); +// for (int partition = 0; partition < partitionsToBeReassigned.size(); partition++) { +// bldr.append(" {\"topic\":\"").append(topic).append("\",\"partition\":").append(partition).append(",\"replicas\":["); +// ReplicaAssignment replicas = partitionsToBeReassigned.apply(partition); +// for (int replicaIndex = 0; replicaIndex < replicas.replicas().size(); replicaIndex++) { +// Object replica = replicas.replicas().apply(replicaIndex); +// bldr.append(replica).append(","); +// } +// bldr.setLength(bldr.length() - 1); +// bldr.append("]},\n"); +// } +// bldr.setLength(bldr.length() - 2); +// bldr.append("]}"); +// return bldr.toString(); +// } /** * @param topic Kafka topic From 80a6ba0ff6eab4706a3c2deb4ecbf076d1d1c543 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Sun, 31 May 2020 11:48:26 -0700 Subject: [PATCH 14/89] Update Apache kafka kafka-clients version 2.3.1 #256 Our internal kmf mp relies not on apache kafka, but linkedin/kafka which still to this date relies on the old copy - 2.3.*. It makes more sense to use java kafka-clients 2.3.1 rather than 2.4.1 to better monitor our kafka clusters. 
compile group: 'org.apache.kafka', name: 'kafka-clients', version: '2.3.1' Signed-off-by: Andrew Choi --- build.gradle | 2 +- .../com/linkedin/kmf/services/ConsumeService.java | 5 ++--- .../services/MultiClusterTopicManagementService.java | 12 +++--------- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/build.gradle b/build.gradle index d1410b36..091bbf0f 100644 --- a/build.gradle +++ b/build.gradle @@ -39,7 +39,7 @@ allprojects { compile 'com.timgroup:java-statsd-client:3.0.1' compile 'com.signalfx.public:signalfx-codahale:0.0.47' compile group: 'org.apache.kafka', name: 'kafka_2.12', version: '2.3.1' - compile group: 'org.apache.kafka', name: 'kafka-clients', version: '2.4.1' + compile group: 'org.apache.kafka', name: 'kafka-clients', version: '2.3.1' testCompile 'org.mockito:mockito-core:2.24.0' testCompile 'org.testng:testng:6.8.8' } diff --git a/src/main/java/com/linkedin/kmf/services/ConsumeService.java b/src/main/java/com/linkedin/kmf/services/ConsumeService.java index 1a6535d8..307dffab 100644 --- a/src/main/java/com/linkedin/kmf/services/ConsumeService.java +++ b/src/main/java/com/linkedin/kmf/services/ConsumeService.java @@ -37,7 +37,7 @@ import org.apache.kafka.common.metrics.Metrics; import org.apache.kafka.common.metrics.MetricsReporter; import org.apache.kafka.common.metrics.Sensor; -import org.apache.kafka.common.metrics.stats.CumulativeSum; +import org.apache.kafka.common.metrics.stats.Total; import org.apache.kafka.common.utils.SystemTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -241,8 +241,7 @@ public synchronized void start() { @SuppressWarnings("ConstantConditions") double partitionCount = topicDescription.partitions().size(); topicPartitionCount.add( - new MetricName("topic-partitions-count", METRIC_GROUP_NAME, "The total number of partitions for the topic.", tags), - new CumulativeSum(partitionCount)); + new MetricName("topic-partitions-count", METRIC_GROUP_NAME, "The total number of partitions for the 
topic.", tags), new Total(partitionCount)); } } diff --git a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java index 81349ec3..d2eb667d 100644 --- a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java +++ b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java @@ -37,12 +37,10 @@ import org.apache.kafka.clients.admin.AdminClient; import org.apache.kafka.clients.admin.AdminClientConfig; import org.apache.kafka.clients.admin.CreatePartitionsResult; -import org.apache.kafka.clients.admin.ElectLeadersOptions; -import org.apache.kafka.clients.admin.ElectLeadersResult; +import org.apache.kafka.clients.admin.ElectPreferredLeadersResult; import org.apache.kafka.clients.admin.NewPartitions; import org.apache.kafka.clients.admin.NewTopic; import org.apache.kafka.clients.admin.TopicDescription; -import org.apache.kafka.common.ElectionType; import org.apache.kafka.common.KafkaException; import org.apache.kafka.common.KafkaFuture; import org.apache.kafka.common.Node; @@ -481,13 +479,9 @@ private void triggerPreferredLeaderElection(List partitionIn for (TopicPartitionInfo javaPartitionInfo : partitionInfoList) { partitions.add(new TopicPartition(partitionTopic, javaPartitionInfo.partition())); } + ElectPreferredLeadersResult electPreferredLeadersResult = _adminClient.electPreferredLeaders(partitions); - ElectLeadersOptions newOptions = new ElectLeadersOptions(); - ElectionType electionType = ElectionType.PREFERRED; - Set topicPartitions = new HashSet<>(partitions); - ElectLeadersResult electLeadersResult = _adminClient.electLeaders(electionType, topicPartitions, newOptions); - - LOGGER.info("{}: triggerPreferredLeaderElection - {}", this.getClass().toString(), electLeadersResult.all().get()); + LOGGER.info("{}: triggerPreferredLeaderElection - {}", this.getClass().toString(), 
electPreferredLeadersResult.all().get()); } private static void reassignPartitions(KafkaZkClient zkClient, Collection brokers, String topic, int partitionCount, int replicationFactor) { From ac73d65c271df8ba2bc6847776e156ad7a595c93 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Mon, 1 Jun 2020 12:37:21 -0700 Subject: [PATCH 15/89] Xinfra Monitor Rebranding Operation from Kafka Monitor Signed-off-by: Andrew Choi Xinfra Monitor Rebranding Operation from Kafka Monitor Signed-off-by: Andrew Choi --- bin/kafka-monitor-start.sh | 2 +- bin/windows/kafka-monitor-start.bat | 2 +- bin/windows/kmf-run-class.bat | 2 +- .../{KafkaMonitor.java => XinfraMonitor.java} | 14 +++++----- .../com/linkedin/kmf/KafkaMonitorTest.java | 28 +++++++++---------- 5 files changed, 24 insertions(+), 24 deletions(-) rename src/main/java/com/linkedin/kmf/{KafkaMonitor.java => XinfraMonitor.java} (94%) diff --git a/bin/kafka-monitor-start.sh b/bin/kafka-monitor-start.sh index eb4863dc..0c389bfa 100755 --- a/bin/kafka-monitor-start.sh +++ b/bin/kafka-monitor-start.sh @@ -9,4 +9,4 @@ base_dir=$(dirname $0) -exec $base_dir/kmf-run-class.sh com/linkedin/kmf/KafkaMonitor $@ +exec $base_dir/kmf-run-class.sh com/linkedin/kmf/XinfraMonitor $@ diff --git a/bin/windows/kafka-monitor-start.bat b/bin/windows/kafka-monitor-start.bat index abba5640..d8928ee2 100644 --- a/bin/windows/kafka-monitor-start.bat +++ b/bin/windows/kafka-monitor-start.bat @@ -19,7 +19,7 @@ IF [%1] EQU [] ( EXIT /B 1 ) -set COMMAND=%BASE_DIR%\kmf-run-class.bat com.linkedin.kmf.KafkaMonitor %* +set COMMAND=%BASE_DIR%\kmf-run-class.bat com.linkedin.kmf.XinfraMonitor %* rem echo basedir: %BASE_DIR% diff --git a/bin/windows/kmf-run-class.bat b/bin/windows/kmf-run-class.bat index 65fb1cf9..619f70d3 100644 --- a/bin/windows/kmf-run-class.bat +++ b/bin/windows/kmf-run-class.bat @@ -10,7 +10,7 @@ REM an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expre setlocal enabledelayedexpansion IF [%1] EQU [] ( - echo USAGE: %0 
com.linkedin.kmf.KafkaMonitor config/kafka-monitor.properties + echo USAGE: %0 com.linkedin.kmf.XinfraMonitor config/kafka-monitor.properties EXIT /B 1 ) diff --git a/src/main/java/com/linkedin/kmf/KafkaMonitor.java b/src/main/java/com/linkedin/kmf/XinfraMonitor.java similarity index 94% rename from src/main/java/com/linkedin/kmf/KafkaMonitor.java rename to src/main/java/com/linkedin/kmf/XinfraMonitor.java index db7c745d..626bf105 100644 --- a/src/main/java/com/linkedin/kmf/KafkaMonitor.java +++ b/src/main/java/com/linkedin/kmf/XinfraMonitor.java @@ -39,8 +39,8 @@ * This is the main entry point of the monitor. It reads the configuration and manages the life cycle of the monitoring * applications. */ -public class KafkaMonitor { - private static final Logger LOG = LoggerFactory.getLogger(KafkaMonitor.class); +public class XinfraMonitor { + private static final Logger LOG = LoggerFactory.getLogger(XinfraMonitor.class); public static final String CLASS_NAME_CONFIG = "class.name"; private static final String METRIC_GROUP_NAME = "kafka-monitor"; private static final String JMX_PREFIX = "kmf"; @@ -63,7 +63,7 @@ public class KafkaMonitor { * @throws Exception */ @SuppressWarnings({"rawtypes", "unchecked"}) - public KafkaMonitor(Map allClusterProps) throws Exception { + public XinfraMonitor(Map allClusterProps) throws Exception { _apps = new ConcurrentHashMap<>(); _services = new ConcurrentHashMap<>(); @@ -179,7 +179,7 @@ public void awaitShutdown() { @SuppressWarnings("rawtypes") public static void main(String[] args) throws Exception { if (args.length <= 0) { - LOG.info("USAGE: java [options] " + KafkaMonitor.class.getName() + " config/kafka-monitor.properties"); + LOG.info("USAGE: java [options] " + XinfraMonitor.class.getName() + " config/kafka-monitor.properties"); return; } @@ -194,11 +194,11 @@ public static void main(String[] args) throws Exception { @SuppressWarnings("unchecked") Map props = new ObjectMapper().readValue(buffer.toString(), Map.class); - 
KafkaMonitor kafkaMonitor = new KafkaMonitor(props); - kafkaMonitor.start(); + XinfraMonitor xinfraMonitor = new XinfraMonitor(props); + xinfraMonitor.start(); LOG.info("Xinfra Monitor (KafkaMonitor) started."); - kafkaMonitor.awaitShutdown(); + xinfraMonitor.awaitShutdown(); } } diff --git a/src/test/java/com/linkedin/kmf/KafkaMonitorTest.java b/src/test/java/com/linkedin/kmf/KafkaMonitorTest.java index e8d16934..fa6e65c0 100644 --- a/src/test/java/com/linkedin/kmf/KafkaMonitorTest.java +++ b/src/test/java/com/linkedin/kmf/KafkaMonitorTest.java @@ -24,41 +24,41 @@ public class KafkaMonitorTest { @Test public void lifecycleTest() throws Exception { - KafkaMonitor kafkaMonitor = kafkaMonitor(); + XinfraMonitor xinfraMonitor = kafkaMonitor(); /* Nothing should be started */ org.testng.Assert.assertEquals(FakeService.startCount.get(), 0); org.testng.Assert.assertEquals(FakeService.stopCount.get(), 0); /* Should accept but ignore start because start has not been called */ - kafkaMonitor.stop(); + xinfraMonitor.stop(); org.testng.Assert.assertEquals(FakeService.stopCount.get(), 0); /* Should start */ - kafkaMonitor.start(); + xinfraMonitor.start(); org.testng.Assert.assertEquals(FakeService.startCount.get(), 1); /* Should allow start to be called more than once */ - kafkaMonitor.stop(); - kafkaMonitor.stop(); + xinfraMonitor.stop(); + xinfraMonitor.stop(); org.testng.Assert.assertEquals(FakeService.startCount.get(), 1); org.testng.Assert.assertEquals(FakeService.stopCount.get(), 1); /* Should be allowed to shutdown more than once. 
*/ - kafkaMonitor.awaitShutdown(); - kafkaMonitor.awaitShutdown(); + xinfraMonitor.awaitShutdown(); + xinfraMonitor.awaitShutdown(); } @Test public void awaitShutdownOtherThread() throws Exception { - final KafkaMonitor kafkaMonitor = kafkaMonitor(); + final XinfraMonitor xinfraMonitor = kafkaMonitor(); final AtomicReference error = new AtomicReference<>(); Thread t = new Thread("test awaitshutdown thread") { @Override public void run() { try { - kafkaMonitor.awaitShutdown(); + xinfraMonitor.awaitShutdown(); } catch (Throwable t) { error.set(t); } @@ -66,21 +66,21 @@ public void run() { }; t.start(); - kafkaMonitor.start(); + xinfraMonitor.start(); Thread.sleep(100); - kafkaMonitor.stop(); + xinfraMonitor.stop(); t.join(500); org.testng.Assert.assertFalse(t.isAlive()); org.testng.Assert.assertEquals(error.get(), null); } - private KafkaMonitor kafkaMonitor() throws Exception { + private XinfraMonitor kafkaMonitor() throws Exception { FakeService.clearCounters(); Map config = new HashMap<>(); Map fakeServiceConfig = new HashMap<>(); - fakeServiceConfig.put(KafkaMonitor.CLASS_NAME_CONFIG, FakeService.class.getName()); + fakeServiceConfig.put(XinfraMonitor.CLASS_NAME_CONFIG, FakeService.class.getName()); config.put("fake-service", fakeServiceConfig); - return new KafkaMonitor(config); + return new XinfraMonitor(config); } From 8c1ea3a7be64ac5b5dbaf4aa10a1d6848c7afe7f Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Mon, 1 Jun 2020 17:18:07 -0700 Subject: [PATCH 16/89] Ensure KafkaMetricsReporterService carries the correct constructor parameters. Ensure KafkaMetricsReporterService carries the correct constructor parameters. Abstract out the helper methods with constructorContainsClass. The way Service Instantiations are done within Kafka Monitor will be updated with https://github.com/linkedin/kafka-monitor/pull/257. 
testing: produce.treat.zero.throughput.as.unavailable = true topic = kafka-monitor-topic1 zookeeper.connect = localhost:2181 (com.linkedin.kmf.services.configs.ProduceServiceConfig) [2020-05-29 14:34:57,457] INFO produce-service/ProduceService is initialized. (com.linkedin.kmf.services.ProduceService) [2020-05-29 14:34:57,468] INFO KafkaMetricsReporterServiceConfig values: bootstrap.servers = localhost:9092 report.interval.sec = 3 report.kafka.topic.replication.factor = 1 report.metrics.list = [kmf.services:type=produce-service,name=*:produce-availability-avg, kmf.services:type=consume-service,name=*:consume-availability-avg, kmf.services:type=produce-service,name=*:records-produced-total, kmf.services:type=consume-service,name=*:records-consumed-total, kmf.services:type=consume-service,name=*:records-lost-total, kmf.services:type=consume-service,name=*:records-duplicated-total, kmf.services:type=consume-service,name=*:records-delay-ms-avg, kmf.services:type=produce-service,name=*:records-produced-rate, kmf.services:type=produce-service,name=*:produce-error-rate, kmf.services:type=consume-service,name=*:consume-error-rate] topic = kafka-monitor-topic1 zookeeper.connect = localhost:2181 (com.linkedin.kmf.services.configs.KafkaMetricsReporterServiceConfig) [2020-05-29 14:34:57,694] INFO CreateTopicsResult: {kafka-monitor-topic1=KafkaFuture{value=null,exception=null,done=true}}. (com.linkedin.kmf.common.Utils) [2020-05-29 14:34:57,694] INFO Created monitoring topic kafka-monitor-topic1 in cluster with 1 partitions and replication factor of 1. (com.linkedin.kmf.common.Utils) [2020-05-29 14:34:57,694] INFO Completed the topic creation if it doesn't exist for kafka-monitor-topic1. (com.linkedin.kmf.common.Utils) [2020-05-29 14:34:57,706] INFO produce-service/ProduceService started (com.linkedin.kmf.services.ProduceService) [2020-05-29 14:34:57,707] INFO reporter-kafka-service/KafkaMetricsReporterService has started. 
(com.linkedin.kmf.services.KafkaMetricsReporterService) [2020-05-29 14:34:57,707] INFO Xinfra Monitor (KafkaMonitor) started. (com.linkedin.kmf.KafkaMonitor) [2020-05-29 14:35:00,722] INFO Kafka Metrics Reporter sending metrics = { "kmf.services:name=produce-service,type=produce-service:records-produced-rate" : "0.4514446227929374", "kmf.services:name=produce-service,type=produce-service:records-produced-total" : "27.0", "kmf.services:name=produce-service,type=produce-service:produce-availability-avg" : "1.0", "kmf.services:name=produce-service,type=produce-service:produce-error-rate" : "0.0" } (com.linkedin.kmf.services.KafkaMetricsReporterService) [2020-05-29 14:35:03,708] INFO Kafka Metrics Reporter sending metrics = { "kmf.services:name=produce-service,type=produce-service:records-produced-rate" : "0.9363765571440515", "kmf.services:name=produce-service,type=produce-service:records-produced-total" : "56.0", "kmf.services:name=produce-service,type=produce-service:produce-availability-avg" : "1.0", "kmf.services:name=produce-service,type=produce-service:produce-error-rate" : "0.0" } (com.linkedin.kmf.services.KafkaMetricsReporterService) [2020-05-29 14:35:06,708] INFO Kafka Metrics Reporter sending metrics = { "kmf.services:name=produce-service,type=produce-service:records-produced-rate" : "1.4045648357160772", "kmf.services:name=produce-service,type=produce-service:records-produced-total" : "84.0", "kmf.services:name=produce-service,type=produce-service:produce-availability-avg" : "1.0", "kmf.services:name=produce-service,type=produce-service:produce-error-rate" : "0.0" } (com.linkedin.kmf.services.KafkaMetricsReporterService) [2020-05-29 14:35:09,709] INFO Kafka Metrics Reporter sending metrics = { "kmf.services:name=produce-service,type=produce-service:records-produced-rate" : "1.8894425308497476", "kmf.services:name=produce-service,type=produce-service:records-produced-total" : "113.0", 
"kmf.services:name=produce-service,type=produce-service:produce-availability-avg" : "1.0", "kmf.services:name=produce-service,type=produce-service:produce-error-rate" : "0.0" } (com.linkedin.kmf.services.KafkaMetricsReporterService) [2020-05-29 14:35:12,712] INFO Kafka Metrics Reporter sending metrics = { "kmf.services:name=produce-service,type=produce-service:records-produced-rate" : "2.3742246150244948", "kmf.services:name=produce-service,type=produce-service:records-produced-total" : "142.0", "kmf.services:name=produce-service,type=produce-service:produce-availability-avg" : "1.0", "kmf.services:name=produce-service,type=produce-service:produce-error-rate" : "0.0" } (com.linkedin.kmf.services.KafkaMetricsReporterService) [2020-05-29 14:35:15,709] INFO Kafka Metrics Reporter sending metrics = { "kmf.services:name=produce-service,type=produce-service:records-produced-rate" : "2.842571691330156", "kmf.services:name=produce-service,type=produce-service:records-produced-total" : "170.0", "kmf.services:name=produce-service,type=produce-service:produce-availability-avg" : "1.0", "kmf.services:name=produce-service,type=produce-service:produce-error-rate" : "0.0" } (com.linkedin.kmf.services.KafkaMetricsReporterService) f[2020-05-29 14:35:18,712] INFO Kafka Metrics Reporter sending metrics = { "kmf.services:name=produce-service,type=produce-service:records-produced-rate" : "3.3273140716960943", "kmf.services:name=produce-service,type=produce-service:records-produced-total" : "199.0", "kmf.services:name=produce-service,type=produce-service:produce-availability-avg" : "1.0", "kmf.services:name=produce-service,type=produce-service:produce-error-rate" : "0.0" } (com.linkedin.kmf.services.KafkaMetricsReporterService) Signed-off-by: Andrew Choi --- .../java/com/linkedin/kmf/XinfraMonitor.java | 17 ++++++++++++---- .../services/KafkaMetricsReporterService.java | 20 ++++++++++--------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git 
a/src/main/java/com/linkedin/kmf/XinfraMonitor.java b/src/main/java/com/linkedin/kmf/XinfraMonitor.java index 626bf105..b4e08b1a 100644 --- a/src/main/java/com/linkedin/kmf/XinfraMonitor.java +++ b/src/main/java/com/linkedin/kmf/XinfraMonitor.java @@ -27,6 +27,7 @@ import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.kafka.clients.admin.AdminClient; import org.apache.kafka.common.metrics.JmxReporter; import org.apache.kafka.common.metrics.MetricConfig; import org.apache.kafka.common.metrics.Metrics; @@ -60,7 +61,7 @@ public class XinfraMonitor { * For example, if there are 10 clusters to be monitored, then this Constructor will create 10 * num_apps_per_cluster * and 10 * num_services_per_cluster. * @param allClusterProps the properties of ALL kafka clusters for which apps and services need to be appended. - * @throws Exception + * @throws Exception when exception occurs while assigning Apps and Services */ @SuppressWarnings({"rawtypes", "unchecked"}) public XinfraMonitor(Map allClusterProps) throws Exception { @@ -80,7 +81,8 @@ public XinfraMonitor(Map allClusterProps) throws Exception { _apps.put(name, clusterApp); } else if (Service.class.isAssignableFrom(aClass)) { Constructor[] constructors = Class.forName(className).getConstructors(); - if (this.constructorContainsFuture(constructors)) { + if (this.constructorContainsClass(constructors, CompletableFuture.class)) { + // for ConsumeService public constructor CompletableFuture completableFuture = new CompletableFuture<>(); completableFuture.complete(null); ConsumerFactoryImpl consumerFactory = new ConsumerFactoryImpl(props); @@ -88,6 +90,13 @@ public XinfraMonitor(Map allClusterProps) throws Exception { .getConstructor(String.class, CompletableFuture.class, ConsumerFactory.class) .newInstance(name, completableFuture, consumerFactory); _services.put(name, service); + } else if 
(this.constructorContainsClass(constructors, AdminClient.class)) { + // for KafkaMetricsReporterService constructor + AdminClient adminClient = AdminClient.create(props); + Service service = (Service) Class.forName(className) + .getConstructor(Map.class, String.class, AdminClient.class) + .newInstance(props, name, adminClient); + _services.put(name, service); } else { Service service = (Service) Class.forName(className).getConstructor(Map.class, String.class).newInstance(props, name); _services.put(name, service); @@ -105,9 +114,9 @@ public XinfraMonitor(Map allClusterProps) throws Exception { (config, now) -> _offlineRunnables.size()); } - private boolean constructorContainsFuture(Constructor[] constructors) { + private boolean constructorContainsClass(Constructor[] constructors, Class classObject) { for (int n = 0; n < constructors[0].getParameterTypes().length; ++n) { - if (constructors[0].getParameterTypes()[n].equals(CompletableFuture.class)) { + if (constructors[0].getParameterTypes()[n].equals(classObject)) { return true; } } diff --git a/src/main/java/com/linkedin/kmf/services/KafkaMetricsReporterService.java b/src/main/java/com/linkedin/kmf/services/KafkaMetricsReporterService.java index 1e613fee..9bd8ac16 100644 --- a/src/main/java/com/linkedin/kmf/services/KafkaMetricsReporterService.java +++ b/src/main/java/com/linkedin/kmf/services/KafkaMetricsReporterService.java @@ -28,8 +28,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; + public class KafkaMetricsReporterService implements Service { - private static final Logger LOG = LoggerFactory.getLogger(KafkaMetricsReporterService.class); + private static final Logger LOGGER = LoggerFactory.getLogger(KafkaMetricsReporterService.class); private static final String METRICS_PRODUCER_ID = "kafka-metrics-reporter-id"; private final String _name; private final List _metricsNames; @@ -49,9 +50,10 @@ public KafkaMetricsReporterService(Map props, String name, Admin _brokerList = 
config.getString(KafkaMetricsReporterServiceConfig.BOOTSTRAP_SERVERS_CONFIG); initializeProducer(); _topic = config.getString(KafkaMetricsReporterServiceConfig.TOPIC_CONFIG); + Integer rf = config.getInt(KafkaMetricsReporterServiceConfig.TOPIC_REPLICATION_FACTOR); Utils.createTopicIfNotExists( _topic, - config.getShort(KafkaMetricsReporterServiceConfig.TOPIC_REPLICATION_FACTOR), + rf.shortValue(), 0, // parameter is set to 0 here since no matter the number of nodes, the topic partition number should be set to zero. 1, // fixed partition count 1 new Properties(), @@ -65,17 +67,17 @@ public synchronized void start() { try { reportMetrics(); } catch (Exception e) { - LOG.error(_name + "/KafkaMetricsReporterService failed to report metrics", e); + LOGGER.error(_name + "/KafkaMetricsReporterService failed to report metrics.", e); } }, _reportIntervalSec, _reportIntervalSec, TimeUnit.SECONDS); - LOG.info("{}/KafkaMetricsReporterService started", _name); + LOGGER.info("{}/KafkaMetricsReporterService has started.", _name); } @Override public synchronized void stop() { _executor.shutdown(); _producer.close(); - LOG.info("{}/KafkaMetricsReporterService stopped", _name); + LOGGER.info("{}/KafkaMetricsReporterService stopped.", _name); } @Override @@ -88,9 +90,9 @@ public void awaitShutdown() { try { _executor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { - LOG.info("Thread interrupted when waiting for {}/KafkaMetricsReporterService to shutdown", _name); + LOGGER.info("Thread interrupted when waiting for {}/KafkaMetricsReporterService to shutdown", _name); } - LOG.info("{}/KafkaMetricsReporterService shutdown completed", _name); + LOGGER.info("{}/KafkaMetricsReporterService shutdown completed", _name); } @@ -122,10 +124,10 @@ private void reportMetrics() { } } try { - LOG.debug("Kafka Metrics Reporter sending metrics = " + _parser.writerWithDefaultPrettyPrinter().writeValueAsString(metrics)); + LOGGER.info("Kafka Metrics 
Reporter sending metrics = " + _parser.writerWithDefaultPrettyPrinter().writeValueAsString(metrics)); _producer.send(new ProducerRecord<>(_topic, _parser.writeValueAsString(metrics))); } catch (JsonProcessingException e) { - LOG.warn("unsupported json format: " + metrics, e); + LOGGER.warn("unsupported json format: " + metrics, e); } } } From 2fc22fd4d9bda61bda8a3196cc3d94e60855e091 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Mon, 1 Jun 2020 18:03:49 -0700 Subject: [PATCH 17/89] Fallback to regular constructor parameters if the constructor doesn't include AdminClient. #253 Fallback to regular constructor parameters if the constructor doesn't include AdminClient. #253 --- .../kmf/services/ConsumerFactoryImpl.java | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/linkedin/kmf/services/ConsumerFactoryImpl.java b/src/main/java/com/linkedin/kmf/services/ConsumerFactoryImpl.java index f08e2472..e3427d4e 100644 --- a/src/main/java/com/linkedin/kmf/services/ConsumerFactoryImpl.java +++ b/src/main/java/com/linkedin/kmf/services/ConsumerFactoryImpl.java @@ -81,10 +81,28 @@ public ConsumerFactoryImpl(Map props) throws Exception { props.forEach(consumerProps::putIfAbsent); } - _baseConsumer = (KMBaseConsumer) Class.forName(consumerClassName) - .getConstructor(String.class, Properties.class, AdminClient.class) - .newInstance(_topic, consumerProps, adminClient()); + java.lang.reflect.Constructor constructor = adminClientConstructorIfExists(consumerClassName); + if (constructor != null) { + _baseConsumer = (KMBaseConsumer) constructor + .newInstance(_topic, consumerProps, adminClient()); + } else { + _baseConsumer = (KMBaseConsumer) Class.forName(consumerClassName) + .getConstructor(String.class, Properties.class) + .newInstance(_topic, consumerProps); + } + } + private static java.lang.reflect.Constructor adminClientConstructorIfExists(String consumerClassName) + throws ClassNotFoundException { + try { + return 
Class.forName(consumerClassName).getConstructor(String.class, Properties.class, AdminClient.class); + } catch (java.lang.NoSuchMethodException noSuchMethodException) { + LOG.info(consumerClassName + + " does not provide a constructor with signature (Ljava/lang/String;Ljava/util/Properties;Lorg/apache/kafka/clients/admin/AdminClient;)V - falling back to (Ljava/util/Properties;)V"); + return null; + } catch (ClassNotFoundException e) { + throw new ClassNotFoundException("The class was not found: ", e); + } } @Override From 10045c5002ff3fd20bffad7a89b2f94ab118bf13 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Mon, 1 Jun 2020 23:05:44 -0700 Subject: [PATCH 18/89] Service Factory + Individual Services' Factory Implementations (#257) Service Factory + Individual Services' Factory Implementations Because of the early design and architectural decisions of Xinfra Monitor, the KafkaMonitor class forces individual Xinfra Monitor Services to carry the same set of constructor parameters. This is a large caveat, as not all Service classes need identical constructor arguments. This is not enforced by the codebase. It is partially and only silently enforced by convention. Unmaintainable code + non-robust code. Non-evolvable code: each Service's constructor implementations aren't allowed to be evolved independently. 3. Change(s) in one Service's constructor absolutely force the other Services' constructors to also change their parameters, unfortunately. 
Signed off by : Andrew Choi --- .../java/com/linkedin/kmf/XinfraMonitor.java | 48 +++++---------- .../linkedin/kmf/XinfraMonitorConstants.java | 30 ++++++++++ .../kmf/services/ConsumeServiceFactory.java | 39 +++++++++++++ .../DefaultMetricsReporterServiceFactory.java | 35 +++++++++++ ...GraphiteMetricsReporterServiceFactory.java | 36 ++++++++++++ .../kmf/services/JolokiaServiceFactory.java | 36 ++++++++++++ .../KafkaMetricsReporterServiceFactory.java | 41 +++++++++++++ ...iClusterTopicManagementServiceFactory.java | 36 ++++++++++++ .../kmf/services/ProduceServiceFactory.java | 34 +++++++++++ .../linkedin/kmf/services/ServiceFactory.java | 17 ++++++ ...SignalFxMetricsReporterServiceFactory.java | 37 ++++++++++++ .../StatsdMetricsReporterServiceFactory.java | 37 ++++++++++++ .../TopicManagementServiceFactory.java | 37 ++++++++++++ .../com/linkedin/kmf/KafkaMonitorTest.java | 58 ++++++++++++++----- 14 files changed, 472 insertions(+), 49 deletions(-) create mode 100644 src/main/java/com/linkedin/kmf/XinfraMonitorConstants.java create mode 100644 src/main/java/com/linkedin/kmf/services/ConsumeServiceFactory.java create mode 100644 src/main/java/com/linkedin/kmf/services/DefaultMetricsReporterServiceFactory.java create mode 100644 src/main/java/com/linkedin/kmf/services/GraphiteMetricsReporterServiceFactory.java create mode 100644 src/main/java/com/linkedin/kmf/services/JolokiaServiceFactory.java create mode 100644 src/main/java/com/linkedin/kmf/services/KafkaMetricsReporterServiceFactory.java create mode 100644 src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceFactory.java create mode 100644 src/main/java/com/linkedin/kmf/services/ProduceServiceFactory.java create mode 100644 src/main/java/com/linkedin/kmf/services/ServiceFactory.java create mode 100644 src/main/java/com/linkedin/kmf/services/SignalFxMetricsReporterServiceFactory.java create mode 100644 src/main/java/com/linkedin/kmf/services/StatsdMetricsReporterServiceFactory.java create mode 
100644 src/main/java/com/linkedin/kmf/services/TopicManagementServiceFactory.java diff --git a/src/main/java/com/linkedin/kmf/XinfraMonitor.java b/src/main/java/com/linkedin/kmf/XinfraMonitor.java index b4e08b1a..e80246ca 100644 --- a/src/main/java/com/linkedin/kmf/XinfraMonitor.java +++ b/src/main/java/com/linkedin/kmf/XinfraMonitor.java @@ -7,27 +7,25 @@ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ + package com.linkedin.kmf; import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.kmf.apps.App; -import com.linkedin.kmf.services.ConsumerFactory; -import com.linkedin.kmf.services.ConsumerFactoryImpl; import com.linkedin.kmf.services.Service; +import com.linkedin.kmf.services.ServiceFactory; import java.io.BufferedReader; import java.io.FileReader; import java.lang.reflect.Constructor; import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.kafka.clients.admin.AdminClient; import org.apache.kafka.common.metrics.JmxReporter; import org.apache.kafka.common.metrics.MetricConfig; import org.apache.kafka.common.metrics.Metrics; @@ -42,9 +40,6 @@ */ public class XinfraMonitor { private static final Logger LOG = LoggerFactory.getLogger(XinfraMonitor.class); - public static final String CLASS_NAME_CONFIG = "class.name"; - private static final String METRIC_GROUP_NAME = "kafka-monitor"; - private static final String JMX_PREFIX = "kmf"; /** This is concurrent because healthCheck() can modify this map, but awaitShutdown() can be called at any time 
by * a different thread. @@ -63,7 +58,8 @@ public class XinfraMonitor { * @param allClusterProps the properties of ALL kafka clusters for which apps and services need to be appended. * @throws Exception when exception occurs while assigning Apps and Services */ - @SuppressWarnings({"rawtypes", "unchecked"}) + + @SuppressWarnings({"rawtypes"}) public XinfraMonitor(Map allClusterProps) throws Exception { _apps = new ConcurrentHashMap<>(); _services = new ConcurrentHashMap<>(); @@ -71,36 +67,20 @@ public XinfraMonitor(Map allClusterProps) throws Exception { for (Map.Entry clusterProperty : allClusterProps.entrySet()) { String name = clusterProperty.getKey(); Map props = clusterProperty.getValue(); - if (!props.containsKey(CLASS_NAME_CONFIG)) - throw new IllegalArgumentException(name + " is not configured with " + CLASS_NAME_CONFIG); - String className = (String) props.get(CLASS_NAME_CONFIG); + if (!props.containsKey(XinfraMonitorConstants.CLASS_NAME_CONFIG)) + throw new IllegalArgumentException(name + " is not configured with " + XinfraMonitorConstants.CLASS_NAME_CONFIG); + String className = (String) props.get(XinfraMonitorConstants.CLASS_NAME_CONFIG); Class aClass = Class.forName(className); if (App.class.isAssignableFrom(aClass)) { App clusterApp = (App) Class.forName(className).getConstructor(Map.class, String.class).newInstance(props, name); _apps.put(name, clusterApp); } else if (Service.class.isAssignableFrom(aClass)) { - Constructor[] constructors = Class.forName(className).getConstructors(); - if (this.constructorContainsClass(constructors, CompletableFuture.class)) { - // for ConsumeService public constructor - CompletableFuture completableFuture = new CompletableFuture<>(); - completableFuture.complete(null); - ConsumerFactoryImpl consumerFactory = new ConsumerFactoryImpl(props); - Service service = (Service) Class.forName(className) - .getConstructor(String.class, CompletableFuture.class, ConsumerFactory.class) - .newInstance(name, completableFuture, 
consumerFactory); - _services.put(name, service); - } else if (this.constructorContainsClass(constructors, AdminClient.class)) { - // for KafkaMetricsReporterService constructor - AdminClient adminClient = AdminClient.create(props); - Service service = (Service) Class.forName(className) - .getConstructor(Map.class, String.class, AdminClient.class) - .newInstance(props, name, adminClient); - _services.put(name, service); - } else { - Service service = (Service) Class.forName(className).getConstructor(Map.class, String.class).newInstance(props, name); - _services.put(name, service); - } + ServiceFactory serviceFactory = (ServiceFactory) Class.forName(className + XinfraMonitorConstants.FACTORY) + .getConstructor(Map.class, String.class) + .newInstance(props, name); + Service service = serviceFactory.createService(); + _services.put(name, service); } else { throw new IllegalArgumentException(className + " should implement either " + App.class.getSimpleName() + " or " + Service.class.getSimpleName()); } @@ -108,9 +88,9 @@ public XinfraMonitor(Map allClusterProps) throws Exception { _executor = Executors.newSingleThreadScheduledExecutor(); _offlineRunnables = new ConcurrentHashMap<>(); List reporters = new ArrayList<>(); - reporters.add(new JmxReporter(JMX_PREFIX)); + reporters.add(new JmxReporter(XinfraMonitorConstants.JMX_PREFIX)); Metrics metrics = new Metrics(new MetricConfig(), reporters, new SystemTime()); - metrics.addMetric(metrics.metricName("offline-runnable-count", METRIC_GROUP_NAME, "The number of Service/App that are not fully running"), + metrics.addMetric(metrics.metricName("offline-runnable-count", XinfraMonitorConstants.METRIC_GROUP_NAME, "The number of Service/App that are not fully running"), (config, now) -> _offlineRunnables.size()); } diff --git a/src/main/java/com/linkedin/kmf/XinfraMonitorConstants.java b/src/main/java/com/linkedin/kmf/XinfraMonitorConstants.java new file mode 100644 index 00000000..3d78b08f --- /dev/null +++ 
b/src/main/java/com/linkedin/kmf/XinfraMonitorConstants.java @@ -0,0 +1,30 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.kmf; + +/** + * Constant variables in Xinfra Monitor repo. + */ +public class XinfraMonitorConstants { + + public XinfraMonitorConstants() { + } + + static final String FACTORY = "Factory"; + + static final String CLASS_NAME_CONFIG = "class.name"; + + static final String METRIC_GROUP_NAME = "kafka-monitor"; + + static final String JMX_PREFIX = "kmf"; + + +} diff --git a/src/main/java/com/linkedin/kmf/services/ConsumeServiceFactory.java b/src/main/java/com/linkedin/kmf/services/ConsumeServiceFactory.java new file mode 100644 index 00000000..fcf3af16 --- /dev/null +++ b/src/main/java/com/linkedin/kmf/services/ConsumeServiceFactory.java @@ -0,0 +1,39 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.kmf.services; + +import java.util.Map; +import java.util.concurrent.CompletableFuture; + + +/** + * Factory that constructs the ConsumeService. 
+ */ +@SuppressWarnings({"rawtypes", "unchecked"}) +public class ConsumeServiceFactory implements ServiceFactory { + private final Map _props; + private final String _name; + + public ConsumeServiceFactory(Map props, String name) { + _props = props; + _name = name; + } + + @Override + public Service createService() throws Exception { + + CompletableFuture topicPartitionResult = new CompletableFuture<>(); + topicPartitionResult.complete(null); + ConsumerFactoryImpl consumerFactory = new ConsumerFactoryImpl(_props); + + return new ConsumeService(_name, topicPartitionResult, consumerFactory); + } +} diff --git a/src/main/java/com/linkedin/kmf/services/DefaultMetricsReporterServiceFactory.java b/src/main/java/com/linkedin/kmf/services/DefaultMetricsReporterServiceFactory.java new file mode 100644 index 00000000..29f89618 --- /dev/null +++ b/src/main/java/com/linkedin/kmf/services/DefaultMetricsReporterServiceFactory.java @@ -0,0 +1,35 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.kmf.services; + +import java.util.Map; + + +/** + * Factory class which instantiates a DefaultMetricsReporterService. 
+ */ +@SuppressWarnings("rawtypes") +public class DefaultMetricsReporterServiceFactory implements ServiceFactory { + private final Map _properties; + private final String _serviceName; + + public DefaultMetricsReporterServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + return new DefaultMetricsReporterService(_properties, _serviceName); + } +} diff --git a/src/main/java/com/linkedin/kmf/services/GraphiteMetricsReporterServiceFactory.java b/src/main/java/com/linkedin/kmf/services/GraphiteMetricsReporterServiceFactory.java new file mode 100644 index 00000000..2a34e7cd --- /dev/null +++ b/src/main/java/com/linkedin/kmf/services/GraphiteMetricsReporterServiceFactory.java @@ -0,0 +1,36 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.kmf.services; + +import java.util.Map; + + +/** + * Factory class which instantiates a GraphiteMetricsReporterServiceFactory service. 
+ */ +@SuppressWarnings("rawtypes") +public class GraphiteMetricsReporterServiceFactory implements ServiceFactory { + + private final Map _properties; + private final String _serviceName; + + public GraphiteMetricsReporterServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + return new GraphiteMetricsReporterService(_properties, _serviceName); + } +} diff --git a/src/main/java/com/linkedin/kmf/services/JolokiaServiceFactory.java b/src/main/java/com/linkedin/kmf/services/JolokiaServiceFactory.java new file mode 100644 index 00000000..540cdae5 --- /dev/null +++ b/src/main/java/com/linkedin/kmf/services/JolokiaServiceFactory.java @@ -0,0 +1,36 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.kmf.services; + +import java.util.Map; + + +/** + * Factory class which instantiates a JolokiaService service. 
+ */ +@SuppressWarnings("rawtypes") +public class JolokiaServiceFactory implements ServiceFactory { + + private final Map _properties; + private final String _serviceName; + + public JolokiaServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + return new JolokiaService(_properties, _serviceName); + } +} diff --git a/src/main/java/com/linkedin/kmf/services/KafkaMetricsReporterServiceFactory.java b/src/main/java/com/linkedin/kmf/services/KafkaMetricsReporterServiceFactory.java new file mode 100644 index 00000000..b72c2a4e --- /dev/null +++ b/src/main/java/com/linkedin/kmf/services/KafkaMetricsReporterServiceFactory.java @@ -0,0 +1,41 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.kmf.services; + +import java.util.Map; +import org.apache.kafka.clients.admin.AdminClient; + + +/** + * Factory class which instantiates a KafkaMetricsReporterService service object. 
+ */ +@SuppressWarnings("rawtypes") +public class KafkaMetricsReporterServiceFactory implements ServiceFactory { + + private final Map _properties; + private final String _serviceName; + + public KafkaMetricsReporterServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + + AdminClient adminClient = AdminClient.create(_properties); + + return new KafkaMetricsReporterService(_properties, _serviceName, adminClient); + + } +} diff --git a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceFactory.java b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceFactory.java new file mode 100644 index 00000000..bf802a6d --- /dev/null +++ b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceFactory.java @@ -0,0 +1,36 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.kmf.services; + +import java.util.Map; + + +/** + * Factory which instantiates a MultiClusterTopicManagementService service object. 
+ */ +@SuppressWarnings("rawtypes") +public class MultiClusterTopicManagementServiceFactory implements ServiceFactory { + + private final Map _properties; + private final String _serviceName; + + public MultiClusterTopicManagementServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + return new MultiClusterTopicManagementService(_properties, _serviceName); + } +} diff --git a/src/main/java/com/linkedin/kmf/services/ProduceServiceFactory.java b/src/main/java/com/linkedin/kmf/services/ProduceServiceFactory.java new file mode 100644 index 00000000..9102b700 --- /dev/null +++ b/src/main/java/com/linkedin/kmf/services/ProduceServiceFactory.java @@ -0,0 +1,34 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +package com.linkedin.kmf.services; + +import java.util.Map; + + +/** + * Factory that constructs the ProduceService + */ +@SuppressWarnings("rawtypes") +public class ProduceServiceFactory implements ServiceFactory { + private final Map _props; + private final String _name; + + public ProduceServiceFactory(Map props, String name) { + _props = props; + _name = name; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + return new ProduceService(_props, _name); + } +} diff --git a/src/main/java/com/linkedin/kmf/services/ServiceFactory.java b/src/main/java/com/linkedin/kmf/services/ServiceFactory.java new file mode 100644 index 00000000..0f5c9411 --- /dev/null +++ b/src/main/java/com/linkedin/kmf/services/ServiceFactory.java @@ -0,0 +1,17 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.kmf.services; + +public interface ServiceFactory { + + Service createService() throws Exception; + +} diff --git a/src/main/java/com/linkedin/kmf/services/SignalFxMetricsReporterServiceFactory.java b/src/main/java/com/linkedin/kmf/services/SignalFxMetricsReporterServiceFactory.java new file mode 100644 index 00000000..b62d7d7c --- /dev/null +++ b/src/main/java/com/linkedin/kmf/services/SignalFxMetricsReporterServiceFactory.java @@ -0,0 +1,37 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.kmf.services; + +import java.util.Map; + + +/** + * Factory class which instantiates a SignalFxMetricsReporterServiceFactory service. + */ +@SuppressWarnings("rawtypes") +public class SignalFxMetricsReporterServiceFactory implements ServiceFactory { + + private final Map _properties; + private final String _serviceName; + + public SignalFxMetricsReporterServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + return new SignalFxMetricsReporterService(_properties, _serviceName); + } +} + diff --git a/src/main/java/com/linkedin/kmf/services/StatsdMetricsReporterServiceFactory.java b/src/main/java/com/linkedin/kmf/services/StatsdMetricsReporterServiceFactory.java new file mode 100644 index 00000000..857c00e6 --- /dev/null +++ b/src/main/java/com/linkedin/kmf/services/StatsdMetricsReporterServiceFactory.java @@ -0,0 +1,37 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.kmf.services; + +import java.util.Map; + + +/** + * Factory class that constructs the StatsdMetricsReporterService. 
+ */ +@SuppressWarnings("rawtypes") +public class StatsdMetricsReporterServiceFactory implements ServiceFactory { + private final Map _properties; + private final String _name; + + public StatsdMetricsReporterServiceFactory(Map properties, String name) { + + _properties = properties; + _name = name; + } + + @Override + public Service createService() throws Exception { + + //noinspection unchecked + return new StatsdMetricsReporterService(_properties, _name); + + } +} diff --git a/src/main/java/com/linkedin/kmf/services/TopicManagementServiceFactory.java b/src/main/java/com/linkedin/kmf/services/TopicManagementServiceFactory.java new file mode 100644 index 00000000..2dfbdf54 --- /dev/null +++ b/src/main/java/com/linkedin/kmf/services/TopicManagementServiceFactory.java @@ -0,0 +1,37 @@ +/** + * Copyright 2020 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this + * file except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +package com.linkedin.kmf.services; + +import java.util.Map; + + +/** + * Factory class which constructs the TopicManagementService. 
+ */ +@SuppressWarnings("rawtypes") +public class TopicManagementServiceFactory implements ServiceFactory { + private final Map _properties; + private final String _serviceName; + + public TopicManagementServiceFactory(Map properties, String serviceName) { + + _properties = properties; + _serviceName = serviceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + + return new TopicManagementService(_properties, _serviceName); + + } +} diff --git a/src/test/java/com/linkedin/kmf/KafkaMonitorTest.java b/src/test/java/com/linkedin/kmf/KafkaMonitorTest.java index fa6e65c0..b794aaed 100644 --- a/src/test/java/com/linkedin/kmf/KafkaMonitorTest.java +++ b/src/test/java/com/linkedin/kmf/KafkaMonitorTest.java @@ -11,6 +11,7 @@ package com.linkedin.kmf; import com.linkedin.kmf.services.Service; +import com.linkedin.kmf.services.ServiceFactory; import java.util.HashMap; import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; @@ -27,22 +28,24 @@ public void lifecycleTest() throws Exception { XinfraMonitor xinfraMonitor = kafkaMonitor(); /* Nothing should be started */ - org.testng.Assert.assertEquals(FakeService.startCount.get(), 0); - org.testng.Assert.assertEquals(FakeService.stopCount.get(), 0); + org.testng.Assert.assertEquals(FakeService.START_COUNT.get(), 0); + org.testng.Assert.assertEquals(FakeService.STOP_COUNT.get(), 0); /* Should accept but ignore start because start has not been called */ + xinfraMonitor.stop(); - org.testng.Assert.assertEquals(FakeService.stopCount.get(), 0); + org.testng.Assert.assertEquals(FakeService.STOP_COUNT.get(), 0); /* Should start */ xinfraMonitor.start(); - org.testng.Assert.assertEquals(FakeService.startCount.get(), 1); + org.testng.Assert.assertEquals(FakeService.START_COUNT.get(), 1); /* Should allow start to be called more than once */ xinfraMonitor.stop(); xinfraMonitor.stop(); - org.testng.Assert.assertEquals(FakeService.startCount.get(), 1); - 
org.testng.Assert.assertEquals(FakeService.stopCount.get(), 1); + org.testng.Assert.assertEquals(FakeService.START_COUNT.get(), 1); + org.testng.Assert.assertEquals(FakeService.STOP_COUNT.get(), 1); + /* Should be allowed to shutdown more than once. */ xinfraMonitor.awaitShutdown(); @@ -78,16 +81,41 @@ private XinfraMonitor kafkaMonitor() throws Exception { FakeService.clearCounters(); Map config = new HashMap<>(); Map fakeServiceConfig = new HashMap<>(); - fakeServiceConfig.put(XinfraMonitor.CLASS_NAME_CONFIG, FakeService.class.getName()); + + fakeServiceConfig.put(XinfraMonitorConstants.CLASS_NAME_CONFIG, FakeService.class.getName()); config.put("fake-service", fakeServiceConfig); return new XinfraMonitor(config); + } + /** + * Factory class which instantiates a new FakeService service object. + */ + @SuppressWarnings("rawtypes") + static final class FakeServiceFactory implements ServiceFactory { + + private final Map _config; + private final String _serviceInstanceName; + + public FakeServiceFactory(Map config, String serviceInstanceName) { + + this._config = config; + this._serviceInstanceName = serviceInstanceName; + } + + @SuppressWarnings("unchecked") + @Override + public Service createService() throws Exception { + + return new KafkaMonitorTest.FakeService(_config, _serviceInstanceName); + + } + } static final class FakeService implements Service { - private static AtomicInteger startCount = new AtomicInteger(); - private static AtomicInteger stopCount = new AtomicInteger(); + private static final AtomicInteger START_COUNT = new AtomicInteger(); + private static final AtomicInteger STOP_COUNT = new AtomicInteger(); private final AtomicBoolean _isRunning = new AtomicBoolean(); /** required */ @@ -96,20 +124,20 @@ public FakeService(Map config, String serviceInstanceName) { } private static void clearCounters() { - startCount.set(0); - stopCount.set(0); + START_COUNT.set(0); + STOP_COUNT.set(0); } @Override public void start() { 
_isRunning.compareAndSet(false, true); - startCount.incrementAndGet(); + START_COUNT.incrementAndGet(); } @Override public synchronized void stop() { _isRunning.compareAndSet(true, false); - stopCount.incrementAndGet(); + STOP_COUNT.incrementAndGet(); notifyAll(); } @@ -121,9 +149,9 @@ public boolean isRunning() { @Override public synchronized void awaitShutdown() { try { - if (stopCount.get() == 0) { + if (STOP_COUNT.get() == 0) { wait(3_000); - if (stopCount.get() == 0) { + if (STOP_COUNT.get() == 0) { throw new IllegalStateException("Never notified."); } } From d9634a9441afc0abe2ed4116a732f77ce63ef7c6 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Tue, 2 Jun 2020 20:58:14 -0700 Subject: [PATCH 19/89] xinfra rebranding (#259) Signed-off-by: Andrew Choi --- README.md | 2 +- config/kafka-monitor.properties | 2 +- src/main/java/com/linkedin/kmf/XinfraMonitor.java | 4 ++-- src/main/java/com/linkedin/kmf/common/Utils.java | 2 +- .../{KafkaMonitorTest.java => XinfraMonitorTest.java} | 10 +++++----- 5 files changed, 10 insertions(+), 10 deletions(-) rename src/test/java/com/linkedin/kmf/{KafkaMonitorTest.java => XinfraMonitorTest.java} (94%) diff --git a/README.md b/README.md index 95b1ef63..07bbfbe7 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ $ cd kafka-monitor $ ./gradlew jar ``` -### Start KafkaMonitor to run tests/services specified in the config file +### Start XinfraMonitor to run tests/services specified in the config file ``` $ ./bin/kafka-monitor-start.sh config/kafka-monitor.properties ``` diff --git a/config/kafka-monitor.properties b/config/kafka-monitor.properties index 79f8b816..7553664a 100644 --- a/config/kafka-monitor.properties +++ b/config/kafka-monitor.properties @@ -6,7 +6,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# This properties file specifies the tests/services that KafkaMonitor +# This properties file specifies the tests/services that XinfraMonitor # should instantiate and run, together with the key/value pairs used to # configure these tests/services. It should have the following format: # diff --git a/src/main/java/com/linkedin/kmf/XinfraMonitor.java b/src/main/java/com/linkedin/kmf/XinfraMonitor.java index e80246ca..8a3cad14 100644 --- a/src/main/java/com/linkedin/kmf/XinfraMonitor.java +++ b/src/main/java/com/linkedin/kmf/XinfraMonitor.java @@ -52,7 +52,7 @@ public class XinfraMonitor { private final AtomicBoolean _isRunning = new AtomicBoolean(false); /** - * KafkaMonitor constructor creates apps and services for each of the individual clusters (properties) that's passed in. + * XinfraMonitor constructor creates apps and services for each of the individual clusters (properties) that's passed in. * For example, if there are 10 clusters to be monitored, then this Constructor will create 10 * num_apps_per_cluster * and 10 * num_services_per_cluster. * @param allClusterProps the properties of ALL kafka clusters for which apps and services need to be appended. 
@@ -185,7 +185,7 @@ public static void main(String[] args) throws Exception { Map props = new ObjectMapper().readValue(buffer.toString(), Map.class); XinfraMonitor xinfraMonitor = new XinfraMonitor(props); xinfraMonitor.start(); - LOG.info("Xinfra Monitor (KafkaMonitor) started."); + LOG.info("Xinfra Monitor has started."); xinfraMonitor.awaitShutdown(); } diff --git a/src/main/java/com/linkedin/kmf/common/Utils.java b/src/main/java/com/linkedin/kmf/common/Utils.java index 8aa0287f..4afaa966 100644 --- a/src/main/java/com/linkedin/kmf/common/Utils.java +++ b/src/main/java/com/linkedin/kmf/common/Utils.java @@ -130,7 +130,7 @@ private static int getBrokerCount(AdminClient adminClient) throws ExecutionExcep /** * @param timestamp time in Ms when this message is generated * @param topic topic this message is sent to - * @param idx index is consecutive numbers used by KafkaMonitor to determine duplicate or lost messages + * @param idx index is consecutive numbers used by XinfraMonitor to determine duplicate or lost messages * @param msgSize size of the message * @return string that encodes the above fields */ diff --git a/src/test/java/com/linkedin/kmf/KafkaMonitorTest.java b/src/test/java/com/linkedin/kmf/XinfraMonitorTest.java similarity index 94% rename from src/test/java/com/linkedin/kmf/KafkaMonitorTest.java rename to src/test/java/com/linkedin/kmf/XinfraMonitorTest.java index b794aaed..1844738b 100644 --- a/src/test/java/com/linkedin/kmf/KafkaMonitorTest.java +++ b/src/test/java/com/linkedin/kmf/XinfraMonitorTest.java @@ -21,11 +21,11 @@ @Test -public class KafkaMonitorTest { +public class XinfraMonitorTest { @Test public void lifecycleTest() throws Exception { - XinfraMonitor xinfraMonitor = kafkaMonitor(); + XinfraMonitor xinfraMonitor = xinfraMonitor(); /* Nothing should be started */ org.testng.Assert.assertEquals(FakeService.START_COUNT.get(), 0); @@ -54,7 +54,7 @@ public void lifecycleTest() throws Exception { @Test public void awaitShutdownOtherThread() 
throws Exception { - final XinfraMonitor xinfraMonitor = kafkaMonitor(); + final XinfraMonitor xinfraMonitor = xinfraMonitor(); final AtomicReference error = new AtomicReference<>(); Thread t = new Thread("test awaitshutdown thread") { @@ -77,7 +77,7 @@ public void run() { org.testng.Assert.assertEquals(error.get(), null); } - private XinfraMonitor kafkaMonitor() throws Exception { + private XinfraMonitor xinfraMonitor() throws Exception { FakeService.clearCounters(); Map config = new HashMap<>(); Map fakeServiceConfig = new HashMap<>(); @@ -107,7 +107,7 @@ public FakeServiceFactory(Map config, String serviceInstanceName) { @Override public Service createService() throws Exception { - return new KafkaMonitorTest.FakeService(_config, _serviceInstanceName); + return new XinfraMonitorTest.FakeService(_config, _serviceInstanceName); } } From 9b15dc0033c217d4150992ab3eb6548543b4afb4 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Wed, 3 Jun 2020 13:23:15 -0700 Subject: [PATCH 20/89] Xinfra Monitor Rebranding Migration Continuation Part III Signed-off-by: Andrew Choi Xinfra Monitor Rebranding Migration Continuation Part III Signed-off-by: Andrew Choi --- .travis.yml | 2 +- README.md | 14 +++++++------- bin/windows/kafka-monitor-start.bat | 2 +- bin/windows/kmf-run-class.bat | 4 ++-- ...ka-monitor-start.sh => xinfra-monitor-start.sh} | 0 ...onitor.properties => xinfra-monitor.properties} | 8 ++++---- docker/Dockerfile | 2 +- docker/kafka-monitor-docker-entry.sh | 2 +- src/main/java/com/linkedin/kmf/XinfraMonitor.java | 2 +- .../StatsdMetricsReporterServiceConfig.java | 2 +- 10 files changed, 19 insertions(+), 19 deletions(-) rename bin/{kafka-monitor-start.sh => xinfra-monitor-start.sh} (100%) rename config/{kafka-monitor.properties => xinfra-monitor.properties} (97%) diff --git a/.travis.yml b/.travis.yml index bb6c28bb..d866cfeb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,7 @@ jdk: oraclejdk8 env: global: - # these secure values are encrypted for 
linkedin/kafka-monitor. for use in forked repos, set env vars in your own build - they should win over these (as these wont be decodable in forked builds) + # these secure values are encrypted for linkedin/xinfra-monitor. for use in forked repos, set env vars in your own build - they should win over these (as these wont be decodable in forked builds) - secure: ghCdYRfSGI7BtA0rB0UfzEJWszKwvnSy1Y0LBSWn3wdo3e0ibHdofwfeQ5YAczXun+RpGvaUwJRgkSDOsL6DFF8N2hODNkzR+DFNRJv5CFOYuCPJZwPpw/Sr7blCkpZleWmJBW7sqRPa7Yyu5/QpdezRM4tHP/RCxjr9IKgQwOknYyVGUf81Wa3Y+MF5iqR434WItfFpHR7uU5sis592iR4aKS4bcEQ9DCqcrCsrr3RFAft6aM5NZjAhO7tXe3vu5IrtqbB/MFNLrnRO0T6kXcifVqVrHQ/ePzl2iyPIdKygiDgK9VOYg6TfT5maHLwoCK511qnhMlitsy/0qx8CsF33rS0hXS5+EDSJ3qoB38Rqgi2K08uGjWgwwXtUg+2CkyEDA2gpEDIELQSFvG0WEHmAim9Azfxiiyx+HwNJ0zkKeOPUXU3uLDeOGXsUB6vxqjrWvAGtUqCZ5g2qEI8geGH3Zopmg9j665FywawV0OAehj4I2Tn0rBDORbIsu3XWgcZIbq/D8sb4Xhb65CB4wSvcco5g25zQzXruDk6MjB76bthtnIbO9hvIVwADxx2MdWXOX873ApjRoffGtRev+ugfEZCEuWxBvtgJ2owCSdTOm8TIjAwjsc4eBuLqEWQplwnoGhRA12zch4t/itOGv0ABoT9/tVjhiLwtTzmHTFc= - secure: I88NyZbbsV4j5a14k1ModOLsQygs7cXjCY98QQcQNRV0FKvvk/ka7xEvwiOFPWFOFy7TUf9O2VjNA8M4oWGqop9MwYNysAccuX6y/VMAldOPlQ23PG7C+Tr44u2YgvbJCdjtmpVKh842yrU8nlHcfLKBGMaqRxjBg4kHNr0lyTGIU83UBsU26yFiUBuE0EwdWXcrlX+ZvfS523BezquWnFW+75R4z2e0i++/vGqKnlBsqXzzxR61aFnYuSN0CDj+XbG1hsc1dcNJbjLVfO8zOSd25oDYVEX+dfBEfD+4LL/3YSjM8EJvUZGghiaDR+K9nm26B+nfvWNOWH8PuqM94t65vueQ4qcUIUXqwhbIf5GixHGTOIj7QOjckPHZ4N4hYkpYMLwPRO+I5VuBEKlboCN90c3+OEEsBSUXSH/JIBI8mmiwkegPV89HtSRntFGFqYtL7Wg/GyJV7DCQSsacJDaM9ErX2z1sUd++QAQA18qdk1Ngl2hnBOWmTgmtefMo9u15iCxDZr+iMeeTA0IvsupyXeO6hN9xFuwRuXD/q40RIYcOgqFeYR9e/pRJ1A29PE4N6uqDe/JBA8oQFH0jCFbO2oEpVJlX9qgp29wWLSvm/93LRK3f8RLt8ZT1BVNEUnRqtkprbDVp2QlN/7fOz9MI9a00SAXv+siaz3oK++k= diff --git a/README.md b/README.md index 07bbfbe7..ee7a5975 100644 --- a/README.md +++ b/README.md @@ -45,8 +45,8 @@ Xinfra Monitor supports Apache Kafka 0.8 to 2.0:
  1. We advise advanced users to run Xinfra Monitor with -./bin/kafka-monitor-start.sh config/kafka-monitor.properties. The default -kafka-monitor.properties in the repo provides an simple example of how to +./bin/kafka-monitor-start.sh config/xinfra-monitor.properties. The default +xinfra-monitor.properties in the repo provides an simple example of how to monitor a single cluster. You probably need to change the value of zookeeper.connect and bootstrap.servers to point to your cluster.
  2. @@ -55,7 +55,7 @@ monitor a single cluster. You probably need to change the value of Config class for respective service, e.g. ProduceServiceConfig.java and ConsumeServiceConfig.java.
    -
  3. You can specify multiple SingleClusterMonitor in the kafka-monitor.properties to +
  4. You can specify multiple SingleClusterMonitor in the xinfra-monitor.properties to monitor multiple Kafka clusters in one Xinfra Monitor process. As another advanced use-case, you can point ProduceService and ConsumeService to two different Kafka clusters that are connected by MirrorMaker to monitor their end-to-end latency.

  5. @@ -92,16 +92,16 @@ $ ./gradlew jar ### Start XinfraMonitor to run tests/services specified in the config file ``` -$ ./bin/kafka-monitor-start.sh config/kafka-monitor.properties +$ ./bin/kafka-monitor-start.sh config/xinfra-monitor.properties ``` ### Run Xinfra Monitor with arbitrary producer/consumer configuration (e.g. SASL enabled client) -Edit `config/kafka-monitor.properties` to specify custom configurations for producer in the key/value map `produce.producer.props` in -`config/kafka-monitor.properties`. Similarly specify configurations for +Edit `config/xinfra-monitor.properties` to specify custom configurations for producer in the key/value map `produce.producer.props` in +`config/xinfra-monitor.properties`. Similarly specify configurations for consumer as well. The documentation for producer and consumer in the key/value maps can be found in the Apache Kafka wiki. ``` -$ ./bin/kafka-monitor-start.sh config/kafka-monitor.properties +$ ./bin/kafka-monitor-start.sh config/xinfra-monitor.properties ``` ### Run SingleClusterMonitor app to monitor kafka cluster diff --git a/bin/windows/kafka-monitor-start.bat b/bin/windows/kafka-monitor-start.bat index d8928ee2..e4531f91 100644 --- a/bin/windows/kafka-monitor-start.bat +++ b/bin/windows/kafka-monitor-start.bat @@ -15,7 +15,7 @@ popd IF [%1] EQU [] ( - echo USAGE: %0 config/kafka-monitor.properties + echo USAGE: %0 config/xinfra-monitor.properties EXIT /B 1 ) diff --git a/bin/windows/kmf-run-class.bat b/bin/windows/kmf-run-class.bat index 619f70d3..ca79a7b4 100644 --- a/bin/windows/kmf-run-class.bat +++ b/bin/windows/kmf-run-class.bat @@ -10,12 +10,12 @@ REM an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expre setlocal enabledelayedexpansion IF [%1] EQU [] ( - echo USAGE: %0 com.linkedin.kmf.XinfraMonitor config/kafka-monitor.properties + echo USAGE: %0 com.linkedin.kmf.XinfraMonitor config/xinfra-monitor.properties EXIT /B 1 ) IF [%2] EQU [] ( - echo USAGE: %0 %1 
config/kafka-monitor.properties + echo USAGE: %0 %1 config/xinfra-monitor.properties EXIT /B 1 ) diff --git a/bin/kafka-monitor-start.sh b/bin/xinfra-monitor-start.sh similarity index 100% rename from bin/kafka-monitor-start.sh rename to bin/xinfra-monitor-start.sh diff --git a/config/kafka-monitor.properties b/config/xinfra-monitor.properties similarity index 97% rename from config/kafka-monitor.properties rename to config/xinfra-monitor.properties index 7553664a..d36c1923 100644 --- a/config/kafka-monitor.properties +++ b/config/xinfra-monitor.properties @@ -45,7 +45,7 @@ { "single-cluster-monitor": { "class.name": "com.linkedin.kmf.apps.SingleClusterMonitor", - "topic": "kafka-monitor-topic", + "topic": "xinfra-monitor-topic", "zookeeper.connect": "localhost:2181", "bootstrap.servers": "localhost:9092", "request.timeout.ms": 9000, @@ -104,7 +104,7 @@ # Example produce-service to produce messages to cluster # "produce-service": { # "class.name": "com.linkedin.kmf.services.ProduceService", -# "topic": "kafka-monitor-topic", +# "topic": "xinfra-monitor-topic", # "zookeeper.connect": "localhost:2181", # "bootstrap.servers": "localhost:9092", # "consume.latency.sla.ms": "20000", @@ -115,7 +115,7 @@ # Example consume-service to consume messages # "consume-service": { # "class.name": "com.linkedin.kmf.services.ConsumeService", -# "topic": "kafka-monitor-topic", +# "topic": "xinfra-monitor-topic", # "zookeeper.connect": "localhost:2181", # "bootstrap.servers": "localhost:9092", # "consume.latency.sla.ms": "20000", @@ -143,7 +143,7 @@ # "report.interval.sec": 3, # "zookeeper.connect": "localhost:2181", # "bootstrap.servers": "localhost:9092", -# "topic": "kafka-monitor-topic-metrics", +# "topic": "xinfra-monitor-topic-metrics", # "report.kafka.topic.replication.factor": 1, # "report.metrics.list": [ # "kmf.services:type=produce-service,name=*:produce-availability-avg", diff --git a/docker/Dockerfile b/docker/Dockerfile index 8c6cbbd7..dcd49e6a 100644 --- 
a/docker/Dockerfile +++ b/docker/Dockerfile @@ -19,7 +19,7 @@ WORKDIR /opt/kafka-monitor ADD build/ build/ ADD bin/kafka-monitor-start.sh bin/kafka-monitor-start.sh ADD bin/kmf-run-class.sh bin/kmf-run-class.sh -ADD config/kafka-monitor.properties config/kafka-monitor.properties +ADD config/xinfra-monitor.properties config/xinfra-monitor.properties ADD config/log4j2.properties config/log4j2.properties ADD docker/kafka-monitor-docker-entry.sh kafka-monitor-docker-entry.sh ADD webapp/ webapp/ diff --git a/docker/kafka-monitor-docker-entry.sh b/docker/kafka-monitor-docker-entry.sh index 946b6e66..97554bb0 100755 --- a/docker/kafka-monitor-docker-entry.sh +++ b/docker/kafka-monitor-docker-entry.sh @@ -22,6 +22,6 @@ trap 'pkill java; exit 143' SIGTERM # wait for DNS services to be available sleep 10 -bin/kafka-monitor-start.sh config/kafka-monitor.properties & +bin/xinfra-monitor-start.sh config/xinfra-monitor.properties & wait $! \ No newline at end of file diff --git a/src/main/java/com/linkedin/kmf/XinfraMonitor.java b/src/main/java/com/linkedin/kmf/XinfraMonitor.java index 8a3cad14..df5e0eb9 100644 --- a/src/main/java/com/linkedin/kmf/XinfraMonitor.java +++ b/src/main/java/com/linkedin/kmf/XinfraMonitor.java @@ -168,7 +168,7 @@ public void awaitShutdown() { @SuppressWarnings("rawtypes") public static void main(String[] args) throws Exception { if (args.length <= 0) { - LOG.info("USAGE: java [options] " + XinfraMonitor.class.getName() + " config/kafka-monitor.properties"); + LOG.info("USAGE: java [options] " + XinfraMonitor.class.getName() + " config/xinfra-monitor.properties"); return; } diff --git a/src/main/java/com/linkedin/kmf/services/configs/StatsdMetricsReporterServiceConfig.java b/src/main/java/com/linkedin/kmf/services/configs/StatsdMetricsReporterServiceConfig.java index 0a431ced..146f7603 100644 --- a/src/main/java/com/linkedin/kmf/services/configs/StatsdMetricsReporterServiceConfig.java +++ 
b/src/main/java/com/linkedin/kmf/services/configs/StatsdMetricsReporterServiceConfig.java @@ -8,7 +8,7 @@ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * - * In order to enable the StatsD metrics export, add the following section to kafka-monitor.properties file + * In order to enable the StatsD metrics export, add the following section to xinfra-monitor.properties file * */ From 0a385dc9cd3f094755b4f0b177d0b91ad76c3cac Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Wed, 3 Jun 2020 17:07:04 -0700 Subject: [PATCH 21/89] update readme (#262) Signed-off-by: Andrew Choi --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ee7a5975..c8e34852 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Xinfra Monitor supports Apache Kafka 0.8 to 2.0:
    1. We advise advanced users to run Xinfra Monitor with -./bin/kafka-monitor-start.sh config/xinfra-monitor.properties. The default +./bin/xinfra-monitor-start.sh config/xinfra-monitor.properties. The default xinfra-monitor.properties in the repo provides an simple example of how to monitor a single cluster. You probably need to change the value of zookeeper.connect and bootstrap.servers to point to your cluster. @@ -92,7 +92,7 @@ $ ./gradlew jar ### Start XinfraMonitor to run tests/services specified in the config file ``` -$ ./bin/kafka-monitor-start.sh config/xinfra-monitor.properties +$ ./bin/xinfra-monitor-start.sh config/xinfra-monitor.properties ``` ### Run Xinfra Monitor with arbitrary producer/consumer configuration (e.g. SASL enabled client) @@ -101,7 +101,7 @@ Edit `config/xinfra-monitor.properties` to specify custom configurations for pro consumer as well. The documentation for producer and consumer in the key/value maps can be found in the Apache Kafka wiki. ``` -$ ./bin/kafka-monitor-start.sh config/xinfra-monitor.properties +$ ./bin/xinfra-monitor-start.sh config/xinfra-monitor.properties ``` ### Run SingleClusterMonitor app to monitor kafka cluster @@ -124,7 +124,7 @@ from the destination cluster. See config/multi-cluster-monitor.properties for the full jmx path for these metrics. ``` -$ ./bin/kafka-monitor-start.sh config/multi-cluster-monitor.properties +$ ./bin/xinfra-monitor-start.sh config/multi-cluster-monitor.properties ``` ### Get metric values (e.g. service availability, message loss rate) in real-time as time series graphs From b103f3cb32a2f5fa3fbff949572b3badd21168e6 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Thu, 4 Jun 2020 12:31:13 -0700 Subject: [PATCH 22/89] 1 - Remove deprecations in produce service for recordsProduced and produceError. The behaviour will remain unchanged and is purely to get rid of library deprecations. 
2 - Render fields local instead of instance variables inside src/main/java/com/linkedin/kmf/services/SignalFxMetricsReporterService.java. These variables inside the class don't need to be instance variables because they aren't used elsewhere. The code behaviour will remain the same. 3 - Suppression of Warnings for some IntelliJ level warnings. Makes no logic changes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1 - Remove deprecations in produce service for recordsProduced and produceError. The behaviour will remain unchanged and is purely to get rid of library deprecations. 2 - Render fields local instead of instance variables inside src/main/java/com/linkedin/kmf/services/SignalFxMetricsReporterService.java. These variables inside the class don't need to be instance variables because they aren't used elsewhere. The code behaviour will remain the same. 3 - Suppression of Warnings for some IntelliJ level warnings. Makes no logic changes. https://docs.oracle.com/javase/8/docs/api/java/lang/SuppressWarnings.html when you use IntelliJ IDEA to code - it will occasionally bring up warning signs, conventionally through some highlighting of words and texts. at build time these suppressions get rid of these annoying warnings. warnings will show up like this if we don’t have suppressions: image In compile time + run time, these suppressions make no difference. suppressions only exist to get rid of annoying warning signs in Intellij (or some other IDEs). 
Testing: local deploy Signed-off-by: Andrew Choi --- .../linkedin/kmf/services/ProduceService.java | 14 ++++++++++---- .../SignalFxMetricsReporterService.java | 19 ++++++++----------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/main/java/com/linkedin/kmf/services/ProduceService.java b/src/main/java/com/linkedin/kmf/services/ProduceService.java index 23d71326..4883af7e 100644 --- a/src/main/java/com/linkedin/kmf/services/ProduceService.java +++ b/src/main/java/com/linkedin/kmf/services/ProduceService.java @@ -54,6 +54,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +@SuppressWarnings("rawtypes") public class ProduceService implements Service { private static final Logger LOG = LoggerFactory.getLogger(ProduceService.class); private static final String METRIC_GROUP_NAME = "produce-service"; @@ -64,7 +65,7 @@ public class ProduceService implements Service { private final String _name; private final ProduceMetrics _sensors; private KMBaseProducer _producer; - private KMPartitioner _partitioner; + private final KMPartitioner _partitioner; private ScheduledExecutorService _produceExecutor; private final ScheduledExecutorService _handleNewPartitionsExecutor; private final int _produceDelayMs; @@ -276,8 +277,12 @@ public ProduceMetrics(final Metrics metrics, final Map tags) { double availabilitySum = 0.0; int partitionNum = _partitionNum.get(); for (int partition = 0; partition < partitionNum; partition++) { - double recordsProduced = metrics.metrics().get(metrics.metricName("records-produced-rate-partition-" + partition, METRIC_GROUP_NAME, tags)).value(); - double produceError = metrics.metrics().get(metrics.metricName("produce-error-rate-partition-" + partition, METRIC_GROUP_NAME, tags)).value(); + double recordsProduced = (double) metrics.metrics() + .get(metrics.metricName("records-produced-rate-partition-" + partition, METRIC_GROUP_NAME, tags)) + .metricValue(); + double produceError = (double) metrics.metrics() + 
.get(metrics.metricName("produce-error-rate-partition-" + partition, METRIC_GROUP_NAME, tags)) + .metricValue(); // If there is no error, error rate sensor may expire and the value may be NaN. Treat NaN as 0 for error rate. if (Double.isNaN(produceError) || Double.isInfinite(produceError)) { produceError = 0; @@ -402,6 +407,7 @@ public void run() { } } + @SuppressWarnings("NullableProblems") private class ProduceServiceThreadFactory implements ThreadFactory { private final AtomicInteger _threadId = new AtomicInteger(); @@ -411,7 +417,7 @@ public Thread newThread(Runnable r) { } private class HandleNewPartitionsThreadFactory implements ThreadFactory { - public Thread newThread(Runnable r) { + public Thread newThread(@SuppressWarnings("NullableProblems") Runnable r) { return new Thread(r, _name + "-produce-service-new-partition-handler"); } } diff --git a/src/main/java/com/linkedin/kmf/services/SignalFxMetricsReporterService.java b/src/main/java/com/linkedin/kmf/services/SignalFxMetricsReporterService.java index ee64f22a..8dcb3535 100644 --- a/src/main/java/com/linkedin/kmf/services/SignalFxMetricsReporterService.java +++ b/src/main/java/com/linkedin/kmf/services/SignalFxMetricsReporterService.java @@ -31,11 +31,9 @@ public class SignalFxMetricsReporterService implements Service { private final ScheduledExecutorService _executor; private final MetricRegistry _metricRegistry; private final SignalFxReporter _signalfxReporter; - private final String _signalfxUrl; - private final String _signalfxToken; - private MetricMetadata _metricMetadata; - private Map _metricMap; + private final MetricMetadata _metricMetadata; + private final Map _metricMap; private Map _dimensionsMap; public SignalFxMetricsReporterService(Map props, String name) throws Exception { @@ -44,10 +42,10 @@ public SignalFxMetricsReporterService(Map props, String name) th _name = name; _metricNames = config.getList(SignalFxMetricsReporterServiceConfig.REPORT_METRICS_CONFIG); _reportIntervalSec = 
config.getInt(SignalFxMetricsReporterServiceConfig.REPORT_INTERVAL_SEC_CONFIG); - _signalfxUrl = config.getString(SignalFxMetricsReporterServiceConfig.REPORT_SIGNALFX_URL); - _signalfxToken = config.getString(SignalFxMetricsReporterServiceConfig.SIGNALFX_TOKEN); + String signalfxUrl = config.getString(SignalFxMetricsReporterServiceConfig.REPORT_SIGNALFX_URL); + String signalfxToken = config.getString(SignalFxMetricsReporterServiceConfig.SIGNALFX_TOKEN); - if (StringUtils.isEmpty(_signalfxToken)) { + if (StringUtils.isEmpty(signalfxToken)) { throw new IllegalArgumentException("SignalFx token is not configured"); } @@ -60,11 +58,10 @@ public SignalFxMetricsReporterService(Map props, String name) th } SignalFxReporter.Builder sfxReportBuilder = new SignalFxReporter.Builder( - _metricRegistry, - _signalfxToken + _metricRegistry, signalfxToken ); - if (!StringUtils.isEmpty(_signalfxUrl)) { - sfxReportBuilder.setEndpoint(getSignalFxEndpoint(_signalfxUrl)); + if (!StringUtils.isEmpty(signalfxUrl)) { + sfxReportBuilder.setEndpoint(getSignalFxEndpoint(signalfxUrl)); } _signalfxReporter = sfxReportBuilder.build(); From 5a29f113084e8765c6d5b733c8a2ec35d7f53fb2 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Thu, 4 Jun 2020 14:17:22 -0700 Subject: [PATCH 23/89] Update Providing number of assignments by which the number of partitions was increased (#263) The partition count you are increasing the partitions by should equal the actual size of the new partition assignments. This PR achieves this. For instance: old partitions count = 5 new partitions count = 11 increase in partitions = 6 the size of the new partition assignments ought to be 6 also. Note: Random assignment is adequate here because the periodic runnable for maybeReassignPartitions runs the reassignment operation on an interval.
Signed-off-by: Andrew Choi --- .../MultiClusterTopicManagementService.java | 24 +++++++++++++++---- ...ultiClusterTopicManagementServiceTest.java | 23 ++++++++++-------- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java index d2eb667d..947cd9cc 100644 --- a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java +++ b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.Random; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; @@ -338,14 +339,13 @@ static List> newPartitionAssignments(int minPartitionNum, int part int partitionDifference = minPartitionNum - partitionNum; // leader assignments - - for (BrokerMetadata brokerMetadata : brokers) { + while (newPartitionAssignments.size() != partitionDifference) { List replicas = new ArrayList<>(); // leader replica/broker - - replicas.add(brokerMetadata.id()); + int brokerMetadata = randomBroker(brokers).id(); + replicas.add(brokerMetadata); + newPartitionAssignments.add(replicas); - if (newPartitionAssignments.size() == partitionDifference) { - break; - } } // follower assignments - @@ -364,6 +364,20 @@ static List> newPartitionAssignments(int minPartitionNum, int part return newPartitionAssignments; } + private static BrokerMetadata randomBroker(Set brokers) { + int brokerSetSize = brokers.size(); + // In practicality, the Random object should be rather more shared than this. 
+ int random = new Random().nextInt(brokerSetSize); + int index = 0; + for (BrokerMetadata brokerMetadata : brokers) { + if (index == random) + return brokerMetadata; + index++; + } + + throw new IllegalStateException("Couldn't find random broker."); + } + /** * Exposed package-private access for testing. Get the total number of partitions for a Kafka topic. * @return total number of topic partitions diff --git a/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java b/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java index e97bb407..69c04118 100644 --- a/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java +++ b/src/test/java/com/linkedin/kmf/services/MultiClusterTopicManagementServiceTest.java @@ -60,9 +60,11 @@ private void startTest() { nodeSet.add(new Node(3, "host-3", 2134)); nodeSet.add(new Node(4, "host-4", 2135)); nodeSet.add(new Node(5, "host-5", 2136)); - nodeSet.add(new Node(6, "host-5", 2136)); - nodeSet.add(new Node(7, "host-5", 2136)); - nodeSet.add(new Node(8, "host-5", 2136)); + nodeSet.add(new Node(6, "host-5", 2137)); + nodeSet.add(new Node(7, "host-5", 2138)); + nodeSet.add(new Node(8, "host-5", 2139)); + nodeSet.add(new Node(9, "host-5", 2140)); + nodeSet.add(new Node(10, "host-5", 2141)); _topicManagementHelper = Mockito.mock(MultiClusterTopicManagementService.TopicManagementHelper.class); _topicManagementHelper._topic = SERVICE_TEST_TOPIC; @@ -82,17 +84,18 @@ protected void maybeAddPartitionsTest() { for (Node broker : nodeSet) { brokerMetadataSet.add(new BrokerMetadata(broker.id(), Option.apply(broker.rack()))); } + + int minPartitionNum = 14; + int partitionNum = 5; + int rf = 4; + List> newPartitionAssignments = - MultiClusterTopicManagementService.TopicManagementHelper.newPartitionAssignments(11, 5, brokerMetadataSet, 4); + MultiClusterTopicManagementService.TopicManagementHelper.newPartitionAssignments(minPartitionNum, partitionNum, 
brokerMetadataSet, rf); Assert.assertNotNull(newPartitionAssignments); System.out.println(newPartitionAssignments); - Assert.assertEquals(newPartitionAssignments.get(0).get(0).intValue(), 1); - Assert.assertEquals(newPartitionAssignments.get(1).get(0).intValue(), 2); - Assert.assertEquals(newPartitionAssignments.get(2).get(0).intValue(), 3); - Assert.assertEquals(newPartitionAssignments.get(3).get(0).intValue(), 4); - Assert.assertEquals(newPartitionAssignments.get(4).get(0).intValue(), 5); - Assert.assertEquals(newPartitionAssignments.get(5).get(0).intValue(), 6); + Assert.assertEquals(newPartitionAssignments.size(), minPartitionNum - partitionNum); + Assert.assertEquals(newPartitionAssignments.get(0).size(), rf); } @Test From 1a3666f636ce71bc4faf2a1123cca2da366a0804 Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Fri, 5 Jun 2020 12:19:55 -0700 Subject: [PATCH 24/89] Conversion of Set to List for BrokerMetadata Collection #264 Set is unordered and doesn't allow for duplicates. List is ordered and allows for duplicates. Converting Set -> List for O(1) runtime complexity in lieu of O(n) caused by the usage of Set. Set -> List should be safe in this case since the Set contains non-duplicated values. TopicFactory interface declares the following member functions. 
Changing this method to return List could break the backward compatibility Set getBlackListedBrokers(String zkUrl); Signed-off-by: Andrew Choi --- .../MultiClusterTopicManagementService.java | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java index 947cd9cc..7fb95c03 100644 --- a/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java +++ b/src/main/java/com/linkedin/kmf/services/MultiClusterTopicManagementService.java @@ -335,7 +335,7 @@ static List> newPartitionAssignments(int minPartitionNum, int part // partition 3's preferred leader will be broker 1, // partition 4's preferred leader will be broker 2 and // partition 5's preferred leader will be broker 3. - List> newPartitionAssignments = new ArrayList<>(new ArrayList<>()); + List> newPartitionAssignments = new ArrayList<>(); int partitionDifference = minPartitionNum - partitionNum; // leader assignments - @@ -365,17 +365,25 @@ static List> newPartitionAssignments(int minPartitionNum, int part } private static BrokerMetadata randomBroker(Set brokers) { + + if (brokers == null || brokers.size() == 0) { + throw new IllegalArgumentException("brokers object is either null or empty."); + } + + // Using Set enforces the usage of loop which is O(n). + // As the list of brokers does not change in newPartitionAssignments, + // the acceptance of a List argument instead of a Set will be faster which is (O(1)) + List brokerMetadataList = new ArrayList<>(); + + // convert to a list so there's no need to create a index and iterate through this set + brokerMetadataList.addAll(brokers); + int brokerSetSize = brokers.size(); + // In practicality, the Random object should be rather more shared than this. 
int random = new Random().nextInt(brokerSetSize); - int index = 0; - for (BrokerMetadata brokerMetadata : brokers) { - if (index == random) - return brokerMetadata; - index++; - } - throw new IllegalStateException("Couldn't find random broker."); + return brokerMetadataList.get(random); } /** From 22e66b9aae54c87b3f8ee444cb721a14fd0b6b4a Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Fri, 5 Jun 2020 18:26:38 -0700 Subject: [PATCH 25/89] Travis CI clean up instead of the deprecated skip_cleanup under .travis YML file #265 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Travis CI has deprecated the use of the key skip_cleanup. Thus, have Travis CI use clean up instead of the deprecated skip_cleanup under .travis YML file Why? Such that the following tests don't fail. 1. continuous-integration/travis-ci/pr — The Travis CI build passed 2. continuous-integration/travis-ci/push — The Travis CI build passed Signed-off-by: Andrew Choi --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d866cfeb..511503f0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,7 @@ cache: deploy: provider: script script: bash -v scripts/publishToBintray.sh - skip_cleanup: true + cleanup: true on: all_branches: true tags: true From 23d2d5618cac91834f4f4cfbdc093abafff83d3f Mon Sep 17 00:00:00 2001 From: Andrew Choi Date: Mon, 8 Jun 2020 12:56:47 -0700 Subject: [PATCH 26/89] Minor, Update web application index HTML file -- Updated license year to 2020 Minor, Update web application index HTML file -- Updated license year to 2020 --- webapp/index.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/webapp/index.html b/webapp/index.html index 5c2a86d4..03c22fb8 100644 --- a/webapp/index.html +++ b/webapp/index.html @@ -1,5 +1,5 @@