-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-4964] [Streaming] Exactly-once semantics for Kafka #3798
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
76913e2
1d70625
0b94b33
4dafd1b
ce91c59
7d050bc
783b477
29c6b43
3c2a96a
4b078bf
8d7de4a
979da25
38bb727
326ff3c
6bf14f2
bcca8a4
37d3053
cac63ee
e09045b
8bfd6c0
1d50749
adf99a6
356c7cc
e93eb72
e86317b
0458e4e
548d529
c1bd6d9
d4a7cf7
bb80bbe
2e67117
19406cc
99d2eba
80fd6ae
2b340d8
9a838c2
0090553
9adaa0a
4354bce
825110f
8991017
0df3ebe
8c31855
59e29f6
1dc2941
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.rdd.kafka | ||
|
|
||
| import scala.util.control.NonFatal | ||
| import java.util.Properties | ||
| import kafka.api.{TopicMetadataRequest, TopicMetadataResponse} | ||
| import kafka.consumer.{ConsumerConfig, SimpleConsumer} | ||
|
|
||
/**
 * Convenience methods for interacting with a Kafka cluster.
 *
 * @param kafkaParams Kafka <a href="http://kafka.apache.org/documentation.html#configuration">
 * configuration parameters</a>. Requires "metadata.broker.list" or "bootstrap.servers"
 * to be set with Kafka broker(s), NOT zookeeper servers, specified in
 * host1:port1,host2:port2 form.
 */
class KafkaCluster(val kafkaParams: Map[String, String]) {

  /** (host, port) pairs parsed from "metadata.broker.list" or "bootstrap.servers". */
  val brokers: Array[(String, Int)] =
    kafkaParams.get("metadata.broker.list")
      .orElse(kafkaParams.get("bootstrap.servers"))
      .getOrElse(throw new IllegalArgumentException(
        "Must specify metadata.broker.list or bootstrap.servers"))
      .split(",").map { hp =>
        val hpa = hp.split(":")
        // Fail with a clear message instead of an opaque ArrayIndexOutOfBoundsException.
        require(hpa.length == 2, "Broker not in the form host:port: " + hp)
        (hpa(0), hpa(1).toInt)
      }

  /** Shared consumer config; see KafkaCluster.consumerConfig for the defaulting rules. */
  val config: ConsumerConfig = KafkaCluster.consumerConfig(kafkaParams)

  /** Opens a SimpleConsumer to the given broker, using the shared timeouts/buffer sizes. */
  def connect(host: String, port: Int): SimpleConsumer =
    new SimpleConsumer(
      host, port, config.socketTimeoutMs, config.socketReceiveBufferBytes, config.clientId)

  /** Tuple overload of connect(host, port). */
  def connect(hostAndPort: (String, Int)): SimpleConsumer =
    connect(hostAndPort._1, hostAndPort._2)

  /** Connects to the current leader of the given topic/partition, if one can be found. */
  def connectLeader(topic: String, partition: Int): Option[SimpleConsumer] =
    findLeader(topic, partition).map(connect)

  /**
   * Asks each broker in turn for topic metadata and returns the leader's (host, port)
   * for the given partition, or None if no broker reported a leader.
   *
   * Per-broker failures are deliberately swallowed (best-effort): a dead broker should
   * not abort the search, the next broker is simply tried.
   */
  def findLeader(topic: String, partition: Int): Option[(String, Int)] = {
    // The request is identical for every broker, so build it once, outside the loop.
    val req = TopicMetadataRequest(
      TopicMetadataRequest.CurrentVersion, 0, config.clientId, Seq(topic))
    // Lazily probe brokers in order; headOption stops at the first one that yields a
    // leader. This avoids the original nonlocal `return` from inside nested lambdas,
    // which is implemented by throwing NonLocalReturnControl.
    brokers.view.flatMap { hp =>
      var consumer: SimpleConsumer = null
      try {
        consumer = connect(hp)
        val resp: TopicMetadataResponse = consumer.send(req)
        resp.topicsMetadata.find(_.topic == topic)
          .flatMap(_.partitionsMetadata.find(_.partitionId == partition))
          .flatMap(_.leader.map(leader => (leader.host, leader.port)))
      } catch {
        // NonFatal only: OutOfMemoryError, InterruptedException, etc. still propagate.
        case NonFatal(_) => None
      } finally {
        if (consumer != null) consumer.close()
      }
    }.headOption
  }
}
|
|
||
object KafkaCluster {
  /**
   * Makes a consumer config without requiring group.id or zookeeper.connect,
   * since communicating with brokers also needs common settings such as timeouts.
   *
   * Any params the caller did supply are passed through unchanged; only missing
   * required keys are defaulted to "".
   */
  def consumerConfig(kafkaParams: Map[String, String]): ConsumerConfig = {
    val props = new Properties()
    kafkaParams.foreach { case (key, value) => props.put(key, value) }
    Seq("zookeeper.connect", "group.id").foreach { s =>
      // BUG FIX: Properties.contains (inherited from Hashtable) checks VALUES, not keys.
      // containsKey is what is meant here; the old code would also wrongly skip the
      // default whenever some unrelated param happened to have the value `s`.
      if (!props.containsKey(s)) {
        props.setProperty(s, "")
      }
    }
    new ConsumerConfig(props)
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you convert all the Scaladoc-style comments into Javadoc style for this PR?