Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
e768164
#2808 update kafka to version 0.8.2
Dec 7, 2014
d9dc2bc
Merge remote-tracking branch 'upstream/master' into wip-2808-kafka-0.…
Dec 23, 2014
2e67c66
#SPARK-2808 Update to Kafka 0.8.2.0 GA from beta.
Feb 5, 2015
6953429
[SPARK-2808][Streaming][Kafka] update kafka to 0.8.2
koeninger Feb 11, 2015
77de6c2
Merge branch 'master' into wip-2808-kafka-0.8.2-upgrade
koeninger Mar 18, 2015
407382e
[SPARK-2808][Streaming][Kafka] update kafka to 0.8.2.1
koeninger Mar 18, 2015
ed02d2c
[SPARK-2808][Streaming][Kafka] move default argument for api version …
koeninger Apr 15, 2015
1d10751
Merge branch 'master' into wip-2808-kafka-0.8.2-upgrade
koeninger Apr 27, 2015
c70ee43
[SPARK-2808][Streaming][Kafka] add more asserts to test, try to figur…
koeninger Apr 28, 2015
9edab4c
[SPARK-2808][Streaming][Kafka] more shots in the dark on jenkins fail…
koeninger Apr 28, 2015
af6f3ec
[SPARK-2808][Streaming][Kafka] delay test until latest leader offset …
koeninger Apr 29, 2015
61b3464
[SPARK-2808][Streaming][Kafka] delay for second send in boundary cond…
koeninger Apr 29, 2015
3824ce3
[SPARK-2808][Streaming][Kafka] naming / comments per tdas
koeninger Apr 29, 2015
2b92d3f
[SPARK-2808][Streaming][Kafka] wait for leader offsets in the java te…
koeninger Apr 29, 2015
2712649
[SPARK-2808][Streaming][Kafka] add more logging to python test, see w…
koeninger Apr 29, 2015
115aeee
Merge branch 'master' into wip-2808-kafka-0.8.2-upgrade
koeninger Apr 29, 2015
4c4557f
[SPARK-2808][Streaming][Kafka] add even more logging to python test
koeninger Apr 30, 2015
1d896e2
[SPARK-2808][Streaming][Kafka] add even even more logging to python test
koeninger Apr 30, 2015
bb0cfe2
Changes to debug flaky streaming tests.
tdas May 1, 2015
ae12eb2
Enable only kafka streaming test
tdas May 1, 2015
4bb7e40
Fix goof up
tdas May 1, 2015
9804030
removed sleeps.
tdas May 1, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions dev/run-tests
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ echo "========================================================================="

CURRENT_BLOCK=$BLOCK_RAT

./dev/check-license
# ./dev/check-license

echo ""
echo "========================================================================="
Expand All @@ -122,7 +122,7 @@ echo "========================================================================="

CURRENT_BLOCK=$BLOCK_SCALA_STYLE

./dev/lint-scala
# ./dev/lint-scala

echo ""
echo "========================================================================="
Expand All @@ -131,7 +131,7 @@ echo "========================================================================="

CURRENT_BLOCK=$BLOCK_PYTHON_STYLE

./dev/lint-python
# ./dev/lint-python

echo ""
echo "========================================================================="
Expand Down Expand Up @@ -173,7 +173,7 @@ CURRENT_BLOCK=$BLOCK_BUILD
build/mvn $HIVE_BUILD_ARGS clean package -DskipTests
else
echo -e "q\n" \
| build/sbt $HIVE_BUILD_ARGS package assembly/assembly streaming-kafka-assembly/assembly \
| build/sbt package assembly/assembly streaming-kafka-assembly/assembly \
| grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
fi
}
Expand All @@ -185,7 +185,7 @@ echo "========================================================================="

CURRENT_BLOCK=$BLOCK_MIMA

./dev/mima
# ./dev/mima

echo ""
echo "========================================================================="
Expand Down Expand Up @@ -222,9 +222,10 @@ CURRENT_BLOCK=$BLOCK_SPARK_UNIT_TESTS
# "${SBT_MAVEN_TEST_ARGS[@]}" is cool because it's an array.
# QUESTION: Why doesn't 'yes "q"' work?
# QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work?
echo -e "q\n" \
| build/sbt $SBT_MAVEN_PROFILES_ARGS "${SBT_MAVEN_TEST_ARGS[@]}" \
| grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"

echo "Running this"

# build/sbt "streaming/test-only *WriteAheadLog*" "streaming/test-only *JobGenerator*"
fi
}

Expand All @@ -247,8 +248,9 @@ echo "========================================================================="
CURRENT_BLOCK=$BLOCK_SPARKR_UNIT_TESTS

if [ $(command -v R) ]; then
./R/install-dev.sh
./R/run-tests.sh
#./R/install-dev.sh
#./R/run-tests.sh
echo ""
else
echo "Ignoring SparkR tests as R was not found in PATH"
fi
Expand Down
2 changes: 1 addition & 1 deletion external/kafka/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_${scala.binary.version}</artifactId>
<version>0.8.1.1</version>
<version>0.8.2.1</version>
<exclusions>
<exclusion>
<groupId>com.sun.jmx</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ package org.apache.spark.streaming.kafka
import scala.util.control.NonFatal
import scala.util.Random
import scala.collection.mutable.ArrayBuffer
import scala.collection.JavaConverters._
import java.util.Properties
import kafka.api._
import kafka.common.{ErrorMapping, OffsetMetadataAndError, TopicAndPartition}
import kafka.common.{ErrorMapping, OffsetAndMetadata, OffsetMetadataAndError, TopicAndPartition}
import kafka.consumer.{ConsumerConfig, SimpleConsumer}
import org.apache.spark.SparkException

Expand All @@ -37,6 +38,11 @@ private[spark]
class KafkaCluster(val kafkaParams: Map[String, String]) extends Serializable {
import KafkaCluster.{Err, LeaderOffset, SimpleConsumerConfig}

/**
 * Auxiliary constructor that takes a Java map.
 *
 * @param kafkaParams Kafka parameters as a `java.util.Map`; the entries are copied into an
 *                    immutable Scala `Map` and handed to the primary constructor
 */
def this(kafkaParams: java.util.Map[String, String]) =
  this(kafkaParams.asScala.toMap)

// ConsumerConfig isn't serializable
@transient private var _config: SimpleConsumerConfig = null

Expand Down Expand Up @@ -220,12 +226,22 @@ class KafkaCluster(val kafkaParams: Map[String, String]) extends Serializable {
// https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI
// scalastyle:on

// Consumer API version used by the overloads that do not take an explicit version.
// The value 0 indicates the original ZooKeeper-backed offset API.
def defaultConsumerApiVersion: Short = 0

/**
 * Looks up consumer offsets for a group using `defaultConsumerApiVersion`.
 * Requires Kafka >= 0.8.1.1
 *
 * This overload only forwards its arguments, plus the default API version, to the
 * three-argument `getConsumerOffsets`.
 *
 * @param groupId consumer group id, forwarded unchanged
 * @param topicAndPartitions topic / partitions to look up, forwarded unchanged
 * @return the result of the three-argument overload: either the accumulated `Err`, or a map
 *         from each `TopicAndPartition` to a `Long` offset
 */
def getConsumerOffsets(
groupId: String,
topicAndPartitions: Set[TopicAndPartition]
): Either[Err, Map[TopicAndPartition, Long]] =
getConsumerOffsets(groupId, topicAndPartitions, defaultConsumerApiVersion)

def getConsumerOffsets(
groupId: String,
topicAndPartitions: Set[TopicAndPartition],
consumerApiVersion: Short
): Either[Err, Map[TopicAndPartition, Long]] = {
getConsumerOffsetMetadata(groupId, topicAndPartitions).right.map { r =>
getConsumerOffsetMetadata(groupId, topicAndPartitions, consumerApiVersion).right.map { r =>
r.map { kv =>
kv._1 -> kv._2.offset
}
Expand All @@ -236,9 +252,16 @@ class KafkaCluster(val kafkaParams: Map[String, String]) extends Serializable {
def getConsumerOffsetMetadata(
groupId: String,
topicAndPartitions: Set[TopicAndPartition]
): Either[Err, Map[TopicAndPartition, OffsetMetadataAndError]] =
// Convenience overload: forwards to the three-argument version with defaultConsumerApiVersion.
getConsumerOffsetMetadata(groupId, topicAndPartitions, defaultConsumerApiVersion)

def getConsumerOffsetMetadata(
groupId: String,
topicAndPartitions: Set[TopicAndPartition],
consumerApiVersion: Short
): Either[Err, Map[TopicAndPartition, OffsetMetadataAndError]] = {
var result = Map[TopicAndPartition, OffsetMetadataAndError]()
val req = OffsetFetchRequest(groupId, topicAndPartitions.toSeq)
val req = OffsetFetchRequest(groupId, topicAndPartitions.toSeq, consumerApiVersion)
val errs = new Err
withBrokers(Random.shuffle(config.seedBrokers), errs) { consumer =>
val resp = consumer.fetchOffsets(req)
Expand Down Expand Up @@ -266,24 +289,39 @@ class KafkaCluster(val kafkaParams: Map[String, String]) extends Serializable {
def setConsumerOffsets(
groupId: String,
offsets: Map[TopicAndPartition, Long]
): Either[Err, Map[TopicAndPartition, Short]] =
// Convenience overload: forwards to the three-argument version with defaultConsumerApiVersion.
setConsumerOffsets(groupId, offsets, defaultConsumerApiVersion)

def setConsumerOffsets(
groupId: String,
offsets: Map[TopicAndPartition, Long],
consumerApiVersion: Short
): Either[Err, Map[TopicAndPartition, Short]] = {
setConsumerOffsetMetadata(groupId, offsets.map { kv =>
kv._1 -> OffsetMetadataAndError(kv._2)
})
val meta = offsets.map { kv =>
kv._1 -> OffsetAndMetadata(kv._2)
}
setConsumerOffsetMetadata(groupId, meta, consumerApiVersion)
}

/** Requires Kafka >= 0.8.1.1 */
def setConsumerOffsetMetadata(
groupId: String,
metadata: Map[TopicAndPartition, OffsetMetadataAndError]
metadata: Map[TopicAndPartition, OffsetAndMetadata]
): Either[Err, Map[TopicAndPartition, Short]] =
setConsumerOffsetMetadata(groupId, metadata, defaultConsumerApiVersion)

def setConsumerOffsetMetadata(
groupId: String,
metadata: Map[TopicAndPartition, OffsetAndMetadata],
consumerApiVersion: Short
): Either[Err, Map[TopicAndPartition, Short]] = {
var result = Map[TopicAndPartition, Short]()
val req = OffsetCommitRequest(groupId, metadata)
val req = OffsetCommitRequest(groupId, metadata, consumerApiVersion)
val errs = new Err
val topicAndPartitions = metadata.keySet
withBrokers(Random.shuffle(config.seedBrokers), errs) { consumer =>
val resp = consumer.commitOffsets(req)
val respMap = resp.requestInfo
val respMap = resp.commitStatus
val needed = topicAndPartitions.diff(result.keySet)
needed.foreach { tp: TopicAndPartition =>
respMap.get(tp).foreach { err: Short =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ import scala.language.postfixOps
import scala.util.control.NonFatal

import kafka.admin.AdminUtils
import kafka.api.Request
import kafka.common.TopicAndPartition
import kafka.producer.{KeyedMessage, Producer, ProducerConfig}
import kafka.serializer.StringEncoder
import kafka.server.{KafkaConfig, KafkaServer}
import kafka.utils.ZKStringSerializer
import kafka.utils.{ZKStringSerializer, ZkUtils}
import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer}
import org.I0Itec.zkclient.ZkClient

Expand Down Expand Up @@ -227,10 +229,34 @@ private class KafkaTestUtils extends Logging {
tryAgain(1)
}

/**
 * Wait until the latest leader offset for the given topic / partition equals the specified
 * offset.
 *
 * The check runs inside `eventually(Time(10000), Time(100))`; presumably that is a 10 second
 * timeout polled every 100 ms -- confirm against `eventually`.
 *
 * @param kc cluster handle used to query the latest leader offsets
 * @param topic topic to inspect
 * @param partition partition of `topic` to inspect
 * @param offset leader offset that must be reached
 */
def waitUntilLeaderOffset(
    kc: KafkaCluster,
    topic: String,
    partition: Int,
    offset: Long): Unit = {
  // Loop-invariant, so build it once instead of on every retry.
  val tp = TopicAndPartition(topic, partition)
  eventually(Time(10000), Time(100)) {
    val llo = kc.getLatestLeaderOffsets(Set(tp)).fold(
      // Same exception type that `.right.get` would raise on a Left, so retry behaviour is
      // unchanged, but the message now carries the underlying errors.
      errs => throw new NoSuchElementException(s"no latest leader offset for $tp: $errs"),
      offsets => offsets(tp).offset
    )
    assert(
      llo == offset,
      s"$topic $partition $offset not reached after timeout")
  }
}

private def waitUntilMetadataIsPropagated(topic: String, partition: Int): Unit = {
eventually(Time(10000), Time(100)) {
assert(
server.apis.metadataCache.containsTopicAndPartition(topic, partition),
server.apis.metadataCache.getPartitionInfo(topic, partition) match {
case Some(partitionState) =>
val leaderAndIsr = partitionState.leaderIsrAndControllerEpoch.leaderAndIsr
ZkUtils.getLeaderForPartition(zkClient, topic, partition).isDefined &&
Request.isValidBrokerId(leaderAndIsr.leader) &&
leaderAndIsr.isr.size >= 1

case _ =>
false
},
s"Partition [$topic, $partition] metadata not propagated after timeout"
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ public void testKafkaRDD() throws InterruptedException {
HashMap<String, String> kafkaParams = new HashMap<String, String>();
kafkaParams.put("metadata.broker.list", kafkaTestUtils.brokerAddress());

KafkaCluster kc = new KafkaCluster(kafkaParams);
kafkaTestUtils.waitUntilLeaderOffset(kc, topic1, 0, topic1data.length);
kafkaTestUtils.waitUntilLeaderOffset(kc, topic2, 0, topic2data.length);

OffsetRange[] offsetRanges = {
OffsetRange.create(topic1, 0, 0, 1),
OffsetRange.create(topic2, 0, 0, 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,16 @@ class KafkaRDDSuite extends FunSuite with BeforeAndAfterAll {
}

test("basic usage") {
val topic = "topicbasic"
val topic = s"topicbasic-${Random.nextInt}"
kafkaTestUtils.createTopic(topic)
val messages = Set("the", "quick", "brown", "fox")
kafkaTestUtils.sendMessages(topic, messages.toArray)


val kafkaParams = Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress,
"group.id" -> s"test-consumer-${Random.nextInt(10000)}")
"group.id" -> s"test-consumer-${Random.nextInt}")

val kc = new KafkaCluster(kafkaParams)
kafkaTestUtils.waitUntilLeaderOffset(kc, topic, 0, messages.size)

val offsetRanges = Array(OffsetRange(topic, 0, 0, messages.size))

Expand All @@ -73,34 +75,46 @@ class KafkaRDDSuite extends FunSuite with BeforeAndAfterAll {

test("iterator boundary conditions") {
// the idea is to find e.g. off-by-one errors between what kafka has available and the rdd
val topic = "topic1"
val topic = s"topicboundary-${Random.nextInt}"
val sent = Map("a" -> 5, "b" -> 3, "c" -> 10)
kafkaTestUtils.createTopic(topic)

val kafkaParams = Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress,
"group.id" -> s"test-consumer-${Random.nextInt(10000)}")
"group.id" -> s"test-consumer-${Random.nextInt}")

val kc = new KafkaCluster(kafkaParams)

// this is the "lots of messages" case
kafkaTestUtils.sendMessages(topic, sent)
val sentCount = sent.values.sum
kafkaTestUtils.waitUntilLeaderOffset(kc, topic, 0, sentCount)

// rdd defined from leaders after sending messages, should get the number sent
val rdd = getRdd(kc, Set(topic))

assert(rdd.isDefined)
assert(rdd.get.count === sent.values.sum, "didn't get all sent messages")

val ranges = rdd.get.asInstanceOf[HasOffsetRanges]
.offsetRanges.map(o => TopicAndPartition(o.topic, o.partition) -> o.untilOffset).toMap
val ranges = rdd.get.asInstanceOf[HasOffsetRanges].offsetRanges
val rangeCount = ranges.map(o => o.untilOffset - o.fromOffset).sum

kc.setConsumerOffsets(kafkaParams("group.id"), ranges)
assert(rangeCount === sentCount, "offset range didn't include all sent messages")
assert(rdd.get.count === sentCount, "didn't get all sent messages")

val rangesMap = ranges.map(o => TopicAndPartition(o.topic, o.partition) -> o.untilOffset).toMap

kc.setConsumerOffsets(kafkaParams("group.id"), rangesMap).fold(
err => throw new Exception(err.mkString("\n")),
_ => ()
)

// this is the "0 messages" case
val rdd2 = getRdd(kc, Set(topic))
// shouldn't get anything, since message is sent after rdd was defined
val sentOnlyOne = Map("d" -> 1)

kafkaTestUtils.sendMessages(topic, sentOnlyOne)
kafkaTestUtils.waitUntilLeaderOffset(kc, topic, 0, sentCount + 1)

assert(rdd2.isDefined)
assert(rdd2.get.count === 0, "got messages when there shouldn't be any")

Expand Down
Loading