Posted to commits@samza.apache.org by bo...@apache.org on 2018/10/11 21:27:06 UTC
[1/3] samza git commit: SAMZA-1868: Create new SamzaAdmin for Kafka
Repository: samza
Updated Branches:
refs/heads/master 3c78e06ac -> 63d33fa06
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemAdminWithMock.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemAdminWithMock.java b/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemAdminWithMock.java
new file mode 100644
index 0000000..6a03198
--- /dev/null
+++ b/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemAdminWithMock.java
@@ -0,0 +1,317 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.samza.system.kafka;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.Supplier;
+import kafka.admin.AdminClient;
+import kafka.utils.ZkUtils;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.common.PartitionInfo;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.samza.Partition;
+import org.apache.samza.SamzaException;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.JobConfig;
+import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.config.KafkaConsumerConfig;
+import org.apache.samza.config.MapConfig;
+import org.apache.samza.system.SystemStreamMetadata;
+import org.apache.samza.system.SystemStreamPartition;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import static org.apache.samza.system.kafka.KafkaSystemDescriptor.*;
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.*;
+
+
+public class TestKafkaSystemAdminWithMock {
+ private KafkaSystemAdmin kafkaSystemAdmin;
+ private Config testConfig;
+ private Consumer<byte[], byte[]> mockKafkaConsumer;
+ private PartitionInfo mockPartitionInfo0;
+ private PartitionInfo mockPartitionInfo1;
+ private TopicPartition testTopicPartition0;
+ private TopicPartition testTopicPartition1;
+
+ private ConcurrentHashMap<String, KafkaSystemConsumer> consumersReference;
+
+ private static final String VALID_TOPIC = "validTopic";
+ private static final String INVALID_TOPIC = "invalidTopic";
+ private static final String TEST_SYSTEM = "testSystem";
+ private static final Long KAFKA_BEGINNING_OFFSET_FOR_PARTITION0 = 10L;
+ private static final Long KAFKA_BEGINNING_OFFSET_FOR_PARTITION1 = 11L;
+ private static final Long KAFKA_END_OFFSET_FOR_PARTITION0 = 20L;
+ private static final Long KAFKA_END_OFFSET_FOR_PARTITION1 = 21L;
+
+ @Before
+ public void setUp() throws Exception {
+ Map<String, String> configMap = new HashMap<>();
+ configMap.put(String.format(KafkaConfig.CONSUMER_CONFIGS_CONFIG_KEY(), TEST_SYSTEM, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG),
+ "localhost:123");
+ configMap.put(String.format(KafkaConfig.CONSUMER_ZK_CONNECT_CONFIG_KEY(), TEST_SYSTEM), "localhost:124");
+ configMap.put(JobConfig.JOB_NAME(), "jobName");
+ configMap.put(JobConfig.JOB_ID(), "jobId");
+
+ testConfig = new MapConfig(configMap);
+
+ consumersReference = new ConcurrentHashMap<>();
+
+ // mock PartitionInfo
+ mockPartitionInfo0 = mock(PartitionInfo.class);
+ when(mockPartitionInfo0.topic()).thenReturn(VALID_TOPIC);
+ when(mockPartitionInfo0.partition()).thenReturn(0);
+ mockPartitionInfo1 = mock(PartitionInfo.class);
+ when(mockPartitionInfo1.topic()).thenReturn(VALID_TOPIC);
+ when(mockPartitionInfo1.partition()).thenReturn(1);
+
+ // mock the KafkaConsumer used by the admin
+ mockKafkaConsumer = mock(KafkaConsumer.class);
+
+ // mock KafkaConsumer partition and offset lookups
+ testTopicPartition0 = new TopicPartition(VALID_TOPIC, 0);
+ testTopicPartition1 = new TopicPartition(VALID_TOPIC, 1);
+ Map<TopicPartition, Long> testBeginningOffsets =
+ ImmutableMap.of(testTopicPartition0, KAFKA_BEGINNING_OFFSET_FOR_PARTITION0, testTopicPartition1,
+ KAFKA_BEGINNING_OFFSET_FOR_PARTITION1);
+ Map<TopicPartition, Long> testEndOffsets =
+ ImmutableMap.of(testTopicPartition0, KAFKA_END_OFFSET_FOR_PARTITION0, testTopicPartition1,
+ KAFKA_END_OFFSET_FOR_PARTITION1);
+
+ when(mockKafkaConsumer.partitionsFor(VALID_TOPIC)).thenReturn(
+ ImmutableList.of(mockPartitionInfo0, mockPartitionInfo1));
+ when(mockKafkaConsumer.beginningOffsets(ImmutableList.of(testTopicPartition0, testTopicPartition1))).thenReturn(
+ testBeginningOffsets);
+ when(mockKafkaConsumer.endOffsets(ImmutableList.of(testTopicPartition0, testTopicPartition1))).thenReturn(
+ testEndOffsets);
+
+ kafkaSystemAdmin =
+ new KafkaSystemAdmin(TEST_SYSTEM, testConfig, mockKafkaConsumer);
+
+ }
+
+ @After
+ public void tearDown() {
+ }
+
+ @Test
+ public void testGetSystemStreamMetaDataWithValidTopic() {
+ Map<String, SystemStreamMetadata> metadataMap =
+ kafkaSystemAdmin.getSystemStreamMetadata(ImmutableSet.of(VALID_TOPIC));
+
+ // verify metadata size
+ assertEquals("metadata should return for 1 topic", metadataMap.size(), 1);
+ // verify the metadata streamName
+ assertEquals("the stream name should be " + VALID_TOPIC, metadataMap.get(VALID_TOPIC).getStreamName(), VALID_TOPIC);
+ // verify the offset for each partition
+ Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> systemStreamPartitionMetadata =
+ metadataMap.get(VALID_TOPIC).getSystemStreamPartitionMetadata();
+ assertEquals("there are 2 partitions", systemStreamPartitionMetadata.size(), 2);
+ SystemStreamMetadata.SystemStreamPartitionMetadata partition0Metadata =
+ systemStreamPartitionMetadata.get(new Partition(0));
+ assertEquals("oldest offset for partition 0", partition0Metadata.getOldestOffset(),
+ KAFKA_BEGINNING_OFFSET_FOR_PARTITION0.toString());
+ assertEquals("upcoming offset for partition 0", partition0Metadata.getUpcomingOffset(),
+ KAFKA_END_OFFSET_FOR_PARTITION0.toString());
+ assertEquals("newest offset for partition 0", partition0Metadata.getNewestOffset(),
+ Long.toString(KAFKA_END_OFFSET_FOR_PARTITION0 - 1));
+ SystemStreamMetadata.SystemStreamPartitionMetadata partition1Metadata =
+ systemStreamPartitionMetadata.get(new Partition(1));
+ assertEquals("oldest offset for partition 1", partition1Metadata.getOldestOffset(),
+ KAFKA_BEGINNING_OFFSET_FOR_PARTITION1.toString());
+ assertEquals("upcoming offset for partition 1", partition1Metadata.getUpcomingOffset(),
+ KAFKA_END_OFFSET_FOR_PARTITION1.toString());
+ assertEquals("newest offset for partition 1", partition1Metadata.getNewestOffset(),
+ Long.toString(KAFKA_END_OFFSET_FOR_PARTITION1 - 1));
+ }
+
+ @Test
+ public void testGetSystemStreamMetaDataWithInvalidTopic() {
+ Map<String, SystemStreamMetadata> metadataMap =
+ kafkaSystemAdmin.getSystemStreamMetadata(ImmutableSet.of(INVALID_TOPIC));
+ assertEquals("empty metadata for invalid topic", metadataMap.size(), 0);
+ }
+
+ @Test
+ public void testGetSystemStreamMetaDataWithNoTopic() {
+ Map<String, SystemStreamMetadata> metadataMap = kafkaSystemAdmin.getSystemStreamMetadata(Collections.emptySet());
+ assertEquals("empty metadata for no topic", metadataMap.size(), 0);
+ }
+
+ @Test
+ public void testGetSystemStreamMetaDataForTopicWithNoMessage() {
+ // The topic with no messages will have beginningOffset = 0 and endOffset = 0
+ when(mockKafkaConsumer.beginningOffsets(ImmutableList.of(testTopicPartition0, testTopicPartition1))).thenReturn(
+ ImmutableMap.of(testTopicPartition0, 0L, testTopicPartition1, 0L));
+ when(mockKafkaConsumer.endOffsets(ImmutableList.of(testTopicPartition0, testTopicPartition1))).thenReturn(
+ ImmutableMap.of(testTopicPartition0, 0L, testTopicPartition1, 0L));
+
+ Map<String, SystemStreamMetadata> metadataMap =
+ kafkaSystemAdmin.getSystemStreamMetadata(ImmutableSet.of(VALID_TOPIC));
+ assertEquals("metadata should return for 1 topic", metadataMap.size(), 1);
+
+ // verify the metadata streamName
+ assertEquals("the stream name should be " + VALID_TOPIC, metadataMap.get(VALID_TOPIC).getStreamName(), VALID_TOPIC);
+
+ // verify the offset for each partition
+ Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> systemStreamPartitionMetadata =
+ metadataMap.get(VALID_TOPIC).getSystemStreamPartitionMetadata();
+ assertEquals("there are 2 partitions", systemStreamPartitionMetadata.size(), 2);
+
+ SystemStreamMetadata.SystemStreamPartitionMetadata partition0Metadata =
+ systemStreamPartitionMetadata.get(new Partition(0));
+ assertEquals("oldest offset for partition 0", partition0Metadata.getOldestOffset(), "0");
+ assertEquals("upcoming offset for partition 0", partition0Metadata.getUpcomingOffset(), "0");
+ assertEquals("newest offset is not set due to abnormal upcoming offset", partition0Metadata.getNewestOffset(),
+ null);
+
+ SystemStreamMetadata.SystemStreamPartitionMetadata partition1Metadata =
+ systemStreamPartitionMetadata.get(new Partition(1));
+ assertEquals("oldest offset for partition 1", partition1Metadata.getOldestOffset(), "0");
+ assertEquals("upcoming offset for partition 1", partition1Metadata.getUpcomingOffset(), "0");
+ assertEquals("newest offset is not set due to abnormal upcoming offset", partition1Metadata.getNewestOffset(),
+ null);
+ }
+
+ @Test
+ public void testGetSSPMetadata() {
+ SystemStreamPartition ssp = new SystemStreamPartition(TEST_SYSTEM, VALID_TOPIC, new Partition(0));
+ SystemStreamPartition otherSSP = new SystemStreamPartition(TEST_SYSTEM, "otherTopic", new Partition(1));
+ TopicPartition topicPartition = new TopicPartition(VALID_TOPIC, 0);
+ TopicPartition otherTopicPartition = new TopicPartition("otherTopic", 1);
+ when(mockKafkaConsumer.beginningOffsets(ImmutableList.of(topicPartition, otherTopicPartition))).thenReturn(
+ ImmutableMap.of(topicPartition, 1L, otherTopicPartition, 2L));
+ when(mockKafkaConsumer.endOffsets(ImmutableList.of(topicPartition, otherTopicPartition))).thenReturn(
+ ImmutableMap.of(topicPartition, 11L, otherTopicPartition, 12L));
+ Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> expected =
+ ImmutableMap.of(ssp, new SystemStreamMetadata.SystemStreamPartitionMetadata("1", "10", "11"), otherSSP,
+ new SystemStreamMetadata.SystemStreamPartitionMetadata("2", "11", "12"));
+ assertEquals(kafkaSystemAdmin.getSSPMetadata(ImmutableSet.of(ssp, otherSSP)), expected);
+ }
+
+ @Test
+ public void testGetSSPMetadataEmptyPartition() {
+ SystemStreamPartition ssp = new SystemStreamPartition(TEST_SYSTEM, VALID_TOPIC, new Partition(0));
+ SystemStreamPartition otherSSP = new SystemStreamPartition(TEST_SYSTEM, "otherTopic", new Partition(1));
+ TopicPartition topicPartition = new TopicPartition(VALID_TOPIC, 0);
+ TopicPartition otherTopicPartition = new TopicPartition("otherTopic", 1);
+ when(mockKafkaConsumer.beginningOffsets(ImmutableList.of(topicPartition, otherTopicPartition))).thenReturn(
+ ImmutableMap.of(topicPartition, 1L));
+ when(mockKafkaConsumer.endOffsets(ImmutableList.of(topicPartition, otherTopicPartition))).thenReturn(
+ ImmutableMap.of(topicPartition, 11L));
+
+ Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> expected =
+ ImmutableMap.of(ssp, new SystemStreamMetadata.SystemStreamPartitionMetadata("1", "10", "11"), otherSSP,
+ new SystemStreamMetadata.SystemStreamPartitionMetadata(null, null, null));
+ assertEquals(expected, kafkaSystemAdmin.getSSPMetadata(ImmutableSet.of(ssp, otherSSP)));
+ }
+
+ @Test
+ public void testGetSSPMetadataEmptyUpcomingOffset() {
+ SystemStreamPartition ssp = new SystemStreamPartition(TEST_SYSTEM, VALID_TOPIC, new Partition(0));
+ TopicPartition topicPartition = new TopicPartition(VALID_TOPIC, 0);
+ when(mockKafkaConsumer.beginningOffsets(ImmutableList.of(topicPartition))).thenReturn(
+ ImmutableMap.of(topicPartition, 0L));
+ when(mockKafkaConsumer.endOffsets(ImmutableList.of(topicPartition))).thenReturn(ImmutableMap.of());
+ Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> expected =
+ ImmutableMap.of(ssp, new SystemStreamMetadata.SystemStreamPartitionMetadata("0", null, null));
+ assertEquals(kafkaSystemAdmin.getSSPMetadata(ImmutableSet.of(ssp)), expected);
+ }
+
+ @Test
+ public void testGetSSPMetadataZeroUpcomingOffset() {
+ SystemStreamPartition ssp = new SystemStreamPartition(TEST_SYSTEM, VALID_TOPIC, new Partition(0));
+ TopicPartition topicPartition = new TopicPartition(VALID_TOPIC, 0);
+ when(mockKafkaConsumer.beginningOffsets(ImmutableList.of(topicPartition))).thenReturn(
+ ImmutableMap.of(topicPartition, -1L));
+ when(mockKafkaConsumer.endOffsets(ImmutableList.of(topicPartition))).thenReturn(
+ ImmutableMap.of(topicPartition, 0L));
+ Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> expected =
+ ImmutableMap.of(ssp, new SystemStreamMetadata.SystemStreamPartitionMetadata("0", null, "0"));
+ assertEquals(kafkaSystemAdmin.getSSPMetadata(ImmutableSet.of(ssp)), expected);
+ }
+
+ @Test
+ public void testGetSystemStreamMetaDataWithRetry() {
+ final List<PartitionInfo> partitionInfosForTopic = ImmutableList.of(mockPartitionInfo0, mockPartitionInfo1);
+ when(mockKafkaConsumer.partitionsFor(VALID_TOPIC)).thenThrow(new RuntimeException())
+ .thenReturn(partitionInfosForTopic);
+
+ Map<String, SystemStreamMetadata> metadataMap =
+ kafkaSystemAdmin.getSystemStreamMetadata(ImmutableSet.of(VALID_TOPIC));
+ assertEquals("metadata should return for 1 topic", metadataMap.size(), 1);
+
+ // partitionsFor() is called twice because the first call fails and the second succeeds
+ Mockito.verify(mockKafkaConsumer, Mockito.times(2)).partitionsFor(VALID_TOPIC);
+
+ final List<TopicPartition> topicPartitions =
+ Arrays.asList(new TopicPartition(mockPartitionInfo0.topic(), mockPartitionInfo0.partition()),
+ new TopicPartition(mockPartitionInfo1.topic(), mockPartitionInfo1.partition()));
+ // the following methods thereafter are only called once
+ Mockito.verify(mockKafkaConsumer, Mockito.times(1)).beginningOffsets(topicPartitions);
+ Mockito.verify(mockKafkaConsumer, Mockito.times(1)).endOffsets(topicPartitions);
+ }
+
+ @Test(expected = SamzaException.class)
+ public void testGetSystemStreamMetadataShouldTerminateAfterFiniteRetriesOnException() {
+ when(mockKafkaConsumer.partitionsFor(VALID_TOPIC)).thenThrow(new RuntimeException())
+ .thenThrow(new RuntimeException())
+ .thenThrow(new RuntimeException())
+ .thenThrow(new RuntimeException())
+ .thenThrow(new RuntimeException());
+
+ kafkaSystemAdmin.getSystemStreamMetadata(ImmutableSet.of(VALID_TOPIC));
+ }
+
+ @Test(expected = SamzaException.class)
+ public void testGetSystemStreamPartitionCountsShouldTerminateAfterFiniteRetriesOnException() throws Exception {
+ final Set<String> streamNames = ImmutableSet.of(VALID_TOPIC);
+ final long cacheTTL = 100L;
+
+ when(mockKafkaConsumer.partitionsFor(VALID_TOPIC)).thenThrow(new RuntimeException())
+ .thenThrow(new RuntimeException())
+ .thenThrow(new RuntimeException())
+ .thenThrow(new RuntimeException())
+ .thenThrow(new RuntimeException());
+
+ kafkaSystemAdmin.getSystemStreamPartitionCounts(streamNames, cacheTTL);
+ }
+}
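The assertions above all reduce to a single mapping from Kafka's beginning/end offsets to Samza's partition metadata. A minimal sketch of that mapping follows, assuming the same imports as the test above and the SystemStreamPartitionMetadata(oldest, newest, upcoming) constructor order used there; the helper name is hypothetical and is not part of KafkaSystemAdmin itself.

    // Illustrative sketch only: how the tests above expect Kafka offsets to translate.
    static SystemStreamMetadata.SystemStreamPartitionMetadata toPartitionMetadata(long beginningOffset, long endOffset) {
      // oldest available offset is the beginning offset reported by the consumer
      String oldest = String.valueOf(beginningOffset);
      // upcoming offset is the end offset (the next offset to be written)
      String upcoming = String.valueOf(endOffset);
      // newest offset is the last written offset; an empty partition (endOffset == 0) has none
      String newest = (endOffset <= 0) ? null : String.valueOf(endOffset - 1);
      return new SystemStreamMetadata.SystemStreamPartitionMetadata(oldest, newest, upcoming);
    }

For the valid-topic case this yields ("10", "19", "20") for partition 0, and for the empty-topic case ("0", null, "0"), matching the expected values asserted above.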
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java b/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java
new file mode 100644
index 0000000..981ac45
--- /dev/null
+++ b/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java
@@ -0,0 +1,225 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.samza.system.kafka;
+
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.common.serialization.ByteArraySerializer;
+import org.apache.samza.Partition;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.JobConfig;
+import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.config.KafkaConsumerConfig;
+import org.apache.samza.config.MapConfig;
+import org.apache.samza.system.IncomingMessageEnvelope;
+import org.apache.samza.system.SystemStreamPartition;
+import org.apache.samza.util.Clock;
+import org.apache.samza.util.NoOpMetricsRegistry;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+
+public class TestKafkaSystemConsumer {
+ public final String TEST_SYSTEM = "test-system";
+ public final String TEST_STREAM = "test-stream";
+ public final String TEST_JOB = "test-job";
+ public final String TEST_PREFIX_ID = "testClientId";
+ public final String BOOTSTRAP_SERVER = "127.0.0.1:8888";
+ public final String FETCH_THRESHOLD_MSGS = "50000";
+ public final String FETCH_THRESHOLD_BYTES = "100000";
+
+ private KafkaSystemConsumer createConsumer(String fetchMsg, String fetchBytes) {
+ final Map<String, String> map = new HashMap<>();
+
+ map.put(JobConfig.JOB_NAME(), TEST_JOB);
+
+ map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD(), TEST_SYSTEM), fetchMsg);
+ map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD_BYTES(), TEST_SYSTEM), fetchBytes);
+ map.put(String.format("systems.%s.consumer.%s", TEST_SYSTEM, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG),
+ BOOTSTRAP_SERVER);
+ map.put(JobConfig.JOB_NAME(), "jobName");
+
+ Config config = new MapConfig(map);
+ String clientId = KafkaConsumerConfig.createClientId(TEST_PREFIX_ID, config);
+ KafkaConsumerConfig consumerConfig =
+ KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, TEST_SYSTEM, clientId);
+
+ final KafkaConsumer<byte[], byte[]> kafkaConsumer = new MockKafkaConsumer(consumerConfig);
+
+ MockKafkaSystemConsumer newKafkaSystemConsumer =
+ new MockKafkaSystemConsumer(kafkaConsumer, TEST_SYSTEM, config, TEST_PREFIX_ID,
+ new KafkaSystemConsumerMetrics(TEST_SYSTEM, new NoOpMetricsRegistry()), System::currentTimeMillis);
+
+ return newKafkaSystemConsumer;
+ }
+
+ @Test
+ public void testConfigValidations() {
+
+ final KafkaSystemConsumer consumer = createConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+ consumer.start();
+ // should be no failures
+ }
+
+ @Test
+ public void testFetchThresholdShouldDivideEvenlyAmongPartitions() {
+ final KafkaSystemConsumer consumer = createConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+ final int partitionsNum = 50;
+ for (int i = 0; i < partitionsNum; i++) {
+ consumer.register(new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(i)), "0");
+ }
+
+ consumer.start();
+
+ Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum, consumer.perPartitionFetchThreshold);
+ Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_BYTES) / 2 / partitionsNum,
+ consumer.perPartitionFetchThresholdBytes);
+
+ consumer.stop();
+ }
+
+ @Test
+ public void testConsumerRegisterOlderOffsetOfTheSamzaSSP() {
+
+ KafkaSystemConsumer consumer = createConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+ SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+ SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+ SystemStreamPartition ssp2 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(2));
+
+ consumer.register(ssp0, "0");
+ consumer.register(ssp0, "5");
+ consumer.register(ssp1, "2");
+ consumer.register(ssp1, "3");
+ consumer.register(ssp2, "0");
+
+ assertEquals("0", consumer.topicPartitionsToOffset.get(KafkaSystemConsumer.toTopicPartition(ssp0)));
+ assertEquals("2", consumer.topicPartitionsToOffset.get(KafkaSystemConsumer.toTopicPartition(ssp1)));
+ assertEquals("0", consumer.topicPartitionsToOffset.get(KafkaSystemConsumer.toTopicPartition(ssp2)));
+ }
+
+ @Test
+ public void testFetchThresholdBytes() {
+
+ SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+ SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+ int partitionsNum = 2;
+ int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size
+ int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 1; // fake size
+ int ime11Size = 20;
+ ByteArraySerializer bytesSerde = new ByteArraySerializer();
+ IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
+ bytesSerde.serialize("", "value0".getBytes()), ime0Size);
+ IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
+ bytesSerde.serialize("", "value1".getBytes()), ime1Size);
+ IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
+ bytesSerde.serialize("", "value11".getBytes()), ime11Size);
+ KafkaSystemConsumer consumer = createConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+ consumer.register(ssp0, "0");
+ consumer.register(ssp1, "0");
+ consumer.start();
+ consumer.messageSink.addMessage(ssp0, ime0);
+ // queue for ssp0 should be full now, because we added a message of size FETCH_THRESHOLD_MSGS/partitionsNum
+ Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp0));
+ consumer.messageSink.addMessage(ssp1, ime1);
+ // queue for ssp1 should be less than full now, because we added a message of size (FETCH_THRESHOLD_MSGS/partitionsNum - 1)
+ Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
+ consumer.messageSink.addMessage(ssp1, ime11);
+ // queue for ssp1 should be full now, because we added a message of size 20 on top
+ Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
+
+ Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
+ Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
+ Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
+ Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
+
+ consumer.stop();
+ }
+
+ @Test
+ public void testFetchThresholdBytesDisabled() {
+ // Pass 0 as fetchThresholdBytes, which disables checking the limit by size
+
+ SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+ SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+ int partitionsNum = 2;
+ int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size, up to the limit
+ int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 100; // fake size, below the limit
+ int ime11Size = 20; // even with the second message, still below the size limit
+ ByteArraySerializer bytesSerde = new ByteArraySerializer();
+ IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
+ bytesSerde.serialize("", "value0".getBytes()), ime0Size);
+ IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
+ bytesSerde.serialize("", "value1".getBytes()), ime1Size);
+ IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
+ bytesSerde.serialize("", "value11".getBytes()), ime11Size);
+
+ // limit by number of messages 4/2 = 2 per partition
+ // limit by number of bytes - disabled
+ KafkaSystemConsumer consumer = createConsumer("4", "0"); // should disable
+
+ consumer.register(ssp0, "0");
+ consumer.register(ssp1, "0");
+ consumer.start();
+ consumer.messageSink.addMessage(ssp0, ime0);
+ // should be full by size, but not full by number of messages (1 of 2)
+ Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp0));
+ consumer.messageSink.addMessage(ssp1, ime1);
+ // not full by size or by message count
+ Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
+ consumer.messageSink.addMessage(ssp1, ime11);
+ // not full by size, but should be full by messages
+ Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
+
+ Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
+ Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
+ Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
+ Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
+
+ consumer.stop();
+ }
+
+ // mock kafkaConsumer and SystemConsumer
+ static class MockKafkaConsumer extends KafkaConsumer {
+ public MockKafkaConsumer(Map<String, Object> configs) {
+ super(configs);
+ }
+ }
+
+ static class MockKafkaSystemConsumer extends KafkaSystemConsumer {
+ public MockKafkaSystemConsumer(Consumer kafkaConsumer, String systemName, Config config, String clientId,
+ KafkaSystemConsumerMetrics metrics, Clock clock) {
+ super(kafkaConsumer, systemName, config, clientId, metrics, clock);
+ }
+
+ @Override
+ void startConsumer() {
+ }
+ }
+}
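The division asserted in testFetchThresholdShouldDivideEvenlyAmongPartitions is simple enough to spell out. The sketch below only restates the arithmetic the test checks, with values taken from the constants above; it is not the KafkaSystemConsumer implementation.

    // Illustrative arithmetic, mirroring the assertions in the test above.
    long fetchThresholdMsgs = Long.valueOf("50000");   // FETCH_THRESHOLD_MSGS
    long fetchThresholdBytes = Long.valueOf("100000"); // FETCH_THRESHOLD_BYTES
    int registeredPartitions = 50;

    // per-partition message threshold: 50000 / 50 = 1000 messages
    long perPartitionFetchThreshold = fetchThresholdMsgs / registeredPartitions;
    // the byte threshold is halved before being split across partitions (per the assertion above):
    // 100000 / 2 / 50 = 1000 bytes
    long perPartitionFetchThresholdBytes = fetchThresholdBytes / 2 / registeredPartitions;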
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemConsumerMetrics.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemConsumerMetrics.java b/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemConsumerMetrics.java
new file mode 100644
index 0000000..03b0564
--- /dev/null
+++ b/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemConsumerMetrics.java
@@ -0,0 +1,109 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.samza.system.kafka;
+
+import java.util.HashMap;
+import java.util.Map;
+import kafka.common.TopicAndPartition;
+import org.apache.samza.metrics.Metric;
+import org.apache.samza.metrics.MetricsRegistryMap;
+import org.apache.samza.metrics.ReadableMetricsRegistry;
+import org.junit.Assert;
+import org.junit.Test;
+
+
+public class TestKafkaSystemConsumerMetrics {
+ @Test
+ public void testKafkaSystemConsumerMetrics() {
+ String systemName = "system";
+ TopicAndPartition tp1 = new TopicAndPartition("topic1", 1);
+ TopicAndPartition tp2 = new TopicAndPartition("topic2", 2);
+ String clientName = "clientName";
+
+ // record expected values for further comparison
+ Map<String, String> expectedValues = new HashMap<>();
+
+ ReadableMetricsRegistry registry = new MetricsRegistryMap();
+ KafkaSystemConsumerMetrics metrics = new KafkaSystemConsumerMetrics(systemName, registry);
+
+ // initialize the metrics for the partitions
+ metrics.registerTopicAndPartition(tp1);
+ metrics.registerTopicAndPartition(tp2);
+
+ // initialize the metrics for the host:port
+ metrics.registerClientProxy(clientName);
+
+ metrics.setOffsets(tp1, 1001);
+ metrics.setOffsets(tp2, 1002);
+ expectedValues.put(metrics.offsets().get(tp1).getName(), "1001");
+ expectedValues.put(metrics.offsets().get(tp2).getName(), "1002");
+
+ metrics.incBytesReads(tp1, 10);
+ metrics.incBytesReads(tp1, 5); // total 15
+ expectedValues.put(metrics.bytesRead().get(tp1).getName(), "15");
+
+ metrics.incReads(tp1);
+ metrics.incReads(tp1); // total 2
+ expectedValues.put(metrics.reads().get(tp1).getName(), "2");
+
+ metrics.setHighWatermarkValue(tp2, 1000);
+ metrics.setHighWatermarkValue(tp2, 1001); // final value 1001
+ expectedValues.put(metrics.highWatermark().get(tp2).getName(), "1001");
+
+ metrics.setLagValue(tp1, 200);
+ metrics.setLagValue(tp1, 201); // final value 201
+ expectedValues.put(metrics.lag().get(tp1).getName(), "201");
+
+ metrics.incClientBytesReads(clientName, 100); // broker-bytes-read
+ metrics.incClientBytesReads(clientName, 110); // total 210
+ expectedValues.put(metrics.clientBytesRead().get(clientName).getName(), "210");
+
+ metrics.incClientReads(clientName); // messages-read
+ metrics.incClientReads(clientName); // total 2
+ expectedValues.put(metrics.clientReads().get(clientName).getName(), "2");
+
+ metrics.setNumTopicPartitions(clientName, 2); // "topic-partitions"
+ metrics.setNumTopicPartitions(clientName, 3); // final value 3
+ expectedValues.put(metrics.topicPartitions().get(clientName).getName(), "3");
+
+
+ String groupName = metrics.group();
+ Assert.assertEquals(groupName, KafkaSystemConsumerMetrics.class.getName());
+ Assert.assertEquals(metrics.systemName(), systemName);
+
+ Map<String, Metric> metricMap = registry.getGroup(groupName);
+ validate(metricMap, expectedValues);
+ }
+
+ protected static void validate(Map<String, Metric> metricMap, Map<String, String> expectedValues) {
+ // compare the expected value, set in the test above, with the value recorded in the metrics registry
+ for(Map.Entry<String, String> e: expectedValues.entrySet()) {
+ String metricName = e.getKey();
+ String expectedValue = e.getValue();
+ // get the metric from the registry
+ String actualValue = metricMap.get(metricName).toString();
+
+ Assert.assertEquals("failed for metricName=" + metricName, actualValue, expectedValue);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemAdmin.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemAdmin.scala b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemAdmin.scala
index cd511f2..1570363 100644
--- a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemAdmin.scala
+++ b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemAdmin.scala
@@ -21,22 +21,18 @@
package org.apache.samza.system.kafka
-import java.util.{Properties, UUID}
-
import kafka.admin.AdminUtils
-import org.apache.kafka.common.errors.LeaderNotAvailableException
-import org.apache.kafka.common.protocol.Errors
import kafka.consumer.{Consumer, ConsumerConfig, ConsumerConnector}
import kafka.integration.KafkaServerTestHarness
import kafka.server.KafkaConfig
-import kafka.utils.{TestUtils, ZkUtils}
+import kafka.utils.TestUtils
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.security.JaasUtils
import org.apache.samza.Partition
-import org.apache.samza.config.KafkaProducerConfig
+import org.apache.samza.config._
import org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata
import org.apache.samza.system.{StreamSpec, SystemStreamMetadata, SystemStreamPartition}
-import org.apache.samza.util.{ClientUtilTopicMetadataStore, ExponentialSleepStrategy, KafkaUtil, TopicMetadataStore}
+import org.apache.samza.util.{ClientUtilTopicMetadataStore, KafkaUtil, TopicMetadataStore}
import org.junit.Assert._
import org.junit._
@@ -53,6 +49,9 @@ object TestKafkaSystemAdmin extends KafkaServerTestHarness {
val TOTAL_PARTITIONS = 50
val REPLICATION_FACTOR = 2
val zkSecure = JaasUtils.isZkSecurityEnabled()
+ val KAFKA_CONSUMER_PROPERTY_PREFIX: String = "systems." + SYSTEM + ".consumer."
+ val KAFKA_PRODUCER_PROPERTY_PREFIX: String = "systems." + SYSTEM + ".producer."
+
protected def numBrokers: Int = 3
@@ -69,14 +68,19 @@ object TestKafkaSystemAdmin extends KafkaServerTestHarness {
@BeforeClass
override def setUp() {
super.setUp()
- val config = new java.util.HashMap[String, String]()
- config.put("bootstrap.servers", brokerList)
- config.put("acks", "all")
- config.put("serializer.class", "kafka.serializer.StringEncoder")
- producerConfig = new KafkaProducerConfig("kafka", "i001", config)
+ val map = new java.util.HashMap[String, String]()
+ map.put("bootstrap.servers", brokerList)
+ map.put(KAFKA_CONSUMER_PROPERTY_PREFIX +
+ org.apache.kafka.clients.consumer.ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList)
+ map.put("acks", "all")
+ map.put("serializer.class", "kafka.serializer.StringEncoder")
+
+
+ producerConfig = new KafkaProducerConfig("kafka", "i001", map)
producer = new KafkaProducer[Array[Byte], Array[Byte]](producerConfig.getProducerProperties)
metadataStore = new ClientUtilTopicMetadataStore(brokerList, "some-job-name")
- systemAdmin = new KafkaSystemAdmin(SYSTEM, brokerList, connectZk = () => ZkUtils(zkConnect, 6000, 6000, zkSecure))
+ systemAdmin = createSystemAdmin(SYSTEM, map)
+
systemAdmin.start()
}
@@ -122,7 +126,7 @@ object TestKafkaSystemAdmin extends KafkaServerTestHarness {
}
def getConsumerConnector(): ConsumerConnector = {
- val props = new Properties
+ val props = new java.util.Properties
props.put("zookeeper.connect", zkConnect)
props.put("group.id", "test")
@@ -132,18 +136,36 @@ object TestKafkaSystemAdmin extends KafkaServerTestHarness {
Consumer.create(consumerConfig)
}
- def createSystemAdmin(coordinatorStreamProperties: Properties, coordinatorStreamReplicationFactor: Int, topicMetaInformation: Map[String, ChangelogInfo]): KafkaSystemAdmin = {
- new KafkaSystemAdmin(SYSTEM, brokerList, connectZk = () => ZkUtils(zkConnect, 6000, 6000, zkSecure), coordinatorStreamProperties,
- coordinatorStreamReplicationFactor, 10000, ConsumerConfig.SocketBufferSize, UUID.randomUUID.toString, topicMetaInformation, Map(), false)
- }
+ def createSystemAdmin(system: String, map: java.util.Map[String, String]) = {
+ // required configs - bootstrap servers, zookeeper connect and job name
+ map.put(KAFKA_CONSUMER_PROPERTY_PREFIX + org.apache.kafka.clients.consumer.ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,
+ brokerList)
+ map.put(KAFKA_PRODUCER_PROPERTY_PREFIX + org.apache.kafka.clients.consumer.ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,
+ brokerList)
+ map.put(JobConfig.JOB_NAME, "job.name")
+ map.put(KAFKA_CONSUMER_PROPERTY_PREFIX + KafkaConsumerConfig.ZOOKEEPER_CONNECT, zkConnect)
+
+ val config: Config = new MapConfig(map)
+ val res = KafkaSystemAdminUtilsScala.getIntermediateStreamProperties(config)
+
+ val clientId = KafkaConsumerConfig.createClientId("clientPrefix", config);
+ // extract kafka client configs
+ val consumerConfig = KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, system, clientId)
+
+ new KafkaSystemAdmin(
+ system,
+ config,
+ KafkaSystemConsumer.createKafkaConsumerImpl(system, consumerConfig))
+ }
}
/**
- * Test creates a local ZK and Kafka cluster, and uses it to create and test
- * topics for to verify that offset APIs in SystemAdmin work as expected.
- */
+ * Test creates a local ZK and Kafka cluster, and uses it to create and test
+ * topics to verify that offset APIs in SystemAdmin work as expected.
+ */
class TestKafkaSystemAdmin {
+
import TestKafkaSystemAdmin._
@Test
@@ -163,7 +185,7 @@ class TestKafkaSystemAdmin {
new SystemStreamPartition(SYSTEM, "stream2", new Partition(0)) -> "u2",
new SystemStreamPartition(SYSTEM, "stream1", new Partition(1)) -> "u3",
new SystemStreamPartition(SYSTEM, "stream2", new Partition(1)) -> "u4")
- val metadata = KafkaSystemAdmin.assembleMetadata(oldestOffsets, newestOffsets, upcomingOffsets)
+ val metadata = KafkaSystemAdminUtilsScala.assembleMetadata(oldestOffsets, newestOffsets, upcomingOffsets)
assertNotNull(metadata)
assertEquals(2, metadata.size)
assertTrue(metadata.contains("stream1"))
@@ -271,7 +293,9 @@ class TestKafkaSystemAdmin {
@Test
def testShouldCreateCoordinatorStream {
val topic = "test-coordinator-stream"
- val systemAdmin = new KafkaSystemAdmin(SYSTEM, brokerList, () => ZkUtils(zkConnect, 6000, 6000, zkSecure), coordinatorStreamReplicationFactor = 3)
+ val map = new java.util.HashMap[String, String]()
+ map.put(org.apache.samza.config.KafkaConfig.JOB_COORDINATOR_REPLICATION_FACTOR, "3")
+ val systemAdmin = createSystemAdmin(SYSTEM, map)
val spec = StreamSpec.createCoordinatorStreamSpec(topic, "kafka")
systemAdmin.createStream(spec)
@@ -284,30 +308,6 @@ class TestKafkaSystemAdmin {
assertEquals(3, partitionMetadata.replicas.size)
}
- class KafkaSystemAdminWithTopicMetadataError extends KafkaSystemAdmin(SYSTEM, brokerList, () => ZkUtils(zkConnect, 6000, 6000, zkSecure)) {
- import kafka.api.TopicMetadata
- var metadataCallCount = 0
-
- // Simulate Kafka telling us that the leader for the topic is not available
- override def getTopicMetadata(topics: Set[String]) = {
- metadataCallCount += 1
- val topicMetadata = TopicMetadata(topic = "quux", partitionsMetadata = Seq(), error = Errors.LEADER_NOT_AVAILABLE)
- Map("quux" -> topicMetadata)
- }
- }
-
- @Test
- def testShouldRetryOnTopicMetadataError {
- val systemAdmin = new KafkaSystemAdminWithTopicMetadataError
- val retryBackoff = new ExponentialSleepStrategy.Mock(maxCalls = 3)
- try {
- systemAdmin.getSystemStreamMetadata(Set("quux").asJava, retryBackoff)
- fail("expected CallLimitReached to be thrown")
- } catch {
- case e: ExponentialSleepStrategy.CallLimitReached => ()
- }
- }
-
@Test
def testGetNewestOffset {
createTopic(TOPIC2, 16)
@@ -335,17 +335,4 @@ class TestKafkaSystemAdmin {
assertEquals("2", systemAdmin.getNewestOffset(sspUnderTest, 0))
assertEquals("0", systemAdmin.getNewestOffset(otherSsp, 0))
}
-
- @Test (expected = classOf[LeaderNotAvailableException])
- def testGetNewestOffsetMaxRetry {
- val expectedRetryCount = 3
- val systemAdmin = new KafkaSystemAdminWithTopicMetadataError
- try {
- systemAdmin.getNewestOffset(new SystemStreamPartition(SYSTEM, "quux", new Partition(0)), 3)
- } catch {
- case e: Exception =>
- assertEquals(expectedRetryCount + 1, systemAdmin.metadataCallCount)
- throw e
- }
- }
-}
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java
deleted file mode 100644
index 5791545..0000000
--- a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- */
-
-package org.apache.samza.system.kafka;
-
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.kafka.clients.consumer.Consumer;
-import org.apache.kafka.clients.consumer.ConsumerConfig;
-import org.apache.kafka.clients.consumer.KafkaConsumer;
-import org.apache.kafka.common.serialization.ByteArraySerializer;
-import org.apache.samza.Partition;
-import org.apache.samza.config.Config;
-import org.apache.samza.config.JobConfig;
-import org.apache.samza.config.KafkaConfig;
-import org.apache.samza.config.KafkaConsumerConfig;
-import org.apache.samza.config.MapConfig;
-import org.apache.samza.system.IncomingMessageEnvelope;
-import org.apache.samza.system.SystemStreamPartition;
-import org.apache.samza.util.Clock;
-import org.apache.samza.util.NoOpMetricsRegistry;
-import org.junit.Assert;
-import org.junit.Test;
-
-import static org.junit.Assert.*;
-
-
-public class TestKafkaSystemConsumer {
- public final String TEST_SYSTEM = "test-system";
- public final String TEST_STREAM = "test-stream";
- public final String TEST_CLIENT_ID = "testClientId";
- public final String BOOTSTRAP_SERVER = "127.0.0.1:8888";
- public final String FETCH_THRESHOLD_MSGS = "50000";
- public final String FETCH_THRESHOLD_BYTES = "100000";
-
- private KafkaSystemConsumer createConsumer(String fetchMsg, String fetchBytes) {
- final Map<String, String> map = new HashMap<>();
-
- map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD(), TEST_SYSTEM), fetchMsg);
- map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD_BYTES(), TEST_SYSTEM), fetchBytes);
- map.put(String.format("systems.%s.consumer.%s", TEST_SYSTEM, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG),
- BOOTSTRAP_SERVER);
- map.put(JobConfig.JOB_NAME(), "jobName");
-
- Config config = new MapConfig(map);
- KafkaConsumerConfig consumerConfig =
- KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, TEST_SYSTEM, TEST_CLIENT_ID);
- final KafkaConsumer<byte[], byte[]> kafkaConsumer = new MockKafkaConsumer(consumerConfig);
-
- MockKafkaSystemConsumer newKafkaSystemConsumer =
- new MockKafkaSystemConsumer(kafkaConsumer, TEST_SYSTEM, config, TEST_CLIENT_ID,
- new KafkaSystemConsumerMetrics(TEST_SYSTEM, new NoOpMetricsRegistry()), System::currentTimeMillis);
-
- return newKafkaSystemConsumer;
- }
-
- @Test
- public void testConfigValidations() {
-
- final KafkaSystemConsumer consumer = createConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
-
- consumer.start();
- // should be no failures
- }
-
- @Test
- public void testFetchThresholdShouldDivideEvenlyAmongPartitions() {
- final KafkaSystemConsumer consumer = createConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
- final int partitionsNum = 50;
- for (int i = 0; i < partitionsNum; i++) {
- consumer.register(new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(i)), "0");
- }
-
- consumer.start();
-
- Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum, consumer.perPartitionFetchThreshold);
- Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_BYTES) / 2 / partitionsNum,
- consumer.perPartitionFetchThresholdBytes);
-
- consumer.stop();
- }
-
- @Test
- public void testConsumerRegisterOlderOffsetOfTheSamzaSSP() {
-
- KafkaSystemConsumer consumer = createConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
-
- SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
- SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
- SystemStreamPartition ssp2 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(2));
-
- consumer.register(ssp0, "0");
- consumer.register(ssp0, "5");
- consumer.register(ssp1, "2");
- consumer.register(ssp1, "3");
- consumer.register(ssp2, "0");
-
- assertEquals("0", consumer.topicPartitionsToOffset.get(KafkaSystemConsumer.toTopicPartition(ssp0)));
- assertEquals("2", consumer.topicPartitionsToOffset.get(KafkaSystemConsumer.toTopicPartition(ssp1)));
- assertEquals("0", consumer.topicPartitionsToOffset.get(KafkaSystemConsumer.toTopicPartition(ssp2)));
- }
-
- @Test
- public void testFetchThresholdBytes() {
-
- SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
- SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
- int partitionsNum = 2;
- int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size
- int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 1; // fake size
- int ime11Size = 20;
- ByteArraySerializer bytesSerde = new ByteArraySerializer();
- IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
- bytesSerde.serialize("", "value0".getBytes()), ime0Size);
- IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
- bytesSerde.serialize("", "value1".getBytes()), ime1Size);
- IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
- bytesSerde.serialize("", "value11".getBytes()), ime11Size);
- KafkaSystemConsumer consumer = createConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
-
- consumer.register(ssp0, "0");
- consumer.register(ssp1, "0");
- consumer.start();
- consumer.messageSink.addMessage(ssp0, ime0);
- // queue for ssp0 should be full now, because we added message of size FETCH_THRESHOLD_MSGS/partitionsNum
- Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp0));
- consumer.messageSink.addMessage(ssp1, ime1);
- // queue for ssp1 should be less then full now, because we added message of size (FETCH_THRESHOLD_MSGS/partitionsNum - 1)
- Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
- consumer.messageSink.addMessage(ssp1, ime11);
- // queue for ssp1 should full now, because we added message of size 20 on top
- Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
-
- Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
- Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
- Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
- Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
-
- consumer.stop();
- }
-
- @Test
- public void testFetchThresholdBytesDiabled() {
- // Pass 0 as fetchThresholdByBytes, which disables checking for limit by size
-
- SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
- SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
- int partitionsNum = 2;
- int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size, upto the limit
- int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 100; // fake size, below the limit
- int ime11Size = 20;// event with the second message still below the size limit
- ByteArraySerializer bytesSerde = new ByteArraySerializer();
- IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
- bytesSerde.serialize("", "value0".getBytes()), ime0Size);
- IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
- bytesSerde.serialize("", "value1".getBytes()), ime1Size);
- IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
- bytesSerde.serialize("", "value11".getBytes()), ime11Size);
-
- // limit by number of messages 4/2 = 2 per partition
- // limit by number of bytes - disabled
- KafkaSystemConsumer consumer = createConsumer("4", "0"); // should disable
-
- consumer.register(ssp0, "0");
- consumer.register(ssp1, "0");
- consumer.start();
- consumer.messageSink.addMessage(ssp0, ime0);
- // should be full by size, but not full by number of messages (1 of 2)
- Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp0));
- consumer.messageSink.addMessage(ssp1, ime1);
- // not full neither by size nor by messages
- Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
- consumer.messageSink.addMessage(ssp1, ime11);
- // not full by size, but should be full by messages
- Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
-
- Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
- Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
- Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
- Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
-
- consumer.stop();
- }
-
- // mock kafkaConsumer and SystemConsumer
- static class MockKafkaConsumer extends KafkaConsumer {
- public MockKafkaConsumer(Map<String, Object> configs) {
- super(configs);
- }
- }
-
- static class MockKafkaSystemConsumer extends KafkaSystemConsumer {
- public MockKafkaSystemConsumer(Consumer kafkaConsumer, String systemName, Config config, String clientId,
- KafkaSystemConsumerMetrics metrics, Clock clock) {
- super(kafkaConsumer, systemName, config, clientId, metrics, clock);
- }
-
- @Override
- void startConsumer() {
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-test/src/test/java/org/apache/samza/test/operator/TestRepartitionJoinWindowApp.java
----------------------------------------------------------------------
diff --git a/samza-test/src/test/java/org/apache/samza/test/operator/TestRepartitionJoinWindowApp.java b/samza-test/src/test/java/org/apache/samza/test/operator/TestRepartitionJoinWindowApp.java
index 340f0e7..fc5e75a 100644
--- a/samza-test/src/test/java/org/apache/samza/test/operator/TestRepartitionJoinWindowApp.java
+++ b/samza-test/src/test/java/org/apache/samza/test/operator/TestRepartitionJoinWindowApp.java
@@ -73,7 +73,7 @@ public class TestRepartitionJoinWindowApp extends StreamApplicationIntegrationTe
String inputTopicName2 = "ad-clicks";
String outputTopicName = "user-ad-click-counts";
- KafkaSystemAdmin.deleteMessagesCalled_$eq(false);
+ KafkaSystemAdmin.deleteMessageCalled = false;
initializeTopics(inputTopicName1, inputTopicName2, outputTopicName);
@@ -95,7 +95,7 @@ public class TestRepartitionJoinWindowApp extends StreamApplicationIntegrationTe
List<ConsumerRecord<String, String>> messages = consumeMessages(Collections.singletonList(outputTopicName), 2);
assertEquals(2, messages.size());
- Assert.assertFalse(KafkaSystemAdmin.deleteMessagesCalled());
+ Assert.assertFalse(KafkaSystemAdmin.deleteMessageCalled);
}
@Test
@@ -133,18 +133,20 @@ public class TestRepartitionJoinWindowApp extends StreamApplicationIntegrationTe
// Verify that messages in the intermediate stream will be deleted in 10 seconds
long startTimeMs = System.currentTimeMillis();
- for (String streamId: app.getIntermediateStreamIds()) {
+ for (String streamId : app.getIntermediateStreamIds()) {
long remainingMessageNum = -1;
while (remainingMessageNum != 0 && System.currentTimeMillis() - startTimeMs < 10000) {
remainingMessageNum = 0;
- SystemStreamMetadata metadatas = systemAdmin.getSystemStreamMetadata(
- new HashSet<>(Arrays.asList(streamId)), new ExponentialSleepStrategy.Mock(3)
- ).get(streamId).get();
+ SystemStreamMetadata metadatas =
+ (SystemStreamMetadata) systemAdmin.getSystemStreamMetadata(new HashSet<>(Arrays.asList(streamId)),
+ new ExponentialSleepStrategy.Mock(3)).get(streamId);
- for (Map.Entry<Partition, SystemStreamPartitionMetadata> entry : metadatas.getSystemStreamPartitionMetadata().entrySet()) {
+ for (Map.Entry<Partition, SystemStreamPartitionMetadata> entry : metadatas.getSystemStreamPartitionMetadata()
+ .entrySet()) {
SystemStreamPartitionMetadata metadata = entry.getValue();
- remainingMessageNum += Long.parseLong(metadata.getUpcomingOffset()) - Long.parseLong(metadata.getOldestOffset());
+ remainingMessageNum +=
+ Long.parseLong(metadata.getUpcomingOffset()) - Long.parseLong(metadata.getOldestOffset());
}
}
assertEquals(0, remainingMessageNum);
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-test/src/test/scala/org/apache/samza/test/harness/AbstractIntegrationTestHarness.scala
----------------------------------------------------------------------
diff --git a/samza-test/src/test/scala/org/apache/samza/test/harness/AbstractIntegrationTestHarness.scala b/samza-test/src/test/scala/org/apache/samza/test/harness/AbstractIntegrationTestHarness.scala
index bc00305..2f18875 100644
--- a/samza-test/src/test/scala/org/apache/samza/test/harness/AbstractIntegrationTestHarness.scala
+++ b/samza-test/src/test/scala/org/apache/samza/test/harness/AbstractIntegrationTestHarness.scala
@@ -17,36 +17,39 @@
* under the License.
*/
package org.apache.samza.test.harness
+
import java.util.Properties
import kafka.server.KafkaConfig
-import kafka.utils.{TestUtils, ZkUtils}
-import org.apache.kafka.common.security.JaasUtils
-import org.apache.samza.system.kafka.KafkaSystemAdmin
+import kafka.utils.TestUtils
+import org.apache.samza.config.{JobConfig, KafkaConsumerConfig, MapConfig}
+import org.apache.samza.system.kafka.{KafkaSystemAdmin, KafkaSystemConsumer}
/**
- * LinkedIn integration test harness for Kafka
- * This is simply a copy of open source code. We do this because java does not support trait and we are making it an
- * abstract class so that user's java test class can extend it.
- */
+ * Integration test harness for Kafka.
+ * This is an abstract class rather than a trait so that Java test classes
+ * can extend it, since Java does not support Scala traits.
+ */
abstract class AbstractIntegrationTestHarness extends AbstractKafkaServerTestHarness {
def generateConfigs() =
TestUtils.createBrokerConfigs(clusterSize(), zkConnect, enableControlledShutdown = false).map(KafkaConfig.fromProps(_, overridingProps()))
/**
- * User can override this method to return the number of brokers they want.
- * By default only one broker will be launched.
- * @return the number of brokers needed in the Kafka cluster for the test.
- */
+ * User can override this method to return the number of brokers they want.
+ * By default only one broker will be launched.
+ *
+ * @return the number of brokers needed in the Kafka cluster for the test.
+ */
def clusterSize(): Int = 1
/**
- * User can override this method to apply customized configurations to the brokers.
- * By default the only configuration is number of partitions when topics get automatically created. The default value
- * is 1.
- * @return The configurations to be used by brokers.
- */
+ * User can override this method to apply customized configurations to the brokers.
+ * By default the only configuration is number of partitions when topics get automatically created. The default value
+ * is 1.
+ *
+ * @return The configurations to be used by brokers.
+ */
def overridingProps(): Properties = {
val props = new Properties()
props.setProperty(KafkaConfig.NumPartitionsProp, 1.toString)
@@ -54,13 +57,30 @@ abstract class AbstractIntegrationTestHarness extends AbstractKafkaServerTestHar
}
/**
- * Returns the bootstrap servers configuration string to be used by clients.
- * @return bootstrap servers string.
- */
+ * Returns the bootstrap servers configuration string to be used by clients.
+ *
+ * @return bootstrap servers string.
+ */
def bootstrapServers(): String = super.bootstrapUrl
def createSystemAdmin(system: String): KafkaSystemAdmin = {
- new KafkaSystemAdmin(system, bootstrapServers, connectZk = () => ZkUtils(zkConnect, zkSessionTimeout, zkConnectionTimeout, JaasUtils.isZkSecurityEnabled))
+
+ val map: java.util.Map[String, String] = new java.util.HashMap();
+
+ val KAFKA_CONSUMER_PROPERTY_PREFIX: String = "systems." + system + ".consumer."
+ val KAFKA_PRODUCER_PROPERTY_PREFIX: String = "systems." + system + ".producer."
+
+ map.put(KAFKA_CONSUMER_PROPERTY_PREFIX +
+ org.apache.kafka.clients.consumer.ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList)
+ map.put(JobConfig.JOB_NAME, "test.job")
+
+ map.put(KAFKA_CONSUMER_PROPERTY_PREFIX +
+ KafkaConsumerConfig.ZOOKEEPER_CONNECT, zkConnect)
+
+ val config = new MapConfig(map)
+ val consumerConfig = KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, system, KafkaConsumerConfig.createClientId("kafka-admin-consumer", config))
+
+ new KafkaSystemAdmin(system, new MapConfig(map), KafkaSystemConsumer.createKafkaConsumerImpl(system, consumerConfig));
}
}
\ No newline at end of file
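
The new createSystemAdmin above builds a MapConfig carrying the consumer bootstrap servers, job name, and zookeeper.connect, derives a KafkaConsumerConfig from it, and hands a raw Kafka consumer to the new Java KafkaSystemAdmin. A Java sketch of the same wiring, for illustration only (the system name "kafka" and the broker/ZK addresses are placeholders, not values from this patch):

    Map<String, String> cfg = new HashMap<>();
    cfg.put("systems.kafka.consumer.bootstrap.servers", "localhost:9092"); // consumer bootstrap servers
    cfg.put("systems.kafka.consumer.zookeeper.connect", "localhost:2181"); // needed by the admin for AdminUtils
    cfg.put("job.name", "test.job");                                       // JobConfig.JOB_NAME
    Config config = new MapConfig(cfg);
    KafkaConsumerConfig consumerConfig = KafkaConsumerConfig.getKafkaSystemConsumerConfig(
        config, "kafka", KafkaConsumerConfig.createClientId("kafka-admin-consumer", config));
    KafkaSystemAdmin admin = new KafkaSystemAdmin(
        "kafka", config, KafkaSystemConsumer.createKafkaConsumerImpl("kafka", consumerConfig));
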
[3/3] samza git commit: SAMZA-1868: Create new SamzaAdmin for Kafka
Posted by bo...@apache.org.
SAMZA-1868: Create new SamzaAdmin for Kafka
This pull request is a copy of #647 (which got garbled). It already addresses all the comments brought up in the other request.
Author: Boris S <bs...@linkedin.com>
Author: Boris S <bo...@apache.org>
Author: Boris Shkolnik <bs...@linkedin.com>
Reviewers: Shanthoosh Venkatraman <sv...@linkedin.com>, Prateek Maheshwari <pm...@apache.org>
Closes #662 from sborya/NewConsumerAdmin2
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/63d33fa0
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/63d33fa0
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/63d33fa0
Branch: refs/heads/master
Commit: 63d33fa0617488d25a0d3fb061423271392d20f6
Parents: 3c78e06
Author: Boris S <bs...@linkedin.com>
Authored: Thu Oct 11 14:26:51 2018 -0700
Committer: Boris S <bs...@linkedin.com>
Committed: Thu Oct 11 14:26:51 2018 -0700
----------------------------------------------------------------------
.../samza/application/ApplicationUtil.java | 1 -
.../org/apache/samza/config/SystemConfig.scala | 2 +-
.../samza/config/KafkaConsumerConfig.java | 194 ++++++
.../samza/system/kafka/KafkaSystemAdmin.java | 665 +++++++++++++++++++
.../samza/system/kafka/KafkaSystemConsumer.java | 366 ++++++++++
.../org/apache/samza/config/KafkaConfig.scala | 5 +
.../samza/config/KafkaConsumerConfig.java | 201 ------
.../samza/system/kafka/KafkaConsumerProxy.java | 2 +
.../samza/system/kafka/KafkaSystemAdmin.scala | 608 -----------------
.../kafka/KafkaSystemAdminUtilsScala.scala | 192 ++++++
.../samza/system/kafka/KafkaSystemConsumer.java | 371 -----------
.../samza/system/kafka/KafkaSystemFactory.scala | 63 +-
.../scala/org/apache/samza/util/KafkaUtil.scala | 11 -
.../samza/config/TestKafkaConsumerConfig.java | 60 +-
.../system/kafka/TestKafkaSystemAdminJava.java | 185 ++++--
.../kafka/TestKafkaSystemAdminWithMock.java | 317 +++++++++
.../system/kafka/TestKafkaSystemConsumer.java | 225 +++++++
.../kafka/TestKafkaSystemConsumerMetrics.java | 109 +++
.../system/kafka/TestKafkaSystemAdmin.scala | 109 ++-
.../system/kafka/TestKafkaSystemConsumer.java | 220 ------
.../operator/TestRepartitionJoinWindowApp.java | 18 +-
.../AbstractIntegrationTestHarness.scala | 60 +-
22 files changed, 2366 insertions(+), 1618 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-core/src/main/java/org/apache/samza/application/ApplicationUtil.java
----------------------------------------------------------------------
diff --git a/samza-core/src/main/java/org/apache/samza/application/ApplicationUtil.java b/samza-core/src/main/java/org/apache/samza/application/ApplicationUtil.java
index b39ad3c..f779619 100644
--- a/samza-core/src/main/java/org/apache/samza/application/ApplicationUtil.java
+++ b/samza-core/src/main/java/org/apache/samza/application/ApplicationUtil.java
@@ -59,5 +59,4 @@ public class ApplicationUtil {
}
return new LegacyTaskApplication(taskClassOption.get());
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-core/src/main/scala/org/apache/samza/config/SystemConfig.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/config/SystemConfig.scala b/samza-core/src/main/scala/org/apache/samza/config/SystemConfig.scala
index bebdbd8..00e65a7 100644
--- a/samza-core/src/main/scala/org/apache/samza/config/SystemConfig.scala
+++ b/samza-core/src/main/scala/org/apache/samza/config/SystemConfig.scala
@@ -50,7 +50,7 @@ class SystemConfig(config: Config) extends ScalaMapConfig(config) with Logging {
def getDefaultSystemOffset(systemName: String) = getOption(SystemConfig.CONSUMER_OFFSET_DEFAULT format (systemName))
- def deleteCommittedMessages(systemName: String) = getOption(SystemConfig.DELETE_COMMITTED_MESSAGES format (systemName))
+ def deleteCommittedMessages(systemName: String) = getBoolean(SystemConfig.DELETE_COMMITTED_MESSAGES format (systemName), false)
/**
* Returns a list of all system names from the config file. Useful for
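
With the change above, deleteCommittedMessages is read as a plain boolean with a false default instead of an Option[String], which is how the new KafkaSystemAdmin consumes it. A minimal Java sketch, for illustration only (the system name is a placeholder):

    // The flag defaults to false unless explicitly enabled in config.
    SystemConfig systemConfig = new SystemConfig(config);
    boolean deleteCommittedMessages = systemConfig.deleteCommittedMessages("kafka");
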
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/main/java/org/apache/samza/config/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/java/org/apache/samza/config/KafkaConsumerConfig.java b/samza-kafka/src/main/java/org/apache/samza/config/KafkaConsumerConfig.java
new file mode 100644
index 0000000..ad17e82
--- /dev/null
+++ b/samza-kafka/src/main/java/org/apache/samza/config/KafkaConsumerConfig.java
@@ -0,0 +1,194 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.samza.config;
+
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.RangeAssignor;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.apache.samza.SamzaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.Option;
+
+
+/**
+ * The configuration class for KafkaConsumer
+ */
+public class KafkaConsumerConfig extends HashMap<String, Object> {
+
+ public static final Logger LOG = LoggerFactory.getLogger(KafkaConsumerConfig.class);
+
+ public static final String ZOOKEEPER_CONNECT = "zookeeper.connect";
+
+ private final String systemName;
+ /*
+ * By default, KafkaConsumer will fetch some big number of available messages for all the partitions.
+ * This may cause memory issues. That's why we will limit the number of messages per partition we get on EACH poll().
+ */
+ static final String DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS = "100";
+
+ private KafkaConsumerConfig(Map<String, Object> props, String systemName) {
+ super(props);
+ this.systemName = systemName;
+ }
+
+ /**
+ * Create kafka consumer configs, based on the subset of global configs.
+ * @param config application config
+ * @param systemName system name
+ * @param clientId client id provided by the caller
+ * @return KafkaConsumerConfig
+ */
+ public static KafkaConsumerConfig getKafkaSystemConsumerConfig(Config config, String systemName, String clientId) {
+
+ Config subConf = config.subset(String.format("systems.%s.consumer.", systemName), true);
+
+ final String groupId = createConsumerGroupId(config);
+
+ Map<String, Object> consumerProps = new HashMap<>();
+ consumerProps.putAll(subConf);
+
+ consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
+ consumerProps.put(ConsumerConfig.CLIENT_ID_CONFIG, clientId);
+
+ // These are values we enforce in Samza, and they cannot be overwritten.
+
+ // Disable consumer auto-commit because Samza controls commits
+ consumerProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
+
+ // Translate samza config value to kafka config value
+ consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
+ getAutoOffsetResetValue((String) consumerProps.get(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG)));
+
+ // if consumer bootstrap servers are not configured, get them from the producer configs
+ if (!subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
+ String bootstrapServers =
+ config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
+ if (StringUtils.isEmpty(bootstrapServers)) {
+ throw new SamzaException("Missing " + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG + " config for " + systemName);
+ }
+ consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
+ }
+
+ // Always use default partition assignment strategy. Do not allow override.
+ consumerProps.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG, RangeAssignor.class.getName());
+
+ // The consumer is fully typed, and so is its deserialization. If no deserializer is provided,
+ // we default to byte[].
+ if (!consumerProps.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
+ LOG.info("setting key serialization for the consumer(for system {}) to ByteArrayDeserializer", systemName);
+ consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ }
+ if (!consumerProps.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
+ LOG.info("setting value serialization for the consumer(for system {}) to ByteArrayDeserializer", systemName);
+ consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ }
+
+ // Apply the default max poll records config if no value was provided
+ consumerProps.computeIfAbsent(ConsumerConfig.MAX_POLL_RECORDS_CONFIG,
+ (k) -> DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS);
+
+ return new KafkaConsumerConfig(consumerProps, systemName);
+ }
+
+ public String getClientId() {
+ String clientId = (String) get(ConsumerConfig.CLIENT_ID_CONFIG);
+ if (StringUtils.isBlank(clientId)) {
+ throw new SamzaException("client Id is not set for consumer for system=" + systemName);
+ }
+ return clientId;
+ }
+
+ // group id should be unique per job
+ static String createConsumerGroupId(Config config) {
+ Pair<String, String> jobNameId = getJobNameAndId(config);
+
+ return String.format("%s-%s", jobNameId.getLeft(), jobNameId.getRight());
+ }
+
+ // client id should be unique per job
+ public static String createClientId(String prefix, Config config) {
+
+ Pair<String, String> jobNameId = getJobNameAndId(config);
+ String jobName = jobNameId.getLeft();
+ String jobId = jobNameId.getRight();
+ return String.format("%s-%s-%s", prefix.replaceAll("\\W", "_"), jobName.replaceAll("\\W", "_"),
+ jobId.replaceAll("\\W", "_"));
+ }
+
+ public static Pair<String, String> getJobNameAndId(Config config) {
+ JobConfig jobConfig = new JobConfig(config);
+ Option jobNameOption = jobConfig.getName();
+ if (jobNameOption.isEmpty()) {
+ throw new ConfigException("Missing job name");
+ }
+ String jobName = (String) jobNameOption.get();
+ return new ImmutablePair<>(jobName, jobConfig.getJobId());
+ }
+
+ /**
+ * If settings for auto.reset in samza are different from settings in Kafka (auto.offset.reset),
+ * then need to convert them (see kafka.apache.org/documentation):
+ * "largest" -> "latest"
+ * "smallest" -> "earliest"
+ *
+ * If no setting is specified we return "latest" (Kafka's default).
+ * @param autoOffsetReset value from the app provided config
+ * @return String representing the config value for "auto.offset.reset" property
+ */
+ static String getAutoOffsetResetValue(final String autoOffsetReset) {
+ final String SAMZA_OFFSET_LARGEST = "largest";
+ final String SAMZA_OFFSET_SMALLEST = "smallest";
+ final String KAFKA_OFFSET_LATEST = "latest";
+ final String KAFKA_OFFSET_EARLIEST = "earliest";
+ final String KAFKA_OFFSET_NONE = "none";
+
+ if (autoOffsetReset == null) {
+ return KAFKA_OFFSET_LATEST; // return default
+ }
+
+ // accept kafka values directly
+ if (autoOffsetReset.equals(KAFKA_OFFSET_EARLIEST) || autoOffsetReset.equals(KAFKA_OFFSET_LATEST)
+ || autoOffsetReset.equals(KAFKA_OFFSET_NONE)) {
+ return autoOffsetReset;
+ }
+
+ String newAutoOffsetReset;
+ switch (autoOffsetReset) {
+ case SAMZA_OFFSET_LARGEST:
+ newAutoOffsetReset = KAFKA_OFFSET_LATEST;
+ break;
+ case SAMZA_OFFSET_SMALLEST:
+ newAutoOffsetReset = KAFKA_OFFSET_EARLIEST;
+ break;
+ default:
+ newAutoOffsetReset = KAFKA_OFFSET_LATEST;
+ }
+ LOG.info("AutoOffsetReset value converted from {} to {}", autoOffsetReset, newAutoOffsetReset);
+ return newAutoOffsetReset;
+ }
+}
\ No newline at end of file
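
The two helpers above do the offset-reset translation and the client-id sanitization described in their comments. An illustrative sketch of the expected behaviour (not part of the patch; getAutoOffsetResetValue() is package-private, so assume this runs from a test in org.apache.samza.config, and the job name/id values are hypothetical):

    Map<String, String> cfg = new HashMap<>();
    cfg.put("job.name", "my job");   // JobConfig.JOB_NAME; non-word characters get replaced with '_'
    Config config = new MapConfig(cfg);

    // Samza-style values are translated to Kafka values; Kafka values pass through unchanged.
    String a = KafkaConsumerConfig.getAutoOffsetResetValue("largest");   // -> "latest"
    String b = KafkaConsumerConfig.getAutoOffsetResetValue("smallest");  // -> "earliest"
    String c = KafkaConsumerConfig.getAutoOffsetResetValue(null);        // -> "latest" (default)

    // Client id is "<prefix>-<jobName>-<jobId>" with non-word characters replaced by '_'.
    String clientId = KafkaConsumerConfig.createClientId("consumer", config); // e.g. "consumer-my_job-1"
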
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/main/java/org/apache/samza/system/kafka/KafkaSystemAdmin.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/java/org/apache/samza/system/kafka/KafkaSystemAdmin.java b/samza-kafka/src/main/java/org/apache/samza/system/kafka/KafkaSystemAdmin.java
new file mode 100644
index 0000000..cb2db10
--- /dev/null
+++ b/samza-kafka/src/main/java/org/apache/samza/system/kafka/KafkaSystemAdmin.java
@@ -0,0 +1,665 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.samza.system.kafka;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableSet;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+import kafka.admin.AdminClient;
+import kafka.utils.ZkUtils;
+import org.apache.commons.lang3.NotImplementedException;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.common.PartitionInfo;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.samza.Partition;
+import org.apache.samza.SamzaException;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.config.SystemConfig;
+import org.apache.samza.system.ExtendedSystemAdmin;
+import org.apache.samza.system.StreamSpec;
+import org.apache.samza.system.StreamValidationException;
+import org.apache.samza.system.SystemStreamMetadata;
+import org.apache.samza.system.SystemStreamPartition;
+import org.apache.samza.util.ExponentialSleepStrategy;
+import org.apache.samza.util.ScalaJavaUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.Function0;
+import scala.Function1;
+import scala.Function2;
+import scala.collection.JavaConverters;
+import scala.runtime.AbstractFunction0;
+import scala.runtime.AbstractFunction1;
+import scala.runtime.AbstractFunction2;
+import scala.runtime.BoxedUnit;
+
+import static org.apache.samza.config.KafkaConsumerConfig.*;
+
+
+public class KafkaSystemAdmin implements ExtendedSystemAdmin {
+ private static final Logger LOG = LoggerFactory.getLogger(KafkaSystemAdmin.class);
+
+ // Default exponential sleep strategy values
+ protected static final double DEFAULT_EXPONENTIAL_SLEEP_BACK_OFF_MULTIPLIER = 2.0;
+ protected static final long DEFAULT_EXPONENTIAL_SLEEP_INITIAL_DELAY_MS = 500;
+ protected static final long DEFAULT_EXPONENTIAL_SLEEP_MAX_DELAY_MS = 10000;
+ protected static final int MAX_RETRIES_ON_EXCEPTION = 5;
+ protected static final int DEFAULT_REPL_FACTOR = 2;
+
+ // used in TestRepartitionJoinWindowApp TODO - remove SAMZA-1945
+ @VisibleForTesting
+ public static volatile boolean deleteMessageCalled = false;
+
+ protected final String systemName;
+ protected final Consumer metadataConsumer;
+
+ // get ZkUtils object to connect to Kafka's ZK.
+ private final Supplier<ZkUtils> getZkConnection;
+
+ // Custom properties to create a new coordinator stream.
+ private final Properties coordinatorStreamProperties;
+
+ // Replication factor for a new coordinator stream.
+ private final int coordinatorStreamReplicationFactor;
+
+ // Replication factor and kafka properties for changelog topic creation
+ private final Map<String, ChangelogInfo> changelogTopicMetaInformation;
+
+ // Kafka properties for intermediate topics creation
+ private final Map<String, Properties> intermediateStreamProperties;
+
+ // adminClient is required for deleteCommittedMessages operation
+ private final AdminClient adminClient;
+
+ // used for intermediate streams
+ private final boolean deleteCommittedMessages;
+
+ private final AtomicBoolean stopped = new AtomicBoolean(false);
+
+ public KafkaSystemAdmin(String systemName, Config config, Consumer metadataConsumer) {
+ this.systemName = systemName;
+
+ if (metadataConsumer == null) {
+ throw new SamzaException(
+ "Cannot construct KafkaSystemAdmin for system " + systemName + " with null metadataConsumer");
+ }
+ this.metadataConsumer = metadataConsumer;
+
+ // populate brokerList from either consumer or producer configs
+ Properties props = new Properties();
+ String brokerList = config.get(
+ String.format(KafkaConfig.CONSUMER_CONFIGS_CONFIG_KEY(), systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
+ if (brokerList == null) {
+ brokerList = config.get(String.format(KafkaConfig.PRODUCER_CONFIGS_CONFIG_KEY(), systemName,
+ ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
+ }
+ if (brokerList == null) {
+ throw new SamzaException(
+ ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG + " is required for systemAdmin for system " + systemName);
+ }
+ props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList);
+
+ // kafka.admin.AdminUtils requires zkConnect
+ // this will change after we move to the new org.apache..AdminClient
+ String zkConnect =
+ config.get(String.format(KafkaConfig.CONSUMER_CONFIGS_CONFIG_KEY(), systemName, ZOOKEEPER_CONNECT));
+ if (StringUtils.isBlank(zkConnect)) {
+ throw new SamzaException("Missing zookeeper.connect config for admin for system " + systemName);
+ }
+ props.put(ZOOKEEPER_CONNECT, zkConnect);
+
+ adminClient = AdminClient.create(props);
+
+ getZkConnection = () -> {
+ return ZkUtils.apply(zkConnect, 6000, 6000, false);
+ };
+
+ KafkaConfig kafkaConfig = new KafkaConfig(config);
+ coordinatorStreamReplicationFactor = Integer.valueOf(kafkaConfig.getCoordinatorReplicationFactor());
+ coordinatorStreamProperties = KafkaSystemAdminUtilsScala.getCoordinatorTopicProperties(kafkaConfig);
+
+ Map<String, String> storeToChangelog =
+ JavaConverters.mapAsJavaMapConverter(kafkaConfig.getKafkaChangelogEnabledStores()).asJava();
+ // Construct the meta information for each topic, if the replication factor is not defined,
+ // we use 2 (DEFAULT_REPL_FACTOR) as the number of replicas for the change log stream.
+ changelogTopicMetaInformation = new HashMap<>();
+ for (Map.Entry<String, String> e : storeToChangelog.entrySet()) {
+ String storeName = e.getKey();
+ String topicName = e.getValue();
+ String replicationFactorStr = kafkaConfig.getChangelogStreamReplicationFactor(storeName);
+ int replicationFactor =
+ StringUtils.isEmpty(replicationFactorStr) ? DEFAULT_REPL_FACTOR : Integer.valueOf(replicationFactorStr);
+ ChangelogInfo changelogInfo =
+ new ChangelogInfo(replicationFactor, kafkaConfig.getChangelogKafkaProperties(storeName));
+ LOG.info(String.format("Creating topic meta information for topic: %s with replication factor: %s", topicName,
+ replicationFactor));
+ changelogTopicMetaInformation.put(topicName, changelogInfo);
+ }
+
+ // special flag to allow/enforce deleting of committed messages
+ SystemConfig systemConfig = new SystemConfig(config);
+ this.deleteCommittedMessages = systemConfig.deleteCommittedMessages(systemName);
+
+ intermediateStreamProperties =
+ JavaConverters.mapAsJavaMapConverter(KafkaSystemAdminUtilsScala.getIntermediateStreamProperties(config))
+ .asJava();
+
+ LOG.info(String.format("Created KafkaSystemAdmin for system %s", systemName));
+ }
+
+ @Override
+ public void start() {
+ // Please note: there is a slight inconsistency in the use of this class.
+ // Some of the functionality of this class may actually be used BEFORE start() is called.
+ // The SamzaContainer gets metadata (using this class) in SamzaContainer.apply,
+ // but this "start" actually gets called in SamzaContainer.run.
+ // review this usage (SAMZA-1888)
+
+ // Throw exception if start is called after stop
+ if (stopped.get()) {
+ throw new IllegalStateException("SamzaKafkaAdmin.start() is called after stop()");
+ }
+ }
+
+ @Override
+ public void stop() {
+ if (stopped.compareAndSet(false, true)) {
+ try {
+ metadataConsumer.close();
+ } catch (Exception e) {
+ LOG.warn("metadataConsumer.close for system " + systemName + " failed with exception.", e);
+ }
+ try {
+ adminClient.close();
+ } catch (Exception e) {
+ LOG.warn("adminClient.close for system " + systemName + " failed with exception.", e);
+ }
+ }
+ }
+
+ /**
+ * Note! This method does not populate SystemStreamMetadata for each stream with real data.
+ * Thus, this method should ONLY be used to get number of partitions for each stream.
+ * It will throw NotImplementedException if anyone tries to access the actual metadata.
+ * @param streamNames set of streams for which to get the partition counts
+ * @param cacheTTL cache TTL if caching the data
+ * @return a map, keyed on stream names. Number of partitions in SystemStreamMetadata is the output of this method.
+ */
+ @Override
+ public Map<String, SystemStreamMetadata> getSystemStreamPartitionCounts(Set<String> streamNames, long cacheTTL) {
+ // This optimization omits actual metadata for performance. Instead, we inject a dummy for all partitions.
+ final SystemStreamMetadata.SystemStreamPartitionMetadata dummySspm =
+ new SystemStreamMetadata.SystemStreamPartitionMetadata(null, null, null) {
+ String msg =
+ "getSystemStreamPartitionCounts does not populate SystemStreaMetadata info. Only number of partitions";
+
+ @Override
+ public String getOldestOffset() {
+ throw new NotImplementedException(msg);
+ }
+
+ @Override
+ public String getNewestOffset() {
+ throw new NotImplementedException(msg);
+ }
+
+ @Override
+ public String getUpcomingOffset() {
+ throw new NotImplementedException(msg);
+ }
+ };
+
+ ExponentialSleepStrategy strategy = new ExponentialSleepStrategy(DEFAULT_EXPONENTIAL_SLEEP_BACK_OFF_MULTIPLIER,
+ DEFAULT_EXPONENTIAL_SLEEP_INITIAL_DELAY_MS, DEFAULT_EXPONENTIAL_SLEEP_MAX_DELAY_MS);
+
+ Function1<ExponentialSleepStrategy.RetryLoop, Map<String, SystemStreamMetadata>> fetchMetadataOperation =
+ new AbstractFunction1<ExponentialSleepStrategy.RetryLoop, Map<String, SystemStreamMetadata>>() {
+ @Override
+ public Map<String, SystemStreamMetadata> apply(ExponentialSleepStrategy.RetryLoop loop) {
+ Map<String, SystemStreamMetadata> allMetadata = new HashMap<>();
+
+ streamNames.forEach(streamName -> {
+ Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> partitionMetadata = new HashMap<>();
+
+ List<PartitionInfo> partitionInfos = metadataConsumer.partitionsFor(streamName);
+ LOG.debug("Stream {} has partitions {}", streamName, partitionInfos);
+
+ partitionInfos.forEach(partitionInfo -> {
+ partitionMetadata.put(new Partition(partitionInfo.partition()), dummySspm);
+ });
+
+ allMetadata.put(streamName, new SystemStreamMetadata(streamName, partitionMetadata));
+ });
+
+ loop.done();
+ return allMetadata;
+ }
+ };
+
+ Map<String, SystemStreamMetadata> result = strategy.run(fetchMetadataOperation,
+ new AbstractFunction2<Exception, ExponentialSleepStrategy.RetryLoop, BoxedUnit>() {
+ @Override
+ public BoxedUnit apply(Exception exception, ExponentialSleepStrategy.RetryLoop loop) {
+ if (loop.sleepCount() < MAX_RETRIES_ON_EXCEPTION) {
+ LOG.warn(String.format("Fetching systemstreampartition counts for: %s threw an exception. Retrying.",
+ streamNames), exception);
+ } else {
+ LOG.error(String.format("Fetching systemstreampartition counts for: %s threw an exception.", streamNames),
+ exception);
+ loop.done();
+ throw new SamzaException(exception);
+ }
+ return null;
+ }
+ }).get();
+
+ LOG.info("SystemStream partition counts for system {}: {}", systemName, result);
+ return result;
+ }
+
+ @Override
+ public Map<SystemStreamPartition, String> getOffsetsAfter(Map<SystemStreamPartition, String> offsets) {
+ // This is safe to do with Kafka, even if a topic is key-deduped. If the
+ // offset doesn't exist on a compacted topic, Kafka will return the first
+ // message AFTER the offset that was specified in the fetch request.
+ return offsets.entrySet()
+ .stream()
+ .collect(Collectors.toMap(Map.Entry::getKey, (entry) -> String.valueOf(Long.valueOf(entry.getValue()) + 1)));
+ }
+
+ @Override
+ public Map<String, SystemStreamMetadata> getSystemStreamMetadata(Set<String> streamNames) {
+ return getSystemStreamMetadata(streamNames,
+ new ExponentialSleepStrategy(DEFAULT_EXPONENTIAL_SLEEP_BACK_OFF_MULTIPLIER,
+ DEFAULT_EXPONENTIAL_SLEEP_INITIAL_DELAY_MS, DEFAULT_EXPONENTIAL_SLEEP_MAX_DELAY_MS));
+ }
+
+ @Override
+ public Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> getSSPMetadata(
+ Set<SystemStreamPartition> ssps) {
+
+ LOG.info("Fetching SSP metadata for: {}", ssps);
+ List<TopicPartition> topicPartitions = ssps.stream()
+ .map(ssp -> new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId()))
+ .collect(Collectors.toList());
+
+ OffsetsMaps topicPartitionsMetadata = fetchTopicPartitionsMetadata(topicPartitions);
+
+ Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> sspToSSPMetadata = new HashMap<>();
+ for (SystemStreamPartition ssp : ssps) {
+ String oldestOffset = topicPartitionsMetadata.getOldestOffsets().get(ssp);
+ String newestOffset = topicPartitionsMetadata.getNewestOffsets().get(ssp);
+ String upcomingOffset = topicPartitionsMetadata.getUpcomingOffsets().get(ssp);
+
+ sspToSSPMetadata.put(ssp,
+ new SystemStreamMetadata.SystemStreamPartitionMetadata(oldestOffset, newestOffset, upcomingOffset));
+ }
+ return sspToSSPMetadata;
+ }
+
+ /**
+ * Given a set of stream names (topics), fetch metadata from Kafka for each
+ * stream, and return a map from stream name to SystemStreamMetadata for
+ * each stream. This method will return null for oldest and newest offsets
+ * if a given SystemStreamPartition is empty. This method will block and
+ * retry indefinitely until it gets a successful response from Kafka.
+ *
+ * @param streamNames a set of strings of stream names/topics
+ * @param retryBackoff retry backoff strategy
+ * @return a map from topic to SystemStreamMetadata which has offsets for each partition
+ */
+ public Map<String, SystemStreamMetadata> getSystemStreamMetadata(Set<String> streamNames,
+ ExponentialSleepStrategy retryBackoff) {
+
+ LOG.info("Fetching system stream metadata for {} from system {}", streamNames, systemName);
+
+ Function1<ExponentialSleepStrategy.RetryLoop, Map<String, SystemStreamMetadata>> fetchMetadataOperation =
+ new AbstractFunction1<ExponentialSleepStrategy.RetryLoop, Map<String, SystemStreamMetadata>>() {
+ @Override
+ public Map<String, SystemStreamMetadata> apply(ExponentialSleepStrategy.RetryLoop loop) {
+ Map<String, SystemStreamMetadata> metadata = fetchSystemStreamMetadata(streamNames);
+ loop.done();
+ return metadata;
+ }
+ };
+
+ Function2<Exception, ExponentialSleepStrategy.RetryLoop, BoxedUnit> onExceptionRetryOperation =
+ new AbstractFunction2<Exception, ExponentialSleepStrategy.RetryLoop, BoxedUnit>() {
+ @Override
+ public BoxedUnit apply(Exception exception, ExponentialSleepStrategy.RetryLoop loop) {
+ if (loop.sleepCount() < MAX_RETRIES_ON_EXCEPTION) {
+ LOG.warn(
+ String.format("Fetching system stream metadata for: %s threw an exception. Retrying.", streamNames),
+ exception);
+ } else {
+ LOG.error(String.format("Fetching system stream metadata for: %s threw an exception.", streamNames),
+ exception);
+ loop.done();
+ throw new SamzaException(exception);
+ }
+
+ return null;
+ }
+ };
+
+ Function0<Map<String, SystemStreamMetadata>> fallbackOperation =
+ new AbstractFunction0<Map<String, SystemStreamMetadata>>() {
+ @Override
+ public Map<String, SystemStreamMetadata> apply() {
+ throw new SamzaException("Failed to get system stream metadata");
+ }
+ };
+
+ Map<String, SystemStreamMetadata> result =
+ retryBackoff.run(fetchMetadataOperation, onExceptionRetryOperation).getOrElse(fallbackOperation);
+ return result;
+ }
+
+ @Override
+ public String getNewestOffset(SystemStreamPartition ssp, Integer maxRetries) {
+ LOG.info("Fetching newest offset for: {}", ssp);
+
+ ExponentialSleepStrategy strategy = new ExponentialSleepStrategy(DEFAULT_EXPONENTIAL_SLEEP_BACK_OFF_MULTIPLIER,
+ DEFAULT_EXPONENTIAL_SLEEP_INITIAL_DELAY_MS, DEFAULT_EXPONENTIAL_SLEEP_MAX_DELAY_MS);
+
+ Function1<ExponentialSleepStrategy.RetryLoop, String> fetchNewestOffset =
+ new AbstractFunction1<ExponentialSleepStrategy.RetryLoop, String>() {
+ @Override
+ public String apply(ExponentialSleepStrategy.RetryLoop loop) {
+ String result = fetchNewestOffset(ssp);
+ loop.done();
+ return result;
+ }
+ };
+
+ String offset = strategy.run(fetchNewestOffset,
+ new AbstractFunction2<Exception, ExponentialSleepStrategy.RetryLoop, BoxedUnit>() {
+ @Override
+ public BoxedUnit apply(Exception exception, ExponentialSleepStrategy.RetryLoop loop) {
+ if (loop.sleepCount() < maxRetries) {
+ LOG.warn(String.format("Fetching newest offset for: %s threw an exception. Retrying.", ssp), exception);
+ } else {
+ LOG.error(String.format("Fetching newest offset for: %s threw an exception.", ssp), exception);
+ loop.done();
+ throw new SamzaException("Exception while trying to get newest offset", exception);
+ }
+ return null;
+ }
+ }).get();
+
+ return offset;
+ }
+
+ /**
+ * Convert TopicPartition to SystemStreamPartition
+ * @param topicPartition the topic partition to convert
+ * @return an instance of SystemStreamPartition
+ */
+ private SystemStreamPartition toSystemStreamPartition(TopicPartition topicPartition) {
+ String topic = topicPartition.topic();
+ Partition partition = new Partition(topicPartition.partition());
+ return new SystemStreamPartition(systemName, topic, partition);
+ }
+
+ /**
+ * Uses {@code metadataConsumer} to fetch the metadata for the {@code topicPartitions}.
+ * Warning: If multiple threads call this with the same {@code metadataConsumer}, then this will not protect against
+ * concurrent access to the {@code metadataConsumer}.
+ */
+ private OffsetsMaps fetchTopicPartitionsMetadata(List<TopicPartition> topicPartitions) {
+ Map<SystemStreamPartition, String> oldestOffsets = new HashMap<>();
+ Map<SystemStreamPartition, String> newestOffsets = new HashMap<>();
+ Map<SystemStreamPartition, String> upcomingOffsets = new HashMap<>();
+
+ Map<TopicPartition, Long> oldestOffsetsWithLong = metadataConsumer.beginningOffsets(topicPartitions);
+ LOG.debug("Kafka-fetched beginningOffsets: {}", oldestOffsetsWithLong);
+ Map<TopicPartition, Long> upcomingOffsetsWithLong = metadataConsumer.endOffsets(topicPartitions);
+ LOG.debug("Kafka-fetched endOffsets: {}", upcomingOffsetsWithLong);
+
+ oldestOffsetsWithLong.forEach((topicPartition, offset) -> {
+ oldestOffsets.put(toSystemStreamPartition(topicPartition), String.valueOf(offset));
+ });
+
+ upcomingOffsetsWithLong.forEach((topicPartition, offset) -> {
+ upcomingOffsets.put(toSystemStreamPartition(topicPartition), String.valueOf(offset));
+
+ // Kafka's beginning Offset corresponds to the offset for the oldest message.
+ // Kafka's end offset corresponds to the offset for the upcoming message, and it is the newest offset + 1.
+ // When upcoming offset is <=0, the topic appears empty, we put oldest offset 0 and the newest offset null.
+ // When upcoming offset is >0, we subtract the upcoming offset by one for the newest offset.
+ // For normal case, the newest offset will correspond to the offset of the newest message in the stream;
+ // But for the big message, it is not the case. Seeking on the newest offset gives nothing for the newest big message.
+ // For now, we keep it as is for newest offsets the same as historical metadata structure.
+ if (offset <= 0) {
+ LOG.warn(
+ "Empty Kafka topic partition {} with upcoming offset {}. Skipping newest offset and setting oldest offset to 0 to consume from beginning",
+ topicPartition, offset);
+ oldestOffsets.put(toSystemStreamPartition(topicPartition), "0");
+ } else {
+ newestOffsets.put(toSystemStreamPartition(topicPartition), String.valueOf(offset - 1));
+ }
+ });
+ return new OffsetsMaps(oldestOffsets, newestOffsets, upcomingOffsets);
+ }
+
+ /**
+ * Fetch SystemStreamMetadata for each topic with the consumer
+ * @param topics set of topics to get metadata info for
+ * @return map of topic to SystemStreamMetadata
+ */
+ private Map<String, SystemStreamMetadata> fetchSystemStreamMetadata(Set<String> topics) {
+ Map<SystemStreamPartition, String> allOldestOffsets = new HashMap<>();
+ Map<SystemStreamPartition, String> allNewestOffsets = new HashMap<>();
+ Map<SystemStreamPartition, String> allUpcomingOffsets = new HashMap<>();
+
+ LOG.info("Fetching SystemStreamMetadata for topics {} on system {}", topics, systemName);
+
+ topics.forEach(topic -> {
+ List<PartitionInfo> partitionInfos = metadataConsumer.partitionsFor(topic);
+
+ if (partitionInfos == null) {
+ String msg = String.format("Partition info not(yet?) available for system %s topic %s", systemName, topic);
+ throw new SamzaException(msg);
+ }
+
+ List<TopicPartition> topicPartitions = partitionInfos.stream()
+ .map(partitionInfo -> new TopicPartition(partitionInfo.topic(), partitionInfo.partition()))
+ .collect(Collectors.toList());
+
+ OffsetsMaps offsetsForTopic = fetchTopicPartitionsMetadata(topicPartitions);
+ allOldestOffsets.putAll(offsetsForTopic.getOldestOffsets());
+ allNewestOffsets.putAll(offsetsForTopic.getNewestOffsets());
+ allUpcomingOffsets.putAll(offsetsForTopic.getUpcomingOffsets());
+ });
+
+ scala.collection.immutable.Map<String, SystemStreamMetadata> result =
+ KafkaSystemAdminUtilsScala.assembleMetadata(ScalaJavaUtil.toScalaMap(allOldestOffsets),
+ ScalaJavaUtil.toScalaMap(allNewestOffsets), ScalaJavaUtil.toScalaMap(allUpcomingOffsets));
+
+ LOG.debug("assembled SystemStreamMetadata is: {}", result);
+ return JavaConverters.mapAsJavaMapConverter(result).asJava();
+ }
+
+ private String fetchNewestOffset(SystemStreamPartition ssp) {
+ LOG.debug("Fetching newest offset for {}", ssp);
+ String newestOffset;
+
+ TopicPartition topicPartition = new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
+
+ // the offsets returned from the consumer are of type Long
+ Long upcomingOffset =
+ (Long) metadataConsumer.endOffsets(Collections.singletonList(topicPartition)).get(topicPartition);
+
+ // Kafka's "latest" offset is always last message in stream's offset + 1,
+ // so get newest message in stream by subtracting one. This is safe
+ // even for key-deduplicated streams, since the last message will
+ // never be deduplicated.
+ if (upcomingOffset <= 0) {
+ LOG.debug("Stripping newest offsets for {} because the topic appears empty.", topicPartition);
+ newestOffset = null;
+ } else {
+ newestOffset = String.valueOf(upcomingOffset - 1);
+ }
+
+ LOG.info("Newest offset for ssp {} is: {}", ssp, newestOffset);
+ return newestOffset;
+ }
+
+ @Override
+ public Integer offsetComparator(String offset1, String offset2) {
+ if (offset1 == null || offset2 == null) {
+ return -1;
+ }
+
+ return Long.valueOf(offset1).compareTo(Long.valueOf(offset2));
+ }
+
+ @Override
+ public boolean createStream(StreamSpec streamSpec) {
+ LOG.info("Creating Kafka topic: {} on system: {}", streamSpec.getPhysicalName(), streamSpec.getSystemName());
+
+ return KafkaSystemAdminUtilsScala.createStream(toKafkaSpec(streamSpec), getZkConnection);
+ }
+
+ @Override
+ public boolean clearStream(StreamSpec streamSpec) {
+ LOG.info("Creating Kafka topic: {} on system: {}", streamSpec.getPhysicalName(), streamSpec.getSystemName());
+
+ KafkaSystemAdminUtilsScala.clearStream(streamSpec, getZkConnection);
+
+ Map<String, List<PartitionInfo>> topicsMetadata = getTopicMetadata(ImmutableSet.of(streamSpec.getPhysicalName()));
+ return topicsMetadata.get(streamSpec.getPhysicalName()).isEmpty();
+ }
+
+ /**
+ * Converts a StreamSpec into a KafkaStreamSpec. Special handling for coordinator and changelog streams.
+ * @param spec a StreamSpec object
+ * @return KafkaStreamSpec object
+ */
+ KafkaStreamSpec toKafkaSpec(StreamSpec spec) {
+ KafkaStreamSpec kafkaSpec;
+ if (spec.isChangeLogStream()) {
+ String topicName = spec.getPhysicalName();
+ ChangelogInfo topicMeta = changelogTopicMetaInformation.get(topicName);
+ if (topicMeta == null) {
+ throw new StreamValidationException("Unable to find topic information for topic " + topicName);
+ }
+
+ kafkaSpec = new KafkaStreamSpec(spec.getId(), topicName, systemName, spec.getPartitionCount(),
+ topicMeta.replicationFactor(), topicMeta.kafkaProps());
+ } else if (spec.isCoordinatorStream()) {
+ kafkaSpec =
+ new KafkaStreamSpec(spec.getId(), spec.getPhysicalName(), systemName, 1, coordinatorStreamReplicationFactor,
+ coordinatorStreamProperties);
+ } else if (intermediateStreamProperties.containsKey(spec.getId())) {
+ kafkaSpec = KafkaStreamSpec.fromSpec(spec).copyWithProperties(intermediateStreamProperties.get(spec.getId()));
+ } else {
+ kafkaSpec = KafkaStreamSpec.fromSpec(spec);
+ }
+ return kafkaSpec;
+ }
+
+ @Override
+ public void validateStream(StreamSpec streamSpec) throws StreamValidationException {
+ LOG.info("About to validate stream = " + streamSpec);
+
+ String streamName = streamSpec.getPhysicalName();
+ SystemStreamMetadata systemStreamMetadata =
+ getSystemStreamMetadata(Collections.singleton(streamName)).get(streamName);
+ if (systemStreamMetadata == null) {
+ throw new StreamValidationException(
+ "Failed to obtain metadata for stream " + streamName + ". Validation failed.");
+ }
+
+ int actualPartitionCounter = systemStreamMetadata.getSystemStreamPartitionMetadata().size();
+ int expectedPartitionCounter = streamSpec.getPartitionCount();
+ LOG.info("actualCount=" + actualPartitionCounter + "; expectedCount=" + expectedPartitionCounter);
+ if (actualPartitionCounter != expectedPartitionCounter) {
+ throw new StreamValidationException(
+ String.format("Mismatch of partitions for stream %s. Expected %d, got %d. Validation failed.", streamName,
+ expectedPartitionCounter, actualPartitionCounter));
+ }
+ }
+
+ // get partition info for topic
+ Map<String, List<PartitionInfo>> getTopicMetadata(Set<String> topics) {
+ Map<String, List<PartitionInfo>> streamToPartitionsInfo = new HashMap();
+ List<PartitionInfo> partitionInfoList;
+ for (String topic : topics) {
+ partitionInfoList = metadataConsumer.partitionsFor(topic);
+ streamToPartitionsInfo.put(topic, partitionInfoList);
+ }
+
+ return streamToPartitionsInfo;
+ }
+
+ /**
+ * Delete records up to (and including) the provided ssp offsets for
+ * all system stream partitions specified in the map.
+ * This only works with Kafka cluster 0.11 or later. Otherwise it's a no-op.
+ * @param offsets specifies up to what offsets the messages should be deleted
+ */
+ @Override
+ public void deleteMessages(Map<SystemStreamPartition, String> offsets) {
+ if (deleteCommittedMessages) {
+ KafkaSystemAdminUtilsScala.deleteMessages(adminClient, offsets);
+ deleteMessageCalled = true;
+ }
+ }
+
+ /**
+ * Container for metadata about offsets.
+ */
+ private static class OffsetsMaps {
+ private final Map<SystemStreamPartition, String> oldestOffsets;
+ private final Map<SystemStreamPartition, String> newestOffsets;
+ private final Map<SystemStreamPartition, String> upcomingOffsets;
+
+ private OffsetsMaps(Map<SystemStreamPartition, String> oldestOffsets,
+ Map<SystemStreamPartition, String> newestOffsets, Map<SystemStreamPartition, String> upcomingOffsets) {
+ this.oldestOffsets = oldestOffsets;
+ this.newestOffsets = newestOffsets;
+ this.upcomingOffsets = upcomingOffsets;
+ }
+
+ private Map<SystemStreamPartition, String> getOldestOffsets() {
+ return oldestOffsets;
+ }
+
+ private Map<SystemStreamPartition, String> getNewestOffsets() {
+ return newestOffsets;
+ }
+
+ private Map<SystemStreamPartition, String> getUpcomingOffsets() {
+ return upcomingOffsets;
+ }
+ }
+}
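
A usage sketch for the admin above, illustrative only (the topic name is a placeholder), showing how a caller reads the metadata assembled by fetchSystemStreamMetadata() with the offset semantics documented in fetchTopicPartitionsMetadata():

    Map<String, SystemStreamMetadata> metadata =
        admin.getSystemStreamMetadata(Collections.singleton("PageViewEvent"));
    metadata.get("PageViewEvent").getSystemStreamPartitionMetadata().forEach((partition, sspm) -> {
      // oldest = first available offset, upcoming = next offset to be written,
      // newest = upcoming - 1 (null when the partition is empty).
      System.out.printf("partition %s: oldest=%s newest=%s upcoming=%s%n",
          partition, sspm.getOldestOffset(), sspm.getNewestOffset(), sspm.getUpcomingOffset());
    });
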
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/main/java/org/apache/samza/system/kafka/KafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/java/org/apache/samza/system/kafka/KafkaSystemConsumer.java b/samza-kafka/src/main/java/org/apache/samza/system/kafka/KafkaSystemConsumer.java
new file mode 100644
index 0000000..65d0e42
--- /dev/null
+++ b/samza-kafka/src/main/java/org/apache/samza/system/kafka/KafkaSystemConsumer.java
@@ -0,0 +1,366 @@
+
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.samza.system.kafka;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import kafka.common.TopicAndPartition;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.samza.SamzaException;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.system.IncomingMessageEnvelope;
+import org.apache.samza.system.SystemConsumer;
+import org.apache.samza.system.SystemStreamPartition;
+import org.apache.samza.util.BlockingEnvelopeMap;
+import org.apache.samza.util.Clock;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.Option;
+
+
+public class KafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements SystemConsumer {
+
+ private static final Logger LOG = LoggerFactory.getLogger(KafkaSystemConsumer.class);
+
+ private static final long FETCH_THRESHOLD = 50000;
+ private static final long FETCH_THRESHOLD_BYTES = -1L;
+
+ protected final Consumer<K, V> kafkaConsumer;
+ protected final String systemName;
+ protected final String clientId;
+ private final AtomicBoolean stopped = new AtomicBoolean(false);
+ private final AtomicBoolean started = new AtomicBoolean(false);
+ private final Config config;
+ private final boolean fetchThresholdBytesEnabled;
+ private final KafkaSystemConsumerMetrics metrics;
+
+ // This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
+ final KafkaConsumerMessageSink messageSink;
+
+ // This proxy contains a separate thread, which reads kafka messages (with consumer.poll()) and populates
+ // BlockingEnvelopMap's buffers.
+ final private KafkaConsumerProxy proxy;
+
+ // keep registration data until the start - mapping between registered SSPs and topicPartitions, and their offsets
+ final Map<TopicPartition, String> topicPartitionsToOffset = new HashMap<>();
+ final Map<TopicPartition, SystemStreamPartition> topicPartitionsToSSP = new HashMap<>();
+
+ long perPartitionFetchThreshold;
+ long perPartitionFetchThresholdBytes;
+
+ /**
+ * Create a KafkaSystemConsumer for the provided {@code systemName}
+ * @param kafkaConsumer the Kafka consumer used to read messages
+ * @param systemName system name for which we create the consumer
+ * @param config application config
+ * @param clientId client id used by this consumer
+ * @param metrics metrics for this KafkaSystemConsumer
+ * @param clock system clock
+ */
+ public KafkaSystemConsumer(Consumer<K, V> kafkaConsumer, String systemName, Config config, String clientId,
+ KafkaSystemConsumerMetrics metrics, Clock clock) {
+
+ super(metrics.registry(), clock, metrics.getClass().getName());
+
+ this.kafkaConsumer = kafkaConsumer;
+ this.clientId = clientId;
+ this.systemName = systemName;
+ this.config = config;
+ this.metrics = metrics;
+
+ fetchThresholdBytesEnabled = new KafkaConfig(config).isConsumerFetchThresholdBytesEnabled(systemName);
+
+ // create a sink for passing the messages between the proxy and the consumer
+ messageSink = new KafkaConsumerMessageSink();
+
+ // Create the proxy to do the actual message reading.
+ String metricName = String.format("%s-%s", systemName, clientId);
+ proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink, metrics, metricName);
+ LOG.info("{}: Created KafkaConsumerProxy {} ", this, proxy);
+ }
+
+ /**
+ * Create internal kafka consumer object, which will be used in the Proxy.
+ * @param systemName system name for which we create the consumer
+ * @param kafkaConsumerConfig config object for Kafka's KafkaConsumer
+ * @return KafkaConsumer object
+ */
+ public static <K,V> KafkaConsumer<K, V> createKafkaConsumerImpl(String systemName,
+ HashMap<String, Object> kafkaConsumerConfig) {
+
+ LOG.info("Instantiating KafkaConsumer for systemName {} with properties {}", systemName, kafkaConsumerConfig);
+ return new KafkaConsumer<>(kafkaConsumerConfig);
+ }
+
+ @Override
+ public void start() {
+ if (!started.compareAndSet(false, true)) {
+ LOG.warn("{}: Attempting to start the consumer for the second (or more) time.", this);
+ return;
+ }
+ if (stopped.get()) {
+ LOG.error("{}: Attempting to start a stopped consumer", this);
+ return;
+ }
+ // initialize the subscriptions for all the registered TopicPartitions
+ startSubscription();
+ // needs to be called after all the registrations are completed
+ setFetchThresholds();
+
+ startConsumer();
+ LOG.info("{}: Consumer started", this);
+ }
+
+ private void startSubscription() {
+ //subscribe to all the registered TopicPartitions
+ LOG.info("{}: Consumer subscribes to {}", this, topicPartitionsToSSP.keySet());
+ try {
+ synchronized (kafkaConsumer) {
+ // we are using assign (and not subscribe), so we need to specify both topic and partition
+ kafkaConsumer.assign(topicPartitionsToSSP.keySet());
+ }
+ } catch (Exception e) {
+ throw new SamzaException("Consumer subscription failed for " + this, e);
+ }
+ }
+
+ /**
+ * Set the offsets to start from.
+ * Register the TopicPartitions with the proxy.
+ * Start the proxy.
+ */
+ void startConsumer() {
+ // set the offset for each TopicPartition
+ if (topicPartitionsToOffset.size() <= 0) {
+ LOG.error ("{}: Consumer is not subscribed to any SSPs", this);
+ }
+
+ topicPartitionsToOffset.forEach((tp, startingOffsetString) -> {
+ long startingOffset = Long.valueOf(startingOffsetString);
+
+ try {
+ synchronized (kafkaConsumer) {
+ kafkaConsumer.seek(tp, startingOffset); // this value should already be the 'upcoming' value
+ }
+ } catch (Exception e) {
+ // all recoverable exceptions are handled by the client.
+ // if we get here there is nothing left to do but bail out.
+ String msg =
+ String.format("%s: Got Exception while seeking to %s for partition %s", this, startingOffsetString, tp);
+ LOG.error(msg, e);
+ throw new SamzaException(msg, e);
+ }
+
+ LOG.info("{}: Changing consumer's starting offset for tp = %s to %s", this, tp, startingOffsetString);
+
+ // add the partition to the proxy
+ proxy.addTopicPartition(topicPartitionsToSSP.get(tp), startingOffset);
+ });
+
+ // start the proxy thread
+ if (proxy != null && !proxy.isRunning()) {
+ LOG.info("{}: Starting proxy {}", this, proxy);
+ proxy.start();
+ }
+ }
+
+ private void setFetchThresholds() {
+ // get the thresholds, and set defaults if not defined.
+ KafkaConfig kafkaConfig = new KafkaConfig(config);
+
+ Option<String> fetchThresholdOption = kafkaConfig.getConsumerFetchThreshold(systemName);
+ long fetchThreshold = FETCH_THRESHOLD;
+ if (fetchThresholdOption.isDefined()) {
+ fetchThreshold = Long.valueOf(fetchThresholdOption.get());
+ }
+
+ Option<String> fetchThresholdBytesOption = kafkaConfig.getConsumerFetchThresholdBytes(systemName);
+ long fetchThresholdBytes = FETCH_THRESHOLD_BYTES;
+ if (fetchThresholdBytesOption.isDefined()) {
+ fetchThresholdBytes = Long.valueOf(fetchThresholdBytesOption.get());
+ }
+
+ int numPartitions = topicPartitionsToSSP.size();
+ if (numPartitions != topicPartitionsToOffset.size()) {
+ throw new SamzaException("topicPartitionsToSSP.size() doesn't match topicPartitionsToOffset.size()");
+ }
+
+
+ if (numPartitions > 0) {
+ perPartitionFetchThreshold = fetchThreshold / numPartitions;
+ if (fetchThresholdBytesEnabled) {
+ // currently this feature cannot be enabled, because we do not have the size of the messages available.
+ // messages get double buffered, hence divide by 2
+ perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / numPartitions;
+ }
+ }
+ LOG.info("{}: fetchThresholdBytes = {}; fetchThreshold={}; numPartitions={}, perPartitionFetchThreshold={}, perPartitionFetchThresholdBytes(0 if disabled)={}",
+ this, fetchThresholdBytes, fetchThreshold, numPartitions, perPartitionFetchThreshold, perPartitionFetchThresholdBytes);
+ }
+
+ @Override
+ public void stop() {
+ if (!stopped.compareAndSet(false, true)) {
+ LOG.warn("{}: Attempting to stop stopped consumer.", this);
+ return;
+ }
+
+ LOG.info("{}: Stopping Samza kafkaConsumer ", this);
+
+ // stop the proxy (with 1 minute timeout)
+ if (proxy != null) {
+ LOG.info("{}: Stopping proxy {}", this, proxy);
+ proxy.stop(TimeUnit.SECONDS.toMillis(60));
+ }
+
+ try {
+ synchronized (kafkaConsumer) {
+ LOG.info("{}: Closing kafkaSystemConsumer {}", this, kafkaConsumer);
+ kafkaConsumer.close();
+ }
+ } catch (Exception e) {
+ LOG.warn("{}: Failed to stop KafkaSystemConsumer.", this, e);
+ }
+ }
+
+ /**
+ * record the ssp and the offset. Do not submit it to the consumer yet.
+ * @param systemStreamPartition ssp to register
+ * @param offset offset to register with
+ */
+ @Override
+ public void register(SystemStreamPartition systemStreamPartition, String offset) {
+ if (started.get()) {
+ String msg = String.format("%s: Trying to register partition after consumer has been started. ssp=%s", this,
+ systemStreamPartition);
+ throw new SamzaException(msg);
+ }
+
+ if (!systemStreamPartition.getSystem().equals(systemName)) {
+ LOG.warn("{}: ignoring SSP {}, because this consumer's system doesn't match.", this, systemStreamPartition);
+ return;
+ }
+ LOG.info("{}: Registering ssp = {} with offset {}", this, systemStreamPartition, offset);
+
+ super.register(systemStreamPartition, offset);
+
+ TopicPartition tp = toTopicPartition(systemStreamPartition);
+
+ topicPartitionsToSSP.put(tp, systemStreamPartition);
+
+ String existingOffset = topicPartitionsToOffset.get(tp);
+ // register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
+ if (existingOffset == null || compareOffsets(existingOffset, offset) > 0) {
+ topicPartitionsToOffset.put(tp, offset);
+ }
+
+ metrics.registerTopicAndPartition(toTopicAndPartition(tp));
+ }
+
+ /**
+ * Compare two String offsets.
+ * Note: KafkaSystemAdmin has a method that does this, but using it would require instantiating a system admin for each consumer.
+ * @return see {@link Long#compareTo(Long)}
+ */
+ private static int compareOffsets(String offset1, String offset2) {
+ return Long.valueOf(offset1).compareTo(Long.valueOf(offset2));
+ }
+
+ @Override
+ public String toString() {
+ return String.format("%s:%s", systemName, clientId);
+ }
+
+ @Override
+ public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
+ Set<SystemStreamPartition> systemStreamPartitions, long timeout) throws InterruptedException {
+
+ // check if the proxy is running
+ if (!proxy.isRunning()) {
+ stop();
+ String message = String.format("%s: KafkaConsumerProxy has stopped.", this);
+ throw new SamzaException(message, proxy.getFailureCause());
+ }
+
+ return super.poll(systemStreamPartitions, timeout);
+ }
+
+ /**
+ * convert from TopicPartition to TopicAndPartition
+ */
+ public static TopicAndPartition toTopicAndPartition(TopicPartition tp) {
+ return new TopicAndPartition(tp.topic(), tp.partition());
+ }
+
+ /**
+ * convert to TopicPartition from SystemStreamPartition
+ */
+ public static TopicPartition toTopicPartition(SystemStreamPartition ssp) {
+ return new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
+ }
+
+ /**
+ * return system name for this consumer
+ * @return system name
+ */
+ public String getSystemName() {
+ return systemName;
+ }
+
+ public class KafkaConsumerMessageSink {
+
+ public void setIsAtHighWatermark(SystemStreamPartition ssp, boolean isAtHighWatermark) {
+ setIsAtHead(ssp, isAtHighWatermark);
+ }
+
+ boolean needsMoreMessages(SystemStreamPartition ssp) {
+ LOG.debug("{}: needsMoreMessages from following SSP: {}. fetchLimitByBytes enabled={}; messagesSizeInQueue={};"
+ + "(limit={}); messagesNumInQueue={}(limit={};", this, ssp, fetchThresholdBytesEnabled,
+ getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes, getNumMessagesInQueue(ssp),
+ perPartitionFetchThreshold);
+
+ if (fetchThresholdBytesEnabled) {
+ return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes;
+ } else {
+ return getNumMessagesInQueue(ssp) < perPartitionFetchThreshold;
+ }
+ }
+
+ void addMessage(SystemStreamPartition ssp, IncomingMessageEnvelope envelope) {
+ LOG.trace("{}: Incoming message ssp = {}: envelope = {}.", this, ssp, envelope);
+
+ try {
+ put(ssp, envelope);
+ } catch (InterruptedException e) {
+ throw new SamzaException(
+ String.format("%s: Consumer was interrupted while trying to add message with offset %s for ssp %s", this,
+ envelope.getOffset(), ssp));
+ }
+ }
+ }
+}
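
A lifecycle sketch for the consumer above, illustrative only (system, topic, and offset values are placeholders): all register() calls must happen before start(); start() assigns the registered TopicPartitions, seeks to the registered offsets, and starts the proxy; poll() then drains what the KafkaConsumerProxy has buffered.

    // poll() throws InterruptedException, so call it from code that handles the exception.
    SystemStreamPartition ssp = new SystemStreamPartition("kafka", "PageViewEvent", new Partition(0));
    consumer.register(ssp, "0");   // "0" is the upcoming offset to start consuming from
    consumer.start();
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> envelopes =
        consumer.poll(Collections.singleton(ssp), 500);
    consumer.stop();
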
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala b/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala
index e5cca36..f492518 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala
@@ -62,6 +62,11 @@ object KafkaConfig {
val JOB_COORDINATOR_REPLICATION_FACTOR = "job.coordinator." + TOPIC_REPLICATION_FACTOR
val JOB_COORDINATOR_SEGMENT_BYTES = "job.coordinator." + SEGMENT_BYTES
+ val CONSUMER_CONFIGS_CONFIG_KEY = "systems.%s.consumer.%s"
+ val PRODUCER_BOOTSTRAP_SERVERS_CONFIG_KEY = "systems.%s.producer.bootstrap.servers"
+ val PRODUCER_CONFIGS_CONFIG_KEY = "systems.%s.producer.%s"
+ val CONSUMER_ZK_CONNECT_CONFIG_KEY = "systems.%s.consumer.zookeeper.connect"
+
/**
* Defines how low a queue can get for a single system/stream/partition
* combination before trying to fetch more messages for it.
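
The new *_CONFIG_KEY constants above are String.format templates for per-system Kafka properties. A small sketch of how such a template expands, using a made-up system name and property:

public class ConfigKeyTemplate {
  public static void main(String[] args) {
    // Same template shape as CONSUMER_CONFIGS_CONFIG_KEY; system name and property are examples.
    String template = "systems.%s.consumer.%s";
    String key = String.format(template, "kafka", "bootstrap.servers");
    System.out.println(key); // prints systems.kafka.consumer.bootstrap.servers
  }
}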
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConsumerConfig.java
deleted file mode 100644
index 6cebc28..0000000
--- a/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConsumerConfig.java
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- */
-
-package org.apache.samza.config;
-
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.kafka.clients.consumer.ConsumerConfig;
-import org.apache.kafka.clients.consumer.RangeAssignor;
-import org.apache.kafka.common.serialization.ByteArrayDeserializer;
-import org.apache.samza.SamzaException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import scala.Option;
-
-
-/**
- * The configuration class for KafkaConsumer
- */
-public class KafkaConsumerConfig extends HashMap<String, Object> {
-
- public static final Logger LOG = LoggerFactory.getLogger(KafkaConsumerConfig.class);
-
- static final String PRODUCER_CLIENT_ID_PREFIX = "kafka-producer";
- static final String CONSUMER_CLIENT_ID_PREFIX = "kafka-consumer";
- static final String ADMIN_CLIENT_ID_PREFIX = "samza-admin";
-
- /*
- * By default, KafkaConsumer will fetch some big number of available messages for all the partitions.
- * This may cause memory issues. That's why we will limit the number of messages per partition we get on EACH poll().
- */
- static final String DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS = "100";
-
- private KafkaConsumerConfig(Map<String, Object> map) {
- super(map);
- }
-
- /**
- * Helper method to create configs for use in Kafka consumer.
- * The values are based on the "consumer" subset of the configs provided by the app and Samza overrides.
- *
- * @param config config provided by the app.
- * @param systemName system name to get the consumer configuration for.
- * @param clientId client id to be used in the Kafka consumer.
- * @return KafkaConsumerConfig
- */
- public static KafkaConsumerConfig getKafkaSystemConsumerConfig(Config config, String systemName, String clientId) {
-
- Config subConf = config.subset(String.format("systems.%s.consumer.", systemName), true);
-
- //Kafka client configuration
- String groupId = getConsumerGroupId(config);
-
- Map<String, Object> consumerProps = new HashMap<>();
- consumerProps.putAll(subConf);
-
- consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
- consumerProps.put(ConsumerConfig.CLIENT_ID_CONFIG, clientId);
-
- // These are values we enforce in samza, and they cannot be overwritten.
-
- // Disable consumer auto-commit because Samza controls commits
- consumerProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
-
- // Translate samza config value to kafka config value
- consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
- getAutoOffsetResetValue((String) consumerProps.get(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG)));
-
- // if consumer bootstrap servers are not configured, get them from the producer configs
- if (!subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
- String bootstrapServers =
- config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
- if (StringUtils.isEmpty(bootstrapServers)) {
- throw new SamzaException("Missing " + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG + " config for " + systemName);
- }
- consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
- }
-
- // Always use default partition assignment strategy. Do not allow override.
- consumerProps.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG, RangeAssignor.class.getName());
-
- // the consumer is fully typed, and deserialization can be too. But in case it is not provided we should
- // default to byte[]
- if (!consumerProps.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
- LOG.info("setting key serialization for the consumer(for system {}) to ByteArrayDeserializer", systemName);
- consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
- }
- if (!consumerProps.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
- LOG.info("setting value serialization for the consumer(for system {}) to ByteArrayDeserializer", systemName);
- consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
- }
-
- // Override default max poll config if there is no value
- consumerProps.computeIfAbsent(ConsumerConfig.MAX_POLL_RECORDS_CONFIG,
- (k) -> DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS);
-
- return new KafkaConsumerConfig(consumerProps);
- }
-
- // group id should be unique per job
- static String getConsumerGroupId(Config config) {
- JobConfig jobConfig = new JobConfig(config);
- Option jobNameOption = jobConfig.getName();
- if (jobNameOption.isEmpty()) {
- throw new ConfigException("Missing job name");
- }
- String jobName = (String) jobNameOption.get();
-
- String jobId = jobConfig.getJobId();
-
- return String.format("%s-%s", jobName, jobId);
- }
-
- // client id should be unique per job
- public static String getConsumerClientId(Config config) {
- return getConsumerClientId(CONSUMER_CLIENT_ID_PREFIX, config);
- }
-
- public static String getProducerClientId(Config config) {
- return getConsumerClientId(PRODUCER_CLIENT_ID_PREFIX, config);
- }
-
- public static String getAdminClientId(Config config) {
- return getConsumerClientId(ADMIN_CLIENT_ID_PREFIX, config);
- }
-
- static String getConsumerClientId(String id, Config config) {
- JobConfig jobConfig = new JobConfig(config);
- Option jobNameOption = jobConfig.getName();
- if (jobNameOption.isEmpty()) {
- throw new ConfigException("Missing job name");
- }
- String jobName = (String) jobNameOption.get();
-
- String jobId = jobConfig.getJobId();
-
- return String.format("%s-%s-%s", id.replaceAll("\\W", "_"), jobName.replaceAll("\\W", "_"),
- jobId.replaceAll("\\W", "_"));
- }
-
- /**
- * If settings for auto.reset in samza are different from settings in Kafka (auto.offset.reset),
- * then need to convert them (see kafka.apache.org/documentation):
- * "largest" -> "latest"
- * "smallest" -> "earliest"
- *
- * If no setting specified we return "latest" (same as Kafka).
- * @param autoOffsetReset value from the app provided config
- * @return String representing the config value for "auto.offset.reset" property
- */
- static String getAutoOffsetResetValue(final String autoOffsetReset) {
- final String SAMZA_OFFSET_LARGEST = "largest";
- final String SAMZA_OFFSET_SMALLEST = "smallest";
- final String KAFKA_OFFSET_LATEST = "latest";
- final String KAFKA_OFFSET_EARLIEST = "earliest";
- final String KAFKA_OFFSET_NONE = "none";
-
- if (autoOffsetReset == null) {
- return KAFKA_OFFSET_LATEST; // return default
- }
-
- // accept kafka values directly
- if (autoOffsetReset.equals(KAFKA_OFFSET_EARLIEST) || autoOffsetReset.equals(KAFKA_OFFSET_LATEST)
- || autoOffsetReset.equals(KAFKA_OFFSET_NONE)) {
- return autoOffsetReset;
- }
-
- String newAutoOffsetReset;
- switch (autoOffsetReset) {
- case SAMZA_OFFSET_LARGEST:
- newAutoOffsetReset = KAFKA_OFFSET_LATEST;
- break;
- case SAMZA_OFFSET_SMALLEST:
- newAutoOffsetReset = KAFKA_OFFSET_EARLIEST;
- break;
- default:
- newAutoOffsetReset = KAFKA_OFFSET_LATEST;
- }
- LOG.info("AutoOffsetReset value converted from {} to {}", autoOffsetReset, newAutoOffsetReset);
- return newAutoOffsetReset;
- }
-}
\ No newline at end of file
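
The auto.offset.reset translation documented in the file removed above (Samza's "largest"/"smallest" mapped onto Kafka's "latest"/"earliest", with "latest" as the fallback) fits in a few lines; a hedged standalone sketch, not the code that replaces it:

public class OffsetResetTranslation {
  // Mirrors the mapping described in getAutoOffsetResetValue(); names here are illustrative.
  static String toKafkaOffsetReset(String configuredValue) {
    if (configuredValue == null) {
      return "latest"; // Kafka's own default
    }
    switch (configuredValue) {
      case "earliest":
      case "latest":
      case "none":
        return configuredValue; // already a Kafka value, accept as-is
      case "largest":
        return "latest";
      case "smallest":
        return "earliest";
      default:
        return "latest";
    }
  }

  public static void main(String[] args) {
    System.out.println(toKafkaOffsetReset("smallest")); // prints earliest
    System.out.println(toKafkaOffsetReset(null));       // prints latest
  }
}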
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 04071c1..e47add7 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -90,6 +90,8 @@ class KafkaConsumerProxy<K, V> {
consumerPollThread.setDaemon(true);
consumerPollThread.setName(
"Samza KafkaConsumerProxy Poll " + consumerPollThread.getName() + " - " + systemName);
+
+ LOG.info("Creating KafkaConsumerProxy with systemName={}, clientId={}, metricName={}", systemName, clientId, metricName);
}
/**
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemAdmin.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemAdmin.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemAdmin.scala
deleted file mode 100644
index 6ab4d32..0000000
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemAdmin.scala
+++ /dev/null
@@ -1,608 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.samza.system.kafka
-
-import java.util
-import java.util.{Properties, UUID}
-
-import com.google.common.annotations.VisibleForTesting
-import kafka.admin.{AdminClient, AdminUtils}
-import kafka.api._
-import kafka.common.TopicAndPartition
-import kafka.consumer.{ConsumerConfig, SimpleConsumer}
-import kafka.utils.ZkUtils
-import org.apache.kafka.clients.CommonClientConfigs
-import org.apache.kafka.common.errors.TopicExistsException
-import org.apache.kafka.common.TopicPartition
-import org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata
-import org.apache.samza.system._
-import org.apache.samza.util.{ClientUtilTopicMetadataStore, ExponentialSleepStrategy, KafkaUtil, Logging}
-import org.apache.samza.{Partition, SamzaException}
-
-import scala.collection.JavaConverters._
-
-
-object KafkaSystemAdmin extends Logging {
-
- @VisibleForTesting @volatile var deleteMessagesCalled = false
- val CLEAR_STREAM_RETRIES = 3
-
- /**
- * A helper method that takes oldest, newest, and upcoming offsets for each
- * system stream partition, and creates a single map from stream name to
- * SystemStreamMetadata.
- */
- def assembleMetadata(oldestOffsets: Map[SystemStreamPartition, String], newestOffsets: Map[SystemStreamPartition, String], upcomingOffsets: Map[SystemStreamPartition, String]): Map[String, SystemStreamMetadata] = {
- val allMetadata = (oldestOffsets.keySet ++ newestOffsets.keySet ++ upcomingOffsets.keySet)
- .groupBy(_.getStream)
- .map {
- case (streamName, systemStreamPartitions) =>
- val streamPartitionMetadata = systemStreamPartitions
- .map(systemStreamPartition => {
- val partitionMetadata = new SystemStreamPartitionMetadata(
- // If the topic/partition is empty then oldest and newest will
- // be stripped of their offsets, so default to null.
- oldestOffsets.getOrElse(systemStreamPartition, null),
- newestOffsets.getOrElse(systemStreamPartition, null),
- upcomingOffsets(systemStreamPartition))
- (systemStreamPartition.getPartition, partitionMetadata)
- })
- .toMap
- val streamMetadata = new SystemStreamMetadata(streamName, streamPartitionMetadata.asJava)
- (streamName, streamMetadata)
- }
- .toMap
-
- // This is typically printed downstream and it can be spammy, so debug level here.
- debug("Got metadata: %s" format allMetadata)
-
- allMetadata
- }
-}
-
-/**
- * A helper class that is used to construct the changelog stream specific information
- *
- * @param replicationFactor The number of replicas for the changelog stream
- * @param kafkaProps The kafka specific properties that need to be used for changelog stream creation
- */
-case class ChangelogInfo(var replicationFactor: Int, var kafkaProps: Properties)
-
-/**
- * A Kafka-based implementation of SystemAdmin.
- */
-class KafkaSystemAdmin(
- /**
- * The system name to use when creating SystemStreamPartitions to return in
- * the getSystemStreamMetadata response.
- */
- systemName: String,
-
- // TODO whenever Kafka decides to make the Set[Broker] class public, let's switch to Set[Broker] here.
- /**
- * List of brokers that are part of the Kafka system that we wish to
- * interact with. The format is host1:port1,host2:port2.
- */
- brokerListString: String,
-
- /**
- * A method that returns a ZkUtils for the Kafka system. This is invoked
- * when the system admin is attempting to create a coordinator stream.
- */
- connectZk: () => ZkUtils,
-
- /**
- * Custom properties to use when the system admin tries to create a new
- * coordinator stream.
- */
- coordinatorStreamProperties: Properties = new Properties,
-
- /**
- * The replication factor to use when the system admin creates a new
- * coordinator stream.
- */
- coordinatorStreamReplicationFactor: Int = 1,
-
- /**
- * The timeout to use for the simple consumer when fetching metadata from
- * Kafka. Equivalent to Kafka's socket.timeout.ms configuration.
- */
- timeout: Int = Int.MaxValue,
-
- /**
- * The buffer size to use for the simple consumer when fetching metadata
- * from Kafka. Equivalent to Kafka's socket.receive.buffer.bytes
- * configuration.
- */
- bufferSize: Int = ConsumerConfig.SocketBufferSize,
-
- /**
- * The client ID to use for the simple consumer when fetching metadata from
- * Kafka. Equivalent to Kafka's client.id configuration.
- */
- clientId: String = UUID.randomUUID.toString,
-
- /**
- * Replication factor for the Changelog topic in kafka
- * Kafka properties to be used during the Changelog topic creation
- */
- topicMetaInformation: Map[String, ChangelogInfo] = Map[String, ChangelogInfo](),
-
- /**
- * Kafka properties to be used during the intermediate topic creation
- */
- intermediateStreamProperties: Map[String, Properties] = Map(),
-
- /**
- * Whether deleteMessages() API can be used
- */
- deleteCommittedMessages: Boolean = false) extends ExtendedSystemAdmin with Logging {
-
- import KafkaSystemAdmin._
-
- @volatile var running = false
- @volatile var adminClient: AdminClient = null
-
- override def start() = {
- if (!running) {
- running = true
- adminClient = createAdminClient()
- }
- }
-
- override def stop() = {
- if (running) {
- running = false
- adminClient.close()
- adminClient = null
- }
- }
-
- private def createAdminClient(): AdminClient = {
- val props = new Properties()
- props.put(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, brokerListString)
- AdminClient.create(props)
- }
-
- override def getSystemStreamPartitionCounts(streams: util.Set[String], cacheTTL: Long): util.Map[String, SystemStreamMetadata] = {
- getSystemStreamPartitionCounts(streams, new ExponentialSleepStrategy(initialDelayMs = 500), cacheTTL)
- }
-
- def getSystemStreamPartitionCounts(streams: util.Set[String], retryBackoff: ExponentialSleepStrategy, cacheTTL: Long = Long.MaxValue): util.Map[String, SystemStreamMetadata] = {
- debug("Fetching system stream partition count for: %s" format streams)
- var metadataTTL = cacheTTL
- retryBackoff.run(
- loop => {
- val metadata = TopicMetadataCache.getTopicMetadata(
- streams.asScala.toSet,
- systemName,
- getTopicMetadata,
- metadataTTL)
- val result = metadata.map {
- case (topic, topicMetadata) => {
- KafkaUtil.maybeThrowException(topicMetadata.error.exception())
- val partitionsMap = topicMetadata.partitionsMetadata.map {
- pm =>
- new Partition(pm.partitionId) -> new SystemStreamPartitionMetadata("", "", "")
- }.toMap[Partition, SystemStreamPartitionMetadata]
- (topic -> new SystemStreamMetadata(topic, partitionsMap.asJava))
- }
- }
- loop.done
- result.asJava
- },
-
- (exception, loop) => {
- warn("Unable to fetch last offsets for streams %s due to %s. Retrying." format (streams, exception))
- debug("Exception detail:", exception)
- if (metadataTTL == Long.MaxValue) {
- metadataTTL = 5000 // Revert to the default cache expiration
- }
- }
- ).getOrElse(throw new SamzaException("Failed to get system stream metadata"))
- }
-
- /**
- * Returns the offset for the message after the specified offset for each
- * SystemStreamPartition that was passed in.
- */
-
- override def getOffsetsAfter(offsets: java.util.Map[SystemStreamPartition, String]) = {
- // This is safe to do with Kafka, even if a topic is key-deduped. If the
- // offset doesn't exist on a compacted topic, Kafka will return the first
- // message AFTER the offset that was specified in the fetch request.
- offsets.asScala.mapValues(offset => (offset.toLong + 1).toString).asJava
- }
-
- override def getSystemStreamMetadata(streams: java.util.Set[String]) =
- getSystemStreamMetadata(streams, new ExponentialSleepStrategy(initialDelayMs = 500)).asJava
-
- /**
- * Given a set of stream names (topics), fetch metadata from Kafka for each
- * stream, and return a map from stream name to SystemStreamMetadata for
- * each stream. This method will return null for oldest and newest offsets
- * if a given SystemStreamPartition is empty. This method will block and
- * retry indefinitely until it gets a successful response from Kafka.
- */
- def getSystemStreamMetadata(streams: java.util.Set[String], retryBackoff: ExponentialSleepStrategy) = {
- debug("Fetching system stream metadata for: %s" format streams)
- var metadataTTL = Long.MaxValue // Trust the cache until we get an exception
- retryBackoff.run(
- loop => {
- val metadata = TopicMetadataCache.getTopicMetadata(
- streams.asScala.toSet,
- systemName,
- getTopicMetadata,
- metadataTTL)
-
- debug("Got metadata for streams: %s" format metadata)
-
- val brokersToTopicPartitions = getTopicsAndPartitionsByBroker(metadata)
- var oldestOffsets = Map[SystemStreamPartition, String]()
- var newestOffsets = Map[SystemStreamPartition, String]()
- var upcomingOffsets = Map[SystemStreamPartition, String]()
-
- // Get oldest, newest, and upcoming offsets for each topic and partition.
- for ((broker, topicsAndPartitions) <- brokersToTopicPartitions) {
- debug("Fetching offsets for %s:%s: %s" format (broker.host, broker.port, topicsAndPartitions))
-
- val consumer = new SimpleConsumer(broker.host, broker.port, timeout, bufferSize, clientId)
- try {
- upcomingOffsets ++= getOffsets(consumer, topicsAndPartitions, OffsetRequest.LatestTime)
- oldestOffsets ++= getOffsets(consumer, topicsAndPartitions, OffsetRequest.EarliestTime)
-
- // Kafka's "latest" offset is always last message in stream's offset +
- // 1, so get newest message in stream by subtracting one. this is safe
- // even for key-deduplicated streams, since the last message will
- // never be deduplicated.
- newestOffsets = upcomingOffsets.mapValues(offset => (offset.toLong - 1).toString)
- // Keep only oldest/newest offsets where there is a message. Should
- // return null offsets for empty streams.
- upcomingOffsets.foreach {
- case (topicAndPartition, offset) =>
- if (offset.toLong <= 0) {
- debug("Stripping newest offsets for %s because the topic appears empty." format topicAndPartition)
- newestOffsets -= topicAndPartition
- debug("Setting oldest offset to 0 to consume from beginning")
- oldestOffsets += (topicAndPartition -> "0")
- }
- }
- } finally {
- consumer.close
- }
- }
-
- val result = assembleMetadata(oldestOffsets, newestOffsets, upcomingOffsets)
- loop.done
- result
- },
-
- (exception, loop) => {
- warn("Unable to fetch last offsets for streams %s due to %s. Retrying." format (streams, exception))
- debug("Exception detail:", exception)
- metadataTTL = 5000 // Revert to the default cache expiration
- }).getOrElse(throw new SamzaException("Failed to get system stream metadata"))
- }
-
- /**
- * Returns the newest offset for the specified SSP.
- * This method is fast and targeted. It minimizes the number of kafka requests.
- * It does not retry indefinitely if there is any failure.
- * It returns null if the topic is empty. To get the offsets for *all*
- * partitions, it would be more efficient to call getSystemStreamMetadata
- */
- override def getNewestOffset(ssp: SystemStreamPartition, maxRetries: Integer) = {
- debug("Fetching newest offset for: %s" format ssp)
- var offset: String = null
- var metadataTTL = Long.MaxValue // Trust the cache until we get an exception
- var retries = maxRetries
- new ExponentialSleepStrategy().run(
- loop => {
- val metadata = TopicMetadataCache.getTopicMetadata(
- Set(ssp.getStream),
- systemName,
- getTopicMetadata,
- metadataTTL)
- debug("Got metadata for streams: %s" format metadata)
-
- val brokersToTopicPartitions = getTopicsAndPartitionsByBroker(metadata)
- val topicAndPartition = new TopicAndPartition(ssp.getStream, ssp.getPartition.getPartitionId)
- val broker = brokersToTopicPartitions.filter((e) => e._2.contains(topicAndPartition)).head._1
-
- // Get oldest, newest, and upcoming offsets for each topic and partition.
- debug("Fetching offset for %s:%s: %s" format (broker.host, broker.port, topicAndPartition))
- val consumer = new SimpleConsumer(broker.host, broker.port, timeout, bufferSize, clientId)
- try {
- offset = getOffsets(consumer, Set(topicAndPartition), OffsetRequest.LatestTime).head._2
-
- // Kafka's "latest" offset is always last message in stream's offset +
- // 1, so get newest message in stream by subtracting one. this is safe
- // even for key-deduplicated streams, since the last message will
- // never be deduplicated.
- if (offset.toLong <= 0) {
- debug("Stripping newest offsets for %s because the topic appears empty." format topicAndPartition)
- offset = null
- } else {
- offset = (offset.toLong - 1).toString
- }
- } finally {
- consumer.close
- }
-
- debug("Got offset %s for %s." format(offset, ssp))
- loop.done
- },
-
- (exception, loop) => {
- if (retries > 0) {
- warn("Exception while trying to get offset for %s: %s. Retrying." format(ssp, exception))
- metadataTTL = 0L // Force metadata refresh
- retries -= 1
- } else {
- warn("Exception while trying to get offset for %s" format(ssp), exception)
- loop.done
- throw exception
- }
- })
-
- offset
- }
-
- /**
- * Helper method to use topic metadata cache when fetching metadata, so we
- * don't hammer Kafka more than we need to.
- */
- def getTopicMetadata(topics: Set[String]) = {
- new ClientUtilTopicMetadataStore(brokerListString, clientId, timeout)
- .getTopicInfo(topics)
- }
-
- /**
- * Break topic metadata topic/partitions into per-broker map so that we can
- * execute only one offset request per broker.
- */
- private def getTopicsAndPartitionsByBroker(metadata: Map[String, TopicMetadata]) = {
- val brokersToTopicPartitions = metadata
- .values
- // Convert the topic metadata to a Seq[(Broker, TopicAndPartition)]
- .flatMap(topicMetadata => {
- KafkaUtil.maybeThrowException(topicMetadata.error.exception())
- topicMetadata
- .partitionsMetadata
- // Convert Seq[PartitionMetadata] to Seq[(Broker, TopicAndPartition)]
- .map(partitionMetadata => {
- val topicAndPartition = new TopicAndPartition(topicMetadata.topic, partitionMetadata.partitionId)
- val leader = partitionMetadata
- .leader
- .getOrElse(throw new SamzaException("Need leaders for all partitions when fetching offsets. No leader available for TopicAndPartition: %s" format topicAndPartition))
- (leader, topicAndPartition)
- })
- })
-
- // Convert to a Map[Broker, Seq[(Broker, TopicAndPartition)]]
- .groupBy(_._1)
- // Convert to a Map[Broker, Set[TopicAndPartition]]
- .mapValues(_.map(_._2).toSet)
-
- debug("Got topic partition data for brokers: %s" format brokersToTopicPartitions)
-
- brokersToTopicPartitions
- }
-
- /**
- * Use a SimpleConsumer to fetch either the earliest or latest offset from
- * Kafka for each topic/partition in the topicsAndPartitions set. It is
- * assumed that all topics/partitions supplied reside on the broker that the
- * consumer is connected to.
- */
- private def getOffsets(consumer: SimpleConsumer, topicsAndPartitions: Set[TopicAndPartition], earliestOrLatest: Long) = {
- debug("Getting offsets for %s using earliest/latest value of %s." format (topicsAndPartitions, earliestOrLatest))
-
- var offsets = Map[SystemStreamPartition, String]()
- val partitionOffsetInfo = topicsAndPartitions
- .map(topicAndPartition => (topicAndPartition, PartitionOffsetRequestInfo(earliestOrLatest, 1)))
- .toMap
- val brokerOffsets = consumer
- .getOffsetsBefore(new OffsetRequest(partitionOffsetInfo))
- .partitionErrorAndOffsets
- .mapValues(partitionErrorAndOffset => {
- KafkaUtil.maybeThrowException(partitionErrorAndOffset.error.exception())
- partitionErrorAndOffset.offsets.head
- })
-
- for ((topicAndPartition, offset) <- brokerOffsets) {
- offsets += new SystemStreamPartition(systemName, topicAndPartition.topic, new Partition(topicAndPartition.partition)) -> offset.toString
- }
-
- debug("Got offsets for %s using earliest/latest value of %s: %s" format (topicsAndPartitions, earliestOrLatest, offsets))
-
- offsets
- }
-
- /**
- * @inheritdoc
- */
- override def createStream(spec: StreamSpec): Boolean = {
- info("Create topic %s in system %s" format (spec.getPhysicalName, systemName))
- val kSpec = toKafkaSpec(spec)
- var streamCreated = false
-
- new ExponentialSleepStrategy(initialDelayMs = 500).run(
- loop => {
- val zkClient = connectZk()
- try {
- AdminUtils.createTopic(
- zkClient,
- kSpec.getPhysicalName,
- kSpec.getPartitionCount,
- kSpec.getReplicationFactor,
- kSpec.getProperties)
- } finally {
- zkClient.close
- }
-
- streamCreated = true
- loop.done
- },
-
- (exception, loop) => {
- exception match {
- case e: TopicExistsException =>
- streamCreated = false
- loop.done
- case e: Exception =>
- warn("Failed to create topic %s: %s. Retrying." format (spec.getPhysicalName, e))
- debug("Exception detail:", e)
- }
- })
-
- streamCreated
- }
-
- /**
- * Converts a StreamSpec into a KafkaStreamSpec. Special handling for coordinator and changelog streams.
- * @param spec a StreamSpec object
- * @return KafkaStreamSpec object
- */
- def toKafkaSpec(spec: StreamSpec): KafkaStreamSpec = {
- if (spec.isChangeLogStream) {
- val topicName = spec.getPhysicalName
- val topicMeta = topicMetaInformation.getOrElse(topicName, throw new StreamValidationException("Unable to find topic information for topic " + topicName))
- new KafkaStreamSpec(spec.getId, topicName, systemName, spec.getPartitionCount, topicMeta.replicationFactor,
- topicMeta.kafkaProps)
- } else if (spec.isCoordinatorStream){
- new KafkaStreamSpec(spec.getId, spec.getPhysicalName, systemName, 1, coordinatorStreamReplicationFactor,
- coordinatorStreamProperties)
- } else if (intermediateStreamProperties.contains(spec.getId)) {
- KafkaStreamSpec.fromSpec(spec).copyWithProperties(intermediateStreamProperties(spec.getId))
- } else {
- KafkaStreamSpec.fromSpec(spec)
- }
- }
-
- /**
- * @inheritdoc
- *
- * Validates a stream in Kafka. Should not be called before createStream(),
- * since ClientUtils.fetchTopicMetadata(), used by different Kafka clients,
- * is not read-only and will auto-create a new topic.
- */
- override def validateStream(spec: StreamSpec): Unit = {
- val topicName = spec.getPhysicalName
- info("Validating topic %s." format topicName)
-
- val retryBackoff: ExponentialSleepStrategy = new ExponentialSleepStrategy
- var metadataTTL = Long.MaxValue // Trust the cache until we get an exception
- retryBackoff.run(
- loop => {
- val metadataStore = new ClientUtilTopicMetadataStore(brokerListString, clientId, timeout)
- val topicMetadataMap = TopicMetadataCache.getTopicMetadata(Set(topicName), systemName, metadataStore.getTopicInfo, metadataTTL)
- val topicMetadata = topicMetadataMap(topicName)
- KafkaUtil.maybeThrowException(topicMetadata.error.exception())
-
- val partitionCount = topicMetadata.partitionsMetadata.length
- if (partitionCount != spec.getPartitionCount) {
- throw new StreamValidationException("Topic validation failed for topic %s because partition count %s did not match expected partition count of %d" format (topicName, topicMetadata.partitionsMetadata.length, spec.getPartitionCount))
- }
-
- info("Successfully validated topic %s." format topicName)
- loop.done
- },
-
- (exception, loop) => {
- exception match {
- case e: StreamValidationException => throw e
- case e: Exception =>
- warn("While trying to validate topic %s: %s. Retrying." format (topicName, e))
- debug("Exception detail:", e)
- metadataTTL = 5000L // Revert to the default value
- }
- })
- }
-
- /**
- * @inheritdoc
- *
- * Delete a stream in Kafka. Deleting topics works only when the broker is configured with "delete.topic.enable=true".
- * Otherwise it's a no-op.
- */
- override def clearStream(spec: StreamSpec): Boolean = {
- info("Delete topic %s in system %s" format (spec.getPhysicalName, systemName))
- val kSpec = KafkaStreamSpec.fromSpec(spec)
- var retries = CLEAR_STREAM_RETRIES
- new ExponentialSleepStrategy().run(
- loop => {
- val zkClient = connectZk()
- try {
- AdminUtils.deleteTopic(
- zkClient,
- kSpec.getPhysicalName)
- } finally {
- zkClient.close
- }
-
- loop.done
- },
-
- (exception, loop) => {
- if (retries > 0) {
- warn("Exception while trying to delete topic %s: %s. Retrying." format (spec.getPhysicalName, exception))
- retries -= 1
- } else {
- warn("Failed to delete topic %s: %s" format (spec.getPhysicalName, exception))
- loop.done
- throw exception
- }
- })
-
- val topicMetadata = getTopicMetadata(Set(kSpec.getPhysicalName)).get(kSpec.getPhysicalName).get
- topicMetadata.partitionsMetadata.isEmpty
- }
-
- /**
- * @inheritdoc
- *
- * Delete records up to (and including) the provided ssp offsets for all system stream partitions specified in the map
- * This only works with Kafka cluster 0.11 or later. Otherwise it's a no-op.
- */
- override def deleteMessages(offsets: util.Map[SystemStreamPartition, String]) {
- if (!running) {
- throw new SamzaException(s"KafkaSystemAdmin has not started yet for system $systemName")
- }
- if (deleteCommittedMessages) {
- val nextOffsets = offsets.asScala.toSeq.map { case (systemStreamPartition, offset) =>
- (new TopicPartition(systemStreamPartition.getStream, systemStreamPartition.getPartition.getPartitionId), offset.toLong + 1)
- }.toMap
- adminClient.deleteRecordsBefore(nextOffsets)
- deleteMessagesCalled = true
- }
- }
-
- /**
- * Compare the two offsets. Returns x where x < 0 if offset1 < offset2;
- * x == 0 if offset1 == offset2; x > 0 if offset1 > offset2.
- *
- * Currently it's used in the context of the broadcast streams to detect
- * the mismatch between two streams when consuming the broadcast streams.
- */
- override def offsetComparator(offset1: String, offset2: String): Integer = {
- offset1.toLong compare offset2.toLong
- }
-}
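
The offset arithmetic that the deleted admin relied on is easy to lose in the Scala above: Kafka's "latest" (upcoming) offset is the last message's offset plus one, so the newest consumable offset is upcoming minus one, an upcoming offset of 0 marks an empty partition, and getOffsetsAfter() is a plain increment. A hedged sketch of just that arithmetic:

public class OffsetArithmetic {
  // Newest consumable offset derived from Kafka's upcoming ("latest") offset; null means empty.
  static String newestFromUpcoming(String upcomingOffset) {
    long upcoming = Long.parseLong(upcomingOffset);
    return upcoming <= 0 ? null : String.valueOf(upcoming - 1);
  }

  // getOffsetsAfter(): the offset after a given offset is simply offset + 1.
  static String offsetAfter(String offset) {
    return String.valueOf(Long.parseLong(offset) + 1);
  }

  public static void main(String[] args) {
    System.out.println(newestFromUpcoming("10")); // prints 9
    System.out.println(newestFromUpcoming("0"));  // prints null (empty partition)
    System.out.println(offsetAfter("9"));         // prints 10
  }
}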
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemAdminUtilsScala.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemAdminUtilsScala.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemAdminUtilsScala.scala
new file mode 100644
index 0000000..6ff2b50
--- /dev/null
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemAdminUtilsScala.scala
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.samza.system.kafka
+
+import java.util
+import java.util.Properties
+
+import kafka.admin.{AdminClient, AdminUtils}
+import kafka.utils.{Logging, ZkUtils}
+import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.errors.TopicExistsException
+import org.apache.samza.config.ApplicationConfig.ApplicationMode
+import org.apache.samza.config.{ApplicationConfig, Config, KafkaConfig, StreamConfig}
+import org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata
+import org.apache.samza.system.{StreamSpec, SystemStreamMetadata, SystemStreamPartition}
+import org.apache.samza.util.ExponentialSleepStrategy
+import org.slf4j.{Logger, LoggerFactory}
+
+import scala.collection.JavaConverters._
+
+/**
+ * A helper class that is used to construct the changelog stream specific information
+ *
+ * @param replicationFactor The number of replicas for the changelog stream
+ * @param kafkaProps The kafka specific properties that need to be used for changelog stream creation
+ */
+case class ChangelogInfo(var replicationFactor: Int, var kafkaProps: Properties)
+
+
+// TODO move to org.apache.kafka.clients.admin.AdminClient from kafka.admin.AdminClient
+object KafkaSystemAdminUtilsScala extends Logging {
+
+ val CLEAR_STREAM_RETRIES = 3
+ val CREATE_STREAM_RETRIES = 10
+
+ /**
+ * @inheritdoc
+ *
+ * Delete a stream in Kafka. Deleting topics works only when the broker is configured with "delete.topic.enable=true".
+ * Otherwise it's a no-op.
+ */
+ def clearStream(spec: StreamSpec, connectZk: java.util.function.Supplier[ZkUtils]): Unit = {
+ info("Deleting topic %s for system %s" format(spec.getPhysicalName, spec.getSystemName))
+ val kSpec = KafkaStreamSpec.fromSpec(spec)
+ var retries = CLEAR_STREAM_RETRIES
+ new ExponentialSleepStrategy().run(
+ loop => {
+ val zkClient = connectZk.get()
+ try {
+ AdminUtils.deleteTopic(
+ zkClient,
+ kSpec.getPhysicalName)
+ } finally {
+ zkClient.close
+ }
+
+ loop.done
+ },
+
+ (exception, loop) => {
+ if (retries > 0) {
+ warn("Exception while trying to delete topic %s. Retrying." format (spec.getPhysicalName), exception)
+ retries -= 1
+ } else {
+ warn("Failed to delete topic %s." format (spec.getPhysicalName), exception)
+ loop.done
+ throw exception
+ }
+ })
+ }
+
+
+ def createStream(kSpec: KafkaStreamSpec, connectZk: java.util.function.Supplier[ZkUtils]): Boolean = {
+ info("Creating topic %s for system %s" format(kSpec.getPhysicalName, kSpec.getSystemName))
+ var streamCreated = false
+ var retries = CREATE_STREAM_RETRIES
+
+ new ExponentialSleepStrategy(initialDelayMs = 500).run(
+ loop => {
+ val zkClient = connectZk.get()
+ try {
+ AdminUtils.createTopic(
+ zkClient,
+ kSpec.getPhysicalName,
+ kSpec.getPartitionCount,
+ kSpec.getReplicationFactor,
+ kSpec.getProperties)
+ } finally {
+ zkClient.close
+ }
+
+ streamCreated = true
+ loop.done
+ },
+
+ (exception, loop) => {
+ exception match {
+ case e: TopicExistsException =>
+ streamCreated = false
+ loop.done
+ case e: Exception =>
+ if (retries > 0) {
+ warn("Failed to create topic %s. Retrying." format (kSpec.getPhysicalName), exception)
+ retries -= 1
+ } else {
+ error("Failed to create topic %s. Bailing out." format (kSpec.getPhysicalName), exception)
+ throw exception
+ }
+ }
+ })
+
+ streamCreated
+ }
+
+ /**
+ * A helper method that takes oldest, newest, and upcoming offsets for each
+ * system stream partition, and creates a single map from stream name to
+ * SystemStreamMetadata.
+ */
+ def assembleMetadata(oldestOffsets: Map[SystemStreamPartition, String], newestOffsets: Map[SystemStreamPartition, String], upcomingOffsets: Map[SystemStreamPartition, String]): Map[String, SystemStreamMetadata] = {
+ val allMetadata = (oldestOffsets.keySet ++ newestOffsets.keySet ++ upcomingOffsets.keySet)
+ .groupBy(_.getStream)
+ .map {
+ case (streamName, systemStreamPartitions) =>
+ val streamPartitionMetadata = systemStreamPartitions
+ .map(systemStreamPartition => {
+ val partitionMetadata = new SystemStreamPartitionMetadata(
+ // If the topic/partition is empty then oldest and newest will
+ // be stripped of their offsets, so default to null.
+ oldestOffsets.getOrElse(systemStreamPartition, null),
+ newestOffsets.getOrElse(systemStreamPartition, null),
+ upcomingOffsets(systemStreamPartition))
+ (systemStreamPartition.getPartition, partitionMetadata)
+ })
+ .toMap
+ val streamMetadata = new SystemStreamMetadata(streamName, streamPartitionMetadata.asJava)
+ (streamName, streamMetadata)
+ }
+ .toMap
+
+ // This is typically printed downstream and it can be spammy, so debug level here.
+ debug("Got metadata: %s" format allMetadata)
+
+ allMetadata
+ }
+
+ def getCoordinatorTopicProperties(config: KafkaConfig) = {
+ val segmentBytes = config.getCoordinatorSegmentBytes
+ (new Properties /: Map(
+ "cleanup.policy" -> "compact",
+ "segment.bytes" -> segmentBytes)) { case (props, (k, v)) => props.put(k, v); props }
+ }
+
+ def getIntermediateStreamProperties(config: Config): Map[String, Properties] = {
+ val appConfig = new ApplicationConfig(config)
+ if (appConfig.getAppMode == ApplicationMode.BATCH) {
+ val streamConfig = new StreamConfig(config)
+ streamConfig.getStreamIds().filter(streamConfig.getIsIntermediateStream(_)).map(streamId => {
+ val properties = new Properties()
+ properties.putAll(streamConfig.getStreamProperties(streamId))
+ properties.putIfAbsent("retention.ms", String.valueOf(KafkaConfig.DEFAULT_RETENTION_MS_FOR_BATCH))
+ (streamId, properties)
+ }).toMap
+ } else {
+ Map()
+ }
+ }
+
+ def deleteMessages(adminClient : AdminClient, offsets: util.Map[SystemStreamPartition, String]) = {
+ val nextOffsets = offsets.asScala.toSeq.map { case (systemStreamPartition, offset) =>
+ (new TopicPartition(systemStreamPartition.getStream, systemStreamPartition.getPartition.getPartitionId), offset.toLong + 1)
+ }.toMap
+ adminClient.deleteRecordsBefore(nextOffsets);
+ }
+}
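
getCoordinatorTopicProperties() above folds a fixed cleanup policy and a configured segment size into a Properties object for the coordinator topic. A hedged Java equivalent, with the segment size hard-coded purely for illustration (the real value comes from the KafkaConfig segment-bytes setting):

import java.util.Properties;

public class CoordinatorTopicProps {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.put("cleanup.policy", "compact"); // coordinator stream is log-compacted
    props.put("segment.bytes", "26214400"); // illustrative value only
    System.out.println(props);
  }
}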
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
deleted file mode 100644
index 10ce274..0000000
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
+++ /dev/null
@@ -1,371 +0,0 @@
-
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- */
-
-package org.apache.samza.system.kafka;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import kafka.common.TopicAndPartition;
-import org.apache.kafka.clients.consumer.Consumer;
-import org.apache.kafka.clients.consumer.KafkaConsumer;
-import org.apache.kafka.common.TopicPartition;
-import org.apache.samza.SamzaException;
-import org.apache.samza.config.Config;
-import org.apache.samza.config.KafkaConfig;
-import org.apache.samza.config.KafkaConsumerConfig;
-import org.apache.samza.system.IncomingMessageEnvelope;
-import org.apache.samza.system.SystemConsumer;
-import org.apache.samza.system.SystemStreamPartition;
-import org.apache.samza.util.BlockingEnvelopeMap;
-import org.apache.samza.util.Clock;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import scala.Option;
-
-
-public class KafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements SystemConsumer {
-
- private static final Logger LOG = LoggerFactory.getLogger(KafkaSystemConsumer.class);
-
- private static final long FETCH_THRESHOLD = 50000;
- private static final long FETCH_THRESHOLD_BYTES = -1L;
-
- private final Consumer<K, V> kafkaConsumer;
- private final String systemName;
- private final String clientId;
- private final AtomicBoolean stopped = new AtomicBoolean(false);
- private final AtomicBoolean started = new AtomicBoolean(false);
- private final Config config;
- private final boolean fetchThresholdBytesEnabled;
- private final KafkaSystemConsumerMetrics metrics;
-
- // This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
- final KafkaConsumerMessageSink messageSink;
-
- // This proxy contains a separate thread, which reads kafka messages (with consumer.poll()) and populates
- // BlockingEnvelopMap's buffers.
- final private KafkaConsumerProxy proxy;
-
- // keep registration data until the start - mapping between registered SSPs and topicPartitions, and their offsets
- final Map<TopicPartition, String> topicPartitionsToOffset = new HashMap<>();
- final Map<TopicPartition, SystemStreamPartition> topicPartitionsToSSP = new HashMap<>();
-
- long perPartitionFetchThreshold;
- long perPartitionFetchThresholdBytes;
-
- /**
- * Create a KafkaSystemConsumer for the provided {@code systemName}
- * @param systemName system name for which we create the consumer
- * @param config application config
- * @param metrics metrics for this KafkaSystemConsumer
- * @param clock system clock
- */
- public KafkaSystemConsumer(Consumer<K, V> kafkaConsumer, String systemName, Config config, String clientId,
- KafkaSystemConsumerMetrics metrics, Clock clock) {
-
- super(metrics.registry(), clock, metrics.getClass().getName());
-
- this.kafkaConsumer = kafkaConsumer;
- this.clientId = clientId;
- this.systemName = systemName;
- this.config = config;
- this.metrics = metrics;
-
- fetchThresholdBytesEnabled = new KafkaConfig(config).isConsumerFetchThresholdBytesEnabled(systemName);
-
- // create a sink for passing the messages between the proxy and the consumer
- messageSink = new KafkaConsumerMessageSink();
-
- // Create the proxy to do the actual message reading.
- String metricName = String.format("%s", systemName);
- proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink, metrics, metricName);
- LOG.info("{}: Created KafkaConsumerProxy {} ", this, proxy);
- }
-
- /**
- * Create internal kafka consumer object, which will be used in the Proxy.
- * @param systemName system name for which we create the consumer
- * @param clientId client id to use in the kafka client
- * @param config config
- * @return kafka consumer object
- */
- public static KafkaConsumer<byte[], byte[]> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
-
- // extract kafka client configs
- KafkaConsumerConfig consumerConfig = KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId);
-
- LOG.info("{}: KafkaClient properties {}", systemName, consumerConfig);
-
- return new KafkaConsumer(consumerConfig);
- }
-
- @Override
- public void start() {
- if (!started.compareAndSet(false, true)) {
- LOG.warn("{}: Attempting to start the consumer for the second (or more) time.", this);
- return;
- }
- if (stopped.get()) {
- LOG.error("{}: Attempting to start a stopped consumer", this);
- return;
- }
- // initialize the subscriptions for all the registered TopicPartitions
- startSubscription();
- // needs to be called after all the registrations are completed
- setFetchThresholds();
-
- startConsumer();
- LOG.info("{}: Consumer started", this);
- }
-
- private void startSubscription() {
- //subscribe to all the registered TopicPartitions
- LOG.info("{}: Consumer subscribes to {}", this, topicPartitionsToSSP.keySet());
- try {
- synchronized (kafkaConsumer) {
- // we are using assign (and not subscribe), so we need to specify both topic and partition
- kafkaConsumer.assign(topicPartitionsToSSP.keySet());
- }
- } catch (Exception e) {
- throw new SamzaException("Consumer subscription failed for " + this, e);
- }
- }
-
- /**
- * Set the offsets to start from.
- * Register the TopicPartitions with the proxy.
- * Start the proxy.
- */
- void startConsumer() {
- // set the offset for each TopicPartition
- if (topicPartitionsToOffset.size() <= 0) {
- LOG.error ("{}: Consumer is not subscribed to any SSPs", this);
- }
-
- topicPartitionsToOffset.forEach((tp, startingOffsetString) -> {
- long startingOffset = Long.valueOf(startingOffsetString);
-
- try {
- synchronized (kafkaConsumer) {
- kafkaConsumer.seek(tp, startingOffset); // this value should already be the 'upcoming' value
- }
- } catch (Exception e) {
- // all recoverable exceptions are handled by the client.
- // if we get here there is nothing left to do but bail out.
- String msg =
- String.format("%s: Got Exception while seeking to %s for partition %s", this, startingOffsetString, tp);
- LOG.error(msg, e);
- throw new SamzaException(msg, e);
- }
-
- LOG.info("{}: Changing consumer's starting offset for tp = %s to %s", this, tp, startingOffsetString);
-
- // add the partition to the proxy
- proxy.addTopicPartition(topicPartitionsToSSP.get(tp), startingOffset);
- });
-
- // start the proxy thread
- if (proxy != null && !proxy.isRunning()) {
- LOG.info("{}: Starting proxy {}", this, proxy);
- proxy.start();
- }
- }
-
- private void setFetchThresholds() {
- // get the thresholds, and set defaults if not defined.
- KafkaConfig kafkaConfig = new KafkaConfig(config);
-
- Option<String> fetchThresholdOption = kafkaConfig.getConsumerFetchThreshold(systemName);
- long fetchThreshold = FETCH_THRESHOLD;
- if (fetchThresholdOption.isDefined()) {
- fetchThreshold = Long.valueOf(fetchThresholdOption.get());
- }
-
- Option<String> fetchThresholdBytesOption = kafkaConfig.getConsumerFetchThresholdBytes(systemName);
- long fetchThresholdBytes = FETCH_THRESHOLD_BYTES;
- if (fetchThresholdBytesOption.isDefined()) {
- fetchThresholdBytes = Long.valueOf(fetchThresholdBytesOption.get());
- }
-
- int numPartitions = topicPartitionsToSSP.size();
- if (numPartitions != topicPartitionsToOffset.size()) {
- throw new SamzaException("topicPartitionsToSSP.size() doesn't match topicPartitionsToOffset.size()");
- }
-
-
- if (numPartitions > 0) {
- perPartitionFetchThreshold = fetchThreshold / numPartitions;
- if (fetchThresholdBytesEnabled) {
- // currently this feature cannot be enabled, because we do not have the size of the messages available.
- // messages get double buffered, hence divide by 2
- perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / numPartitions;
- }
- }
- LOG.info("{}: fetchThresholdBytes = {}; fetchThreshold={}; numPartitions={}, perPartitionFetchThreshold={}, perPartitionFetchThresholdBytes(0 if disabled)={}",
- this, fetchThresholdBytes, fetchThreshold, numPartitions, perPartitionFetchThreshold, perPartitionFetchThresholdBytes);
- }
-
- @Override
- public void stop() {
- if (!stopped.compareAndSet(false, true)) {
- LOG.warn("{}: Attempting to stop stopped consumer.", this);
- return;
- }
-
- LOG.info("{}: Stopping Samza kafkaConsumer ", this);
-
- // stop the proxy (with 1 minute timeout)
- if (proxy != null) {
- LOG.info("{}: Stopping proxy {}", this, proxy);
- proxy.stop(TimeUnit.SECONDS.toMillis(60));
- }
-
- try {
- synchronized (kafkaConsumer) {
- LOG.info("{}: Closing kafkaSystemConsumer {}", this, kafkaConsumer);
- kafkaConsumer.close();
- }
- } catch (Exception e) {
- LOG.warn("{}: Failed to stop KafkaSystemConsumer.", this, e);
- }
- }
-
- /**
- * record the ssp and the offset. Do not submit it to the consumer yet.
- * @param systemStreamPartition ssp to register
- * @param offset offset to register with
- */
- @Override
- public void register(SystemStreamPartition systemStreamPartition, String offset) {
- if (started.get()) {
- String msg = String.format("%s: Trying to register partition after consumer has been started. ssp=%s", this,
- systemStreamPartition);
- throw new SamzaException(msg);
- }
-
- if (!systemStreamPartition.getSystem().equals(systemName)) {
- LOG.warn("{}: ignoring SSP {}, because this consumer's system doesn't match.", this, systemStreamPartition);
- return;
- }
- LOG.info("{}: Registering ssp = {} with offset {}", this, systemStreamPartition, offset);
-
- super.register(systemStreamPartition, offset);
-
- TopicPartition tp = toTopicPartition(systemStreamPartition);
-
- topicPartitionsToSSP.put(tp, systemStreamPartition);
-
- String existingOffset = topicPartitionsToOffset.get(tp);
- // register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
- if (existingOffset == null || compareOffsets(existingOffset, offset) > 0) {
- topicPartitionsToOffset.put(tp, offset);
- }
-
- metrics.registerTopicAndPartition(toTopicAndPartition(tp));
- }
-
- /**
- * Compare two String offsets.
- * Note. There is a method in KafkaSystemAdmin that does that, but that would require instantiation of systemadmin for each consumer.
- * @return see {@link Long#compareTo(Long)}
- */
- private static int compareOffsets(String offset1, String offset2) {
- return Long.valueOf(offset1).compareTo(Long.valueOf(offset2));
- }
-
- @Override
- public String toString() {
- return String.format("%s:%s", systemName, clientId);
- }
-
- @Override
- public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
- Set<SystemStreamPartition> systemStreamPartitions, long timeout) throws InterruptedException {
-
- // check if the proxy is running
- if (!proxy.isRunning()) {
- stop();
- String message = String.format("%s: KafkaConsumerProxy has stopped.", this);
- throw new SamzaException(message, proxy.getFailureCause());
- }
-
- return super.poll(systemStreamPartitions, timeout);
- }
-
- /**
- * convert from TopicPartition to TopicAndPartition
- */
- public static TopicAndPartition toTopicAndPartition(TopicPartition tp) {
- return new TopicAndPartition(tp.topic(), tp.partition());
- }
-
- /**
- * convert to TopicPartition from SystemStreamPartition
- */
- public static TopicPartition toTopicPartition(SystemStreamPartition ssp) {
- return new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
- }
-
- /**
- * return system name for this consumer
- * @return system name
- */
- public String getSystemName() {
- return systemName;
- }
-
- public class KafkaConsumerMessageSink {
-
- public void setIsAtHighWatermark(SystemStreamPartition ssp, boolean isAtHighWatermark) {
- setIsAtHead(ssp, isAtHighWatermark);
- }
-
- boolean needsMoreMessages(SystemStreamPartition ssp) {
- LOG.debug("{}: needsMoreMessages from following SSP: {}. fetchLimitByBytes enabled={}; messagesSizeInQueue={};"
- + "(limit={}); messagesNumInQueue={}(limit={};", this, ssp, fetchThresholdBytesEnabled,
- getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes, getNumMessagesInQueue(ssp),
- perPartitionFetchThreshold);
-
- if (fetchThresholdBytesEnabled) {
- return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes;
- } else {
- return getNumMessagesInQueue(ssp) < perPartitionFetchThreshold;
- }
- }
-
- void addMessage(SystemStreamPartition ssp, IncomingMessageEnvelope envelope) {
- LOG.trace("{}: Incoming message ssp = {}: envelope = {}.", this, ssp, envelope);
-
- try {
- put(ssp, envelope);
- } catch (InterruptedException e) {
- throw new SamzaException(
- String.format("%s: Consumer was interrupted while trying to add message with offset %s for ssp %s", this,
- envelope.getOffset(), ssp));
- }
- }
- }
-}
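
setFetchThresholds() in the file removed above (and in its replacement) splits the global message and byte thresholds evenly across registered partitions, halving the byte budget because messages are double buffered on the way to the task. A small sketch of that arithmetic, with the byte threshold and partition count chosen only for illustration:

public class FetchThresholds {
  public static void main(String[] args) {
    long fetchThreshold = 50000;             // default FETCH_THRESHOLD from the consumer
    long fetchThresholdBytes = 100_000_000L; // illustrative byte threshold
    int numPartitions = 8;                   // illustrative partition count

    long perPartitionFetchThreshold = fetchThreshold / numPartitions;
    // Messages are double buffered, hence the division by 2.
    long perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / numPartitions;

    System.out.println(perPartitionFetchThreshold);      // prints 6250
    System.out.println(perPartitionFetchThresholdBytes); // prints 6250000
  }
}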
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index ba5390b..f314f92 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -21,13 +21,11 @@ package org.apache.samza.system.kafka
import java.util.Properties
-import kafka.utils.ZkUtils
+import com.google.common.annotations.VisibleForTesting
import org.apache.kafka.clients.producer.KafkaProducer
-import org.apache.samza.SamzaException
import org.apache.samza.config.ApplicationConfig.ApplicationMode
import org.apache.samza.config.KafkaConfig.Config2Kafka
import org.apache.samza.config.StorageConfig._
-import org.apache.samza.config.SystemConfig.Config2System
import org.apache.samza.config.TaskConfig.Config2Task
import org.apache.samza.config._
import org.apache.samza.metrics.MetricsRegistry
@@ -35,32 +33,40 @@ import org.apache.samza.system.{SystemAdmin, SystemConsumer, SystemFactory, Syst
import org.apache.samza.util._
object KafkaSystemFactory extends Logging {
+ @VisibleForTesting
def getInjectedProducerProperties(systemName: String, config: Config) = if (config.isChangelogSystem(systemName)) {
warn("System name '%s' is being used as a changelog. Disabling compression since Kafka does not support compression for log compacted topics." format systemName)
Map[String, String]("compression.type" -> "none")
} else {
Map[String, String]()
}
+
+ val CLIENTID_PRODUCER_PREFIX = "kafka-producer"
+ val CLIENTID_CONSUMER_PREFIX = "kafka-consumer"
+ val CLIENTID_ADMIN_PREFIX = "kafka-admin-consumer"
}
class KafkaSystemFactory extends SystemFactory with Logging {
def getConsumer(systemName: String, config: Config, registry: MetricsRegistry): SystemConsumer = {
- val clientId = KafkaConsumerConfig.getConsumerClientId( config)
val metrics = new KafkaSystemConsumerMetrics(systemName, registry)
- val kafkaConsumer = KafkaSystemConsumer.getKafkaConsumerImpl(systemName, clientId, config)
+ val clientId = KafkaConsumerConfig.createClientId(KafkaSystemFactory.CLIENTID_CONSUMER_PREFIX, config);
+ val kafkaConsumerConfig = KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId);
+
+ val kafkaConsumer = KafkaSystemConsumer.createKafkaConsumerImpl[Array[Byte], Array[Byte]](systemName, kafkaConsumerConfig)
info("Created kafka consumer for system %s, clientId %s: %s" format (systemName, clientId, kafkaConsumer))
- val kafkaSystemConsumer = new KafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, new SystemClock)
- info("Created samza system consumer %s" format (kafkaSystemConsumer.toString))
+ val kafkaSystemConsumer = new KafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics,
+ new SystemClock)
+ info("Created samza system consumer for system %s, config %s: %s" format(systemName, config, kafkaSystemConsumer))
kafkaSystemConsumer
}
def getProducer(systemName: String, config: Config, registry: MetricsRegistry): SystemProducer = {
- val clientId = KafkaConsumerConfig.getProducerClientId(config)
val injectedProps = KafkaSystemFactory.getInjectedProducerProperties(systemName, config)
+ val clientId = KafkaConsumerConfig.createClientId(KafkaSystemFactory.CLIENTID_PRODUCER_PREFIX, config);
val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId, injectedProps)
val getProducer = () => {
new KafkaProducer[Array[Byte], Array[Byte]](producerConfig.getProducerProperties)
@@ -70,6 +76,7 @@ class KafkaSystemFactory extends SystemFactory with Logging {
// Unlike consumer, no need to use encoders here, since they come for free
// inside the producer configs. Kafka's producer will handle all of this
// for us.
+ info("Creating kafka producer for system %s, producerClientId %s" format(systemName, clientId))
new KafkaSystemProducer(
systemName,
@@ -80,43 +87,11 @@ class KafkaSystemFactory extends SystemFactory with Logging {
}
def getAdmin(systemName: String, config: Config): SystemAdmin = {
- val clientId = KafkaConsumerConfig.getAdminClientId(config)
- val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId)
- val bootstrapServers = producerConfig.bootsrapServers
- val consumerConfig = config.getKafkaSystemConsumerConfig(systemName, clientId)
- val timeout = consumerConfig.socketTimeoutMs
- val bufferSize = consumerConfig.socketReceiveBufferBytes
- val zkConnect = Option(consumerConfig.zkConnect)
- .getOrElse(throw new SamzaException("no zookeeper.connect defined in config"))
- val connectZk = () => {
- ZkUtils(zkConnect, 6000, 6000, false)
- }
- val coordinatorStreamProperties = getCoordinatorTopicProperties(config)
- val coordinatorStreamReplicationFactor = config.getCoordinatorReplicationFactor.toInt
- val storeToChangelog = config.getKafkaChangelogEnabledStores()
- // Construct the meta information for each topic, if the replication factor is not defined, we use 2 as the number of replicas for the change log stream.
- val topicMetaInformation = storeToChangelog.map { case (storeName, topicName) => {
- val replicationFactor = config.getChangelogStreamReplicationFactor(storeName).toInt
- val changelogInfo = ChangelogInfo(replicationFactor, config.getChangelogKafkaProperties(storeName))
- info("Creating topic meta information for topic: %s with replication factor: %s" format(topicName, replicationFactor))
- (topicName, changelogInfo)
- }
- }
+ // extract kafka client configs
+ val clientId = KafkaConsumerConfig.createClientId(KafkaSystemFactory.CLIENTID_ADMIN_PREFIX, config);
+ val consumerConfig = KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId)
- val deleteCommittedMessages = config.deleteCommittedMessages(systemName).exists(isEnabled => isEnabled.toBoolean)
- val intermediateStreamProperties: Map[String, Properties] = getIntermediateStreamProperties(config)
- new KafkaSystemAdmin(
- systemName,
- bootstrapServers,
- connectZk,
- coordinatorStreamProperties,
- coordinatorStreamReplicationFactor,
- timeout,
- bufferSize,
- clientId,
- topicMetaInformation,
- intermediateStreamProperties,
- deleteCommittedMessages)
+ new KafkaSystemAdmin(systemName, config, KafkaSystemConsumer.createKafkaConsumerImpl(systemName, consumerConfig))
}
def getCoordinatorTopicProperties(config: Config) = {
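For readers following the getAdmin rework above: the admin is now built from the job config plus an internally created KafkaConsumer, rather than from ZkUtils and a long list of pre-computed arguments. A minimal sketch of driving it through the factory (the system name, broker address, and job name are illustrative assumptions; a real job may need additional configuration):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.samza.config.Config;
    import org.apache.samza.config.JobConfig;
    import org.apache.samza.config.MapConfig;
    import org.apache.samza.system.SystemAdmin;
    import org.apache.samza.system.kafka.KafkaSystemFactory;

    public class KafkaAdminWiringSketch {
      public static void main(String[] args) {
        Map<String, String> props = new HashMap<>();
        props.put(JobConfig.JOB_NAME(), "jobName"); // needed for the generated client id / group id
        props.put("systems.testSystem.consumer.bootstrap.servers", "localhost:9092");
        Config config = new MapConfig(props);

        // getAdmin derives the admin client id (CLIENTID_ADMIN_PREFIX), builds the consumer
        // config, and hands a KafkaConsumer to the new KafkaSystemAdmin constructor.
        SystemAdmin admin = new KafkaSystemFactory().getAdmin("testSystem", config);
        System.out.println(admin);
      }
    }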
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/main/scala/org/apache/samza/util/KafkaUtil.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/util/KafkaUtil.scala b/samza-kafka/src/main/scala/org/apache/samza/util/KafkaUtil.scala
index 2d09301..90dfff3 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/util/KafkaUtil.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/util/KafkaUtil.scala
@@ -37,17 +37,6 @@ object KafkaUtil extends Logging {
val CHECKPOINT_LOG_VERSION_NUMBER = 1
val counter = new AtomicLong(0)
- def getClientId(id: String, config: Config): String = getClientId(
- id,
- config.getName.getOrElse(throw new ConfigException("Missing job name.")),
- config.getJobId)
-
- def getClientId(id: String, jobName: String, jobId: String): String =
- "%s-%s-%s" format
- (id.replaceAll("[^A-Za-z0-9]", "_"),
- jobName.replaceAll("[^A-Za-z0-9]", "_"),
- jobId.replaceAll("[^A-Za-z0-9]", "_"))
-
private def abs(n: Int) = if (n == Integer.MIN_VALUE) 0 else math.abs(n)
def getIntegerPartitionKey(envelope: OutgoingMessageEnvelope, partitions: java.util.List[PartitionInfo]): Integer = {
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/test/java/org/apache/samza/config/TestKafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/java/org/apache/samza/config/TestKafkaConsumerConfig.java b/samza-kafka/src/test/java/org/apache/samza/config/TestKafkaConsumerConfig.java
index de5d093..62f6269 100644
--- a/samza-kafka/src/test/java/org/apache/samza/config/TestKafkaConsumerConfig.java
+++ b/samza-kafka/src/test/java/org/apache/samza/config/TestKafkaConsumerConfig.java
@@ -30,9 +30,11 @@ import org.junit.Test;
public class TestKafkaConsumerConfig {
public final static String SYSTEM_NAME = "testSystem";
+ public final static String JOB_NAME = "jobName";
+ public final static String JOB_ID = "jobId";
public final static String KAFKA_PRODUCER_PROPERTY_PREFIX = "systems." + SYSTEM_NAME + ".producer.";
public final static String KAFKA_CONSUMER_PROPERTY_PREFIX = "systems." + SYSTEM_NAME + ".consumer.";
- private final static String CLIENT_ID = "clientId";
+ private final static String CLIENT_ID_PREFIX = "consumer-client";
@Test
public void testDefaults() {
@@ -44,15 +46,16 @@ public class TestKafkaConsumerConfig {
props.put(KAFKA_CONSUMER_PROPERTY_PREFIX + ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG,
"100"); // should NOT be ignored
- props.put(JobConfig.JOB_NAME(), "jobName");
+ props.put(JobConfig.JOB_NAME(), JOB_NAME);
// if KAFKA_CONSUMER_PROPERTY_PREFIX is set, then PRODUCER should be ignored
props.put(KAFKA_PRODUCER_PROPERTY_PREFIX + "bootstrap.servers", "ignroeThis:9092");
props.put(KAFKA_CONSUMER_PROPERTY_PREFIX + "bootstrap.servers", "useThis:9092");
Config config = new MapConfig(props);
+ String clientId = KafkaConsumerConfig.createClientId(CLIENT_ID_PREFIX, config);
KafkaConsumerConfig kafkaConsumerConfig =
- KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, SYSTEM_NAME, CLIENT_ID);
+ KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, SYSTEM_NAME, clientId);
Assert.assertEquals("false", kafkaConsumerConfig.get(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG));
@@ -71,21 +74,34 @@ public class TestKafkaConsumerConfig {
Assert.assertEquals(ByteArrayDeserializer.class.getName(),
kafkaConsumerConfig.get(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG));
- Assert.assertEquals(CLIENT_ID, kafkaConsumerConfig.get(ConsumerConfig.CLIENT_ID_CONFIG));
+ // validate group and client id generation
+ Assert.assertEquals(CLIENT_ID_PREFIX.replace("-", "_") + "-" + JOB_NAME + "-" + "1",
+ kafkaConsumerConfig.get(ConsumerConfig.CLIENT_ID_CONFIG));
+
+ Assert.assertEquals(CLIENT_ID_PREFIX.replace("-", "_") + "-jobName-1",
+ KafkaConsumerConfig.createClientId(CLIENT_ID_PREFIX, config));
+
+ Assert.assertEquals("jobName-1", KafkaConsumerConfig.createConsumerGroupId(config));
+
+ // validate setting of group and client id
+ Assert.assertEquals(KafkaConsumerConfig.createConsumerGroupId(config),
+ kafkaConsumerConfig.get(ConsumerConfig.GROUP_ID_CONFIG));
- Assert.assertEquals(KafkaConsumerConfig.getConsumerGroupId(config),
+ Assert.assertEquals(KafkaConsumerConfig.createConsumerGroupId(config),
kafkaConsumerConfig.get(ConsumerConfig.GROUP_ID_CONFIG));
- Assert.assertEquals(KafkaConsumerConfig.CONSUMER_CLIENT_ID_PREFIX.replace("-", "_") + "-jobName-1",
- KafkaConsumerConfig.getConsumerClientId(config));
- Assert.assertEquals("jobName-1", KafkaConsumerConfig.getConsumerGroupId(config));
- props.put(JobConfig.JOB_ID(), "jobId");
+ Assert.assertEquals(KafkaConsumerConfig.createClientId(CLIENT_ID_PREFIX, config),
+ kafkaConsumerConfig.get(ConsumerConfig.CLIENT_ID_CONFIG));
+
+ // with non-default job id
+ props.put(JobConfig.JOB_ID(), JOB_ID);
config = new MapConfig(props);
+ Assert.assertEquals(CLIENT_ID_PREFIX.replace("-", "_") + "-jobName-jobId",
+ kafkaConsumerConfig.createClientId(CLIENT_ID_PREFIX, config));
+
+ Assert.assertEquals("jobName-jobId", KafkaConsumerConfig.createConsumerGroupId(config));
- Assert.assertEquals(KafkaConsumerConfig.CONSUMER_CLIENT_ID_PREFIX.replace("-", "_") + "-jobName-jobId",
- KafkaConsumerConfig.getConsumerClientId(config));
- Assert.assertEquals("jobName-jobId", KafkaConsumerConfig.getConsumerGroupId(config));
}
// test stuff that should not be overridden
@@ -103,8 +119,9 @@ public class TestKafkaConsumerConfig {
props.put(JobConfig.JOB_NAME(), "jobName");
Config config = new MapConfig(props);
+ String clientId = KafkaConsumerConfig.createClientId(CLIENT_ID_PREFIX, config);
KafkaConsumerConfig kafkaConsumerConfig =
- KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, SYSTEM_NAME, CLIENT_ID);
+ KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, SYSTEM_NAME, clientId);
Assert.assertEquals("useThis:9092", kafkaConsumerConfig.get(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
@@ -121,29 +138,30 @@ public class TestKafkaConsumerConfig {
map.put(JobConfig.JOB_NAME(), "jobName");
map.put(JobConfig.JOB_ID(), "jobId");
- String result = KafkaConsumerConfig.getConsumerClientId("consumer", new MapConfig(map));
+
+ String result = KafkaConsumerConfig.createClientId("consumer", new MapConfig(map));
Assert.assertEquals("consumer-jobName-jobId", result);
- result = KafkaConsumerConfig.getConsumerClientId("consumer-", new MapConfig(map));
+ result = KafkaConsumerConfig.createClientId("consumer-", new MapConfig(map));
Assert.assertEquals("consumer_-jobName-jobId", result);
- result = KafkaConsumerConfig.getConsumerClientId("super-duper-consumer", new MapConfig(map));
+ result = KafkaConsumerConfig.createClientId("super-duper-consumer", new MapConfig(map));
Assert.assertEquals("super_duper_consumer-jobName-jobId", result);
map.put(JobConfig.JOB_NAME(), " very important!job");
- result = KafkaConsumerConfig.getConsumerClientId("consumer", new MapConfig(map));
+ result = KafkaConsumerConfig.createClientId("consumer", new MapConfig(map));
Assert.assertEquals("consumer-_very_important_job-jobId", result);
map.put(JobConfig.JOB_ID(), "number-#3");
- result = KafkaConsumerConfig.getConsumerClientId("consumer", new MapConfig(map));
+ result = KafkaConsumerConfig.createClientId("consumer", new MapConfig(map));
Assert.assertEquals("consumer-_very_important_job-number__3", result);
}
@Test(expected = SamzaException.class)
public void testNoBootstrapServers() {
- KafkaConsumerConfig kafkaConsumerConfig =
- KafkaConsumerConfig.getKafkaSystemConsumerConfig(new MapConfig(Collections.emptyMap()), SYSTEM_NAME,
- "clientId");
+ Config config = new MapConfig(Collections.emptyMap());
+ String clientId = KafkaConsumerConfig.createClientId("clientId", config);
+ KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, SYSTEM_NAME, clientId);
Assert.fail("didn't get exception for the missing config:" + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG);
}
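The createClientId/createConsumerGroupId helpers exercised above replace the per-role getters (and KafkaUtil.getClientId): non-alphanumeric characters in the prefix are replaced with underscores, and the job name and job id are appended. A small sketch of the resulting names (the prefix value is an arbitrary example; the expected outputs mirror the assertions in this test):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.samza.config.Config;
    import org.apache.samza.config.JobConfig;
    import org.apache.samza.config.KafkaConsumerConfig;
    import org.apache.samza.config.MapConfig;

    public class ClientIdNamingSketch {
      public static void main(String[] args) {
        Map<String, String> map = new HashMap<>();
        map.put(JobConfig.JOB_NAME(), "jobName");
        map.put(JobConfig.JOB_ID(), "jobId");
        Config config = new MapConfig(map);

        // "kafka-admin-consumer" -> "kafka_admin_consumer-jobName-jobId"
        System.out.println(KafkaConsumerConfig.createClientId("kafka-admin-consumer", config));

        // group id is derived from the job name and id only: "jobName-jobId"
        System.out.println(KafkaConsumerConfig.createConsumerGroupId(config));
      }
    }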
http://git-wip-us.apache.org/repos/asf/samza/blob/63d33fa0/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemAdminJava.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemAdminJava.java b/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemAdminJava.java
index 7e968bf..27601b0 100644
--- a/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemAdminJava.java
+++ b/samza-kafka/src/test/java/org/apache/samza/system/kafka/TestKafkaSystemAdminJava.java
@@ -19,14 +19,22 @@
package org.apache.samza.system.kafka;
+import com.google.common.collect.ImmutableSet;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
-import java.util.Properties;
import kafka.api.TopicMetadata;
+import org.apache.kafka.common.PartitionInfo;
+import org.apache.samza.Partition;
+import org.apache.samza.config.ApplicationConfig;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.JobConfig;
+import org.apache.samza.config.MapConfig;
import org.apache.samza.system.StreamSpec;
import org.apache.samza.system.StreamValidationException;
import org.apache.samza.system.SystemAdmin;
-import org.apache.samza.util.ScalaJavaUtil;
+import org.apache.samza.system.SystemStreamPartition;
+import org.junit.Assert;
import org.junit.Test;
import org.mockito.Mockito;
@@ -36,6 +44,97 @@ import static org.junit.Assert.*;
public class TestKafkaSystemAdminJava extends TestKafkaSystemAdmin {
@Test
+ public void testGetOffsetsAfter() {
+ SystemStreamPartition ssp1 = new SystemStreamPartition(SYSTEM(), TOPIC(), new Partition(0));
+ SystemStreamPartition ssp2 = new SystemStreamPartition(SYSTEM(), TOPIC(), new Partition(1));
+ Map<SystemStreamPartition, String> offsets = new HashMap<>();
+ offsets.put(ssp1, "1");
+ offsets.put(ssp2, "2");
+
+ offsets = systemAdmin().getOffsetsAfter(offsets);
+
+ Assert.assertEquals("2", offsets.get(ssp1));
+ Assert.assertEquals("3", offsets.get(ssp2));
+ }
+
+ @Test
+ public void testToKafkaSpec() {
+ String topicName = "testStream";
+
+ int defaultPartitionCount = 2;
+ int changeLogPartitionFactor = 5;
+ Map<String, String> map = new HashMap<>();
+ Config config = new MapConfig(map);
+ StreamSpec spec = new StreamSpec("id", topicName, SYSTEM(), defaultPartitionCount, config);
+
+ KafkaSystemAdmin kafkaAdmin = systemAdmin();
+ KafkaStreamSpec kafkaSpec = kafkaAdmin.toKafkaSpec(spec);
+
+ Assert.assertEquals("id", kafkaSpec.getId());
+ Assert.assertEquals(topicName, kafkaSpec.getPhysicalName());
+ Assert.assertEquals(SYSTEM(), kafkaSpec.getSystemName());
+ Assert.assertEquals(defaultPartitionCount, kafkaSpec.getPartitionCount());
+
+ // validate that conversion is using coordination metadata
+ map.put("job.coordinator.segment.bytes", "123");
+ map.put("job.coordinator.cleanup.policy", "superCompact");
+ int coordReplicatonFactor = 4;
+ map.put(org.apache.samza.config.KafkaConfig.JOB_COORDINATOR_REPLICATION_FACTOR(),
+ String.valueOf(coordReplicatonFactor));
+
+ KafkaSystemAdmin admin = Mockito.spy(createSystemAdmin(SYSTEM(), map));
+ spec = StreamSpec.createCoordinatorStreamSpec(topicName, SYSTEM());
+ kafkaSpec = admin.toKafkaSpec(spec);
+ Assert.assertEquals(coordReplicatonFactor, kafkaSpec.getReplicationFactor());
+ Assert.assertEquals("123", kafkaSpec.getProperties().getProperty("segment.bytes"));
+ // cleanup policy is overridden in the KafkaAdmin
+ Assert.assertEquals("compact", kafkaSpec.getProperties().getProperty("cleanup.policy"));
+
+ // validate that conversion is using changeLog metadata
+ map = new HashMap<>();
+ map.put(JobConfig.JOB_DEFAULT_SYSTEM(), SYSTEM());
+
+ map.put(String.format("stores.%s.changelog", "fakeStore"), topicName);
+ int changeLogReplicationFactor = 3;
+ map.put(String.format("stores.%s.changelog.replication.factor", "fakeStore"),
+ String.valueOf(changeLogReplicationFactor));
+ admin = Mockito.spy(createSystemAdmin(SYSTEM(), map));
+ spec = StreamSpec.createChangeLogStreamSpec(topicName, SYSTEM(), changeLogPartitionFactor);
+ kafkaSpec = admin.toKafkaSpec(spec);
+ Assert.assertEquals(changeLogReplicationFactor, kafkaSpec.getReplicationFactor());
+
+ // same, but with missing topic info
+ try {
+ admin = Mockito.spy(createSystemAdmin(SYSTEM(), map));
+ spec = StreamSpec.createChangeLogStreamSpec("anotherTopic", SYSTEM(), changeLogPartitionFactor);
+ kafkaSpec = admin.toKafkaSpec(spec);
+ Assert.fail("toKafkaSpec should've failed for missing topic");
+ } catch (StreamValidationException e) {
+ // expected
+ }
+
+ // validate that conversion is using intermediate streams properties
+ String interStreamId = "isId";
+
+ Map<String, String> interStreamMap = new HashMap<>();
+ interStreamMap.put("app.mode", ApplicationConfig.ApplicationMode.BATCH.toString());
+ interStreamMap.put(String.format("streams.%s.samza.intermediate", interStreamId), "true");
+ interStreamMap.put(String.format("streams.%s.samza.system", interStreamId), "testSystem");
+ interStreamMap.put(String.format("streams.%s.p1", interStreamId), "v1");
+ interStreamMap.put(String.format("streams.%s.retention.ms", interStreamId), "123");
+ // legacy format
+ interStreamMap.put(String.format("systems.%s.streams.%s.p2", "testSystem", interStreamId), "v2");
+
+ admin = Mockito.spy(createSystemAdmin(SYSTEM(), interStreamMap));
+ spec = new StreamSpec(interStreamId, topicName, SYSTEM(), defaultPartitionCount, config);
+ kafkaSpec = admin.toKafkaSpec(spec);
+ Assert.assertEquals("v1", kafkaSpec.getProperties().getProperty("p1"));
+ Assert.assertEquals("v2", kafkaSpec.getProperties().getProperty("p2"));
+ Assert.assertEquals("123", kafkaSpec.getProperties().getProperty("retention.ms"));
+ Assert.assertEquals(defaultPartitionCount, kafkaSpec.getPartitionCount());
+ }
+
+ @Test
public void testCreateCoordinatorStream() {
SystemAdmin admin = Mockito.spy(systemAdmin());
StreamSpec spec = StreamSpec.createCoordinatorStreamSpec("testCoordinatorStream", "testSystem");
@@ -49,10 +148,14 @@ public class TestKafkaSystemAdminJava extends TestKafkaSystemAdmin {
public void testCreateCoordinatorStreamWithSpecialCharsInTopicName() {
final String STREAM = "test.coordinator_test.Stream";
- Properties coordProps = new Properties();
- Map<String, ChangelogInfo> changeLogMap = new HashMap<>();
+ Map<String, String> map = new HashMap<>();
+ map.put("job.coordinator.segment.bytes", "123");
+ map.put("job.coordinator.cleanup.policy", "compact");
+ int coordReplicatonFactor = 2;
+ map.put(org.apache.samza.config.KafkaConfig.JOB_COORDINATOR_REPLICATION_FACTOR(),
+ String.valueOf(coordReplicatonFactor));
- KafkaSystemAdmin admin = Mockito.spy(createSystemAdmin(coordProps, 1, ScalaJavaUtil.toScalaMap(changeLogMap)));
+ KafkaSystemAdmin admin = Mockito.spy(createSystemAdmin(SYSTEM(), map));
StreamSpec spec = StreamSpec.createCoordinatorStreamSpec(STREAM, SYSTEM());
Mockito.doAnswer(invocationOnMock -> {
@@ -62,6 +165,10 @@ public class TestKafkaSystemAdminJava extends TestKafkaSystemAdmin {
assertEquals(SYSTEM(), internalSpec.getSystemName());
assertEquals(STREAM, internalSpec.getPhysicalName());
assertEquals(1, internalSpec.getPartitionCount());
+ Assert.assertEquals(coordReplicatonFactor, ((KafkaStreamSpec) internalSpec).getReplicationFactor());
+ Assert.assertEquals("123", ((KafkaStreamSpec) internalSpec).getProperties().getProperty("segment.bytes"));
+ // cleanup policy is overridden in the KafkaAdmin
+ Assert.assertEquals("compact", ((KafkaStreamSpec) internalSpec).getProperties().getProperty("cleanup.policy"));
return internalSpec;
}).when(admin).toKafkaSpec(Mockito.any());
@@ -71,62 +178,38 @@ public class TestKafkaSystemAdminJava extends TestKafkaSystemAdmin {
}
@Test
- public void testCreateChangelogStream() {
- final String STREAM = "testChangeLogStream";
- final int PARTITIONS = 12;
- final int REP_FACTOR = 1;
-
- Properties coordProps = new Properties();
- Properties changeLogProps = new Properties();
- changeLogProps.setProperty("cleanup.policy", "compact");
- changeLogProps.setProperty("segment.bytes", "139");
- Map<String, ChangelogInfo> changeLogMap = new HashMap<>();
- changeLogMap.put(STREAM, new ChangelogInfo(REP_FACTOR, changeLogProps));
-
- KafkaSystemAdmin admin = Mockito.spy(createSystemAdmin(coordProps, 1, ScalaJavaUtil.toScalaMap(changeLogMap)));
- StreamSpec spec = StreamSpec.createChangeLogStreamSpec(STREAM, SYSTEM(), PARTITIONS);
-
- Mockito.doAnswer(invocationOnMock -> {
- StreamSpec internalSpec = (StreamSpec) invocationOnMock.callRealMethod();
- assertTrue(internalSpec instanceof KafkaStreamSpec); // KafkaStreamSpec is used to carry replication factor
- assertTrue(internalSpec.isChangeLogStream());
- assertEquals(SYSTEM(), internalSpec.getSystemName());
- assertEquals(STREAM, internalSpec.getPhysicalName());
- assertEquals(REP_FACTOR, ((KafkaStreamSpec) internalSpec).getReplicationFactor());
- assertEquals(PARTITIONS, internalSpec.getPartitionCount());
- assertEquals(changeLogProps, ((KafkaStreamSpec) internalSpec).getProperties());
-
- return internalSpec;
- }).when(admin).toKafkaSpec(Mockito.any());
-
- admin.createStream(spec);
- admin.validateStream(spec);
+ public void testCreateChangelogStreamHelp() {
+ testCreateChangelogStreamHelp("testChangeLogStream");
}
@Test
public void testCreateChangelogStreamWithSpecialCharsInTopicName() {
- final String STREAM = "test.Change_Log.Stream";
+ // cannot contain period
+ testCreateChangelogStreamHelp("test-Change_Log-Stream");
+ }
+
+ public void testCreateChangelogStreamHelp(final String topic) {
final int PARTITIONS = 12;
- final int REP_FACTOR = 1;
+ final int REP_FACTOR = 2;
- Properties coordProps = new Properties();
- Properties changeLogProps = new Properties();
- changeLogProps.setProperty("cleanup.policy", "compact");
- changeLogProps.setProperty("segment.bytes", "139");
- Map<String, ChangelogInfo> changeLogMap = new HashMap<>();
- changeLogMap.put(STREAM, new ChangelogInfo(REP_FACTOR, changeLogProps));
+ Map<String, String> map = new HashMap<>();
+ map.put(JobConfig.JOB_DEFAULT_SYSTEM(), SYSTEM());
+ map.put(String.format("stores.%s.changelog", "fakeStore"), topic);
+ map.put(String.format("stores.%s.changelog.replication.factor", "fakeStore"), String.valueOf(REP_FACTOR));
+ map.put(String.format("stores.%s.changelog.kafka.segment.bytes", "fakeStore"), "139");
+ KafkaSystemAdmin admin = Mockito.spy(createSystemAdmin(SYSTEM(), map));
+ StreamSpec spec = StreamSpec.createChangeLogStreamSpec(topic, SYSTEM(), PARTITIONS);
- KafkaSystemAdmin admin = Mockito.spy(createSystemAdmin(coordProps, 1, ScalaJavaUtil.toScalaMap(changeLogMap)));
- StreamSpec spec = StreamSpec.createChangeLogStreamSpec(STREAM, SYSTEM(), PARTITIONS);
Mockito.doAnswer(invocationOnMock -> {
StreamSpec internalSpec = (StreamSpec) invocationOnMock.callRealMethod();
assertTrue(internalSpec instanceof KafkaStreamSpec); // KafkaStreamSpec is used to carry replication factor
assertTrue(internalSpec.isChangeLogStream());
assertEquals(SYSTEM(), internalSpec.getSystemName());
- assertEquals(STREAM, internalSpec.getPhysicalName());
+ assertEquals(topic, internalSpec.getPhysicalName());
assertEquals(REP_FACTOR, ((KafkaStreamSpec) internalSpec).getReplicationFactor());
assertEquals(PARTITIONS, internalSpec.getPartitionCount());
- assertEquals(changeLogProps, ((KafkaStreamSpec) internalSpec).getProperties());
+ assertEquals("139", ((KafkaStreamSpec) internalSpec).getProperties().getProperty("segment.bytes"));
+ assertEquals("compact", ((KafkaStreamSpec) internalSpec).getProperties().getProperty("cleanup.policy"));
return internalSpec;
}).when(admin).toKafkaSpec(Mockito.any());
@@ -176,7 +259,7 @@ public class TestKafkaSystemAdminJava extends TestKafkaSystemAdmin {
systemAdmin().validateStream(spec2);
}
- @Test
+ //@Test //TODO - currently the connection to ZK fails, but since it checks for empty, the test succeeds. SAMZA-1887
public void testClearStream() {
StreamSpec spec = new StreamSpec("testId", "testStreamClear", "testSystem", 8);
@@ -184,8 +267,8 @@ public class TestKafkaSystemAdminJava extends TestKafkaSystemAdmin {
systemAdmin().createStream(spec));
assertTrue(systemAdmin().clearStream(spec));
- scala.collection.immutable.Set<String> topic = new scala.collection.immutable.Set.Set1<>(spec.getPhysicalName());
- scala.collection.immutable.Map<String, TopicMetadata> metadata = systemAdmin().getTopicMetadata(topic);
- assertTrue(metadata.get(spec.getPhysicalName()).get().partitionsMetadata().isEmpty());
+ ImmutableSet<String> topics = ImmutableSet.of(spec.getPhysicalName());
+ Map<String, List<PartitionInfo>> metadata = systemAdmin().getTopicMetadata(topics);
+ assertTrue(metadata.get(spec.getPhysicalName()).isEmpty());
}
}
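As the tests above show, toKafkaSpec now reads the replication factor and topic properties from the coordinator, changelog, or intermediate-stream config, forcing cleanup.policy=compact for changelog topics. A minimal sketch of that conversion for a changelog stream, assuming the class sits in org.apache.samza.system.kafka (so toKafkaSpec is visible, as in the tests) and that the admin can be constructed from config plus a consumer that is never used for this call (store and topic names are illustrative):

    package org.apache.samza.system.kafka;

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.kafka.clients.consumer.KafkaConsumer;
    import org.apache.samza.config.Config;
    import org.apache.samza.config.JobConfig;
    import org.apache.samza.config.MapConfig;
    import org.apache.samza.system.StreamSpec;
    import org.mockito.Mockito;

    public class ToKafkaSpecSketch {
      public static void main(String[] args) {
        Map<String, String> map = new HashMap<>();
        map.put(JobConfig.JOB_DEFAULT_SYSTEM(), "testSystem");
        map.put("stores.fakeStore.changelog", "my-changelog");
        map.put("stores.fakeStore.changelog.replication.factor", "3");
        map.put("stores.fakeStore.changelog.kafka.segment.bytes", "139");
        Config config = new MapConfig(map);

        // The new admin takes a KafkaConsumer instead of ZkUtils; mocked here on the
        // assumption that this conversion only consults the config.
        KafkaSystemAdmin admin =
            new KafkaSystemAdmin("testSystem", config, Mockito.mock(KafkaConsumer.class));

        StreamSpec spec = StreamSpec.createChangeLogStreamSpec("my-changelog", "testSystem", 12);
        KafkaStreamSpec kafkaSpec = admin.toKafkaSpec(spec);

        System.out.println(kafkaSpec.getReplicationFactor());                        // 3
        System.out.println(kafkaSpec.getProperties().getProperty("segment.bytes"));  // 139
        System.out.println(kafkaSpec.getProperties().getProperty("cleanup.policy")); // compact (overridden for changelogs)
      }
    }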