You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@samza.apache.org by bo...@apache.org on 2018/09/18 23:00:58 UTC
[01/47] samza git commit: reduce debugging from info to debug in
KafkaCheckpointManager.java
Repository: samza
Updated Branches:
refs/heads/NewKafkaSystemConsumer [created] 361596317
reduce debugging from info to debug in KafkaCheckpointManager.java
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/a31a7aa2
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/a31a7aa2
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/a31a7aa2
Branch: refs/heads/NewKafkaSystemConsumer
Commit: a31a7aa29b7be4bb46f8e651b6b8fa46a65b48e2
Parents: c93dd8f
Author: Boris Shkolnik <bs...@linkedin.com>
Authored: Mon Oct 16 15:25:49 2017 -0700
Committer: Boris Shkolnik <bs...@linkedin.com>
Committed: Mon Oct 16 15:25:49 2017 -0700
----------------------------------------------------------------------
.../org/apache/samza/checkpoint/kafka/KafkaCheckpointManager.scala | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/a31a7aa2/samza-kafka/src/main/scala/org/apache/samza/checkpoint/kafka/KafkaCheckpointManager.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/checkpoint/kafka/KafkaCheckpointManager.scala b/samza-kafka/src/main/scala/org/apache/samza/checkpoint/kafka/KafkaCheckpointManager.scala
index 4eb6666..b016b4a 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/checkpoint/kafka/KafkaCheckpointManager.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/checkpoint/kafka/KafkaCheckpointManager.scala
@@ -103,7 +103,7 @@ class KafkaCheckpointManager(
systemProducer.send(taskName.getTaskName, envelope)
systemProducer.flush(taskName.getTaskName) // make sure it is written
- info("Completed writing checkpoint=%s into %s topic for system %s." format(checkpoint, checkpointTopic, systemName) )
+ debug("Completed writing checkpoint=%s into %s topic for system %s." format(checkpoint, checkpointTopic, systemName) )
loop.done
},
[27/47] samza git commit: LOG line
Posted by bo...@apache.org.
LOG line
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/19ba3003
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/19ba3003
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/19ba3003
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 19ba3003078f989400cd9fbefee7c00421bebc6c
Parents: 59b3dc1
Author: Boris S <bo...@apache.org>
Authored: Fri Aug 31 15:15:04 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Aug 31 15:20:12 2018 -0700
----------------------------------------------------------------------
.../org/apache/samza/system/kafka/NewKafkaSystemConsumer.java | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/19ba3003/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index e34812f..aeeadce 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -348,7 +348,8 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
}
Map<SystemStreamPartition, List<IncomingMessageEnvelope>> res = super.poll(systemStreamPartitions, timeout);
- LOG.info("=============================>. Res in POLL:" + res.toString());
+ LOG.info("=============================>. Res for " + systemStreamPartitions);
+ LOG.info("=============================>. Res:" + res.toString());
return res;
}
[13/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/78ad578c
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/78ad578c
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/78ad578c
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 78ad578cc022af3f9c5f9a15826a9dc010502a89
Parents: 88f8559 d28f0c8
Author: Boris S <bo...@apache.org>
Authored: Tue Aug 7 19:04:17 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Aug 7 19:04:17 2018 -0700
----------------------------------------------------------------------
.../org/apache/samza/metrics/ListGauge.java | 3 +-
.../samza/runtime/RemoteApplicationRunner.java | 69 +----
.../storage/TaskSideInputStorageManager.java | 38 ++-
.../MetricsSnapshotReporterFactory.scala | 4 +-
.../runtime/TestRemoteApplicationRunner.java | 70 +++++
.../TestTaskSideInputStorageManager.java | 295 +++++++++++++++++++
.../samza/storage/kv/RocksDbTableProvider.java | 4 +
.../table/TestLocalTableWithSideInputs.java | 161 ++++++++++
.../apache/samza/test/table/TestTableData.java | 22 +-
.../table/TestTableDescriptorsProvider.java | 2 +-
10 files changed, 597 insertions(+), 71 deletions(-)
----------------------------------------------------------------------
[03/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/d4620d66
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/d4620d66
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/d4620d66
Branch: refs/heads/NewKafkaSystemConsumer
Commit: d4620d6690f74cad9472d0e27a1b31aeb4156c54
Parents: 410ce78 958edc4
Author: Boris S <bo...@apache.org>
Authored: Tue Oct 24 17:11:48 2017 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Oct 24 17:11:48 2017 -0700
----------------------------------------------------------------------
.../apache/samza/storage/kv/KeyValueStore.java | 69 ++++---------
.../kafka/KafkaCheckpointManager.scala | 103 +++++++------------
.../kv/inmemory/InMemoryKeyValueStore.scala | 8 --
.../samza/storage/kv/RocksDbKeyValueStore.scala | 86 ++++++----------
.../storage/kv/TestRocksDbKeyValueStore.scala | 4 +-
.../apache/samza/storage/kv/CachedStore.scala | 2 +-
.../samza/storage/kv/MockKeyValueStore.scala | 8 --
7 files changed, 93 insertions(+), 187 deletions(-)
----------------------------------------------------------------------
[04/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/bbffb79b
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/bbffb79b
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/bbffb79b
Branch: refs/heads/NewKafkaSystemConsumer
Commit: bbffb79b8b9799a41e8e82ded60f83550736886b
Parents: d4620d6 cc1ca2c
Author: Boris S <bo...@apache.org>
Authored: Tue Oct 24 17:54:20 2017 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Oct 24 17:54:20 2017 -0700
----------------------------------------------------------------------
.../src/main/java/org/apache/samza/task/StreamOperatorTask.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
[08/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/1ad58d43
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/1ad58d43
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/1ad58d43
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 1ad58d43fbe00a57054cb85b0be2eef6ee6470a6
Parents: 06b1ac3 2d10732
Author: Boris S <bo...@apache.org>
Authored: Tue Oct 31 12:46:53 2017 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Oct 31 12:46:53 2017 -0700
----------------------------------------------------------------------
build.gradle | 11 +
.../apache/samza/operators/MessageStream.java | 41 +-
.../system/eventhub/EventHubClientManager.java | 69 ++++
.../eventhub/EventHubClientManagerFactory.java | 32 ++
.../samza/system/eventhub/EventHubConfig.java | 181 +++++++++
.../system/eventhub/EventHubSystemFactory.java | 65 +++
.../samza/system/eventhub/Interceptor.java | 34 ++
.../eventhub/SamzaEventHubClientManager.java | 104 +++++
.../eventhub/admin/EventHubSystemAdmin.java | 199 +++++++++
.../eventhub/admin/PassThroughInterceptor.java | 33 ++
.../EventHubIncomingMessageEnvelope.java | 42 ++
.../consumer/EventHubSystemConsumer.java | 401 +++++++++++++++++++
.../system/eventhub/metrics/SamzaHistogram.java | 62 +++
.../producer/EventHubSystemProducer.java | 345 ++++++++++++++++
.../samza/system/eventhub/MockEventData.java | 57 +++
.../MockEventHubClientManagerFactory.java | 196 +++++++++
.../eventhub/MockEventHubConfigFactory.java | 61 +++
.../system/eventhub/TestMetricsRegistry.java | 85 ++++
.../eventhub/admin/TestEventHubSystemAdmin.java | 113 ++++++
.../consumer/ITestEventHubSystemConsumer.java | 76 ++++
.../consumer/TestEventHubSystemConsumer.java | 272 +++++++++++++
.../producer/ITestEventHubSystemProducer.java | 163 ++++++++
.../producer/TestEventHubSystemProducer.java | 153 +++++++
.../samza/execution/JobGraphJsonGenerator.java | 4 +-
.../samza/operators/MessageStreamImpl.java | 44 +-
.../apache/samza/operators/StreamGraphImpl.java | 74 +++-
.../samza/operators/impl/OperatorImpl.java | 32 +-
.../samza/operators/impl/OperatorImplGraph.java | 20 +-
.../operators/impl/PartialJoinOperatorImpl.java | 32 +-
.../operators/impl/WindowOperatorImpl.java | 21 +-
.../impl/store/TimeSeriesStoreImpl.java | 9 +-
.../samza/operators/spec/InputOperatorSpec.java | 2 +-
.../samza/operators/spec/JoinOperatorSpec.java | 14 +-
.../samza/operators/spec/OperatorSpec.java | 14 +-
.../samza/operators/spec/OperatorSpecs.java | 36 +-
.../operators/spec/OutputOperatorSpec.java | 2 +-
.../operators/spec/PartitionByOperatorSpec.java | 2 +-
.../samza/operators/spec/SinkOperatorSpec.java | 2 +-
.../operators/spec/StreamOperatorSpec.java | 2 +-
.../operators/spec/WindowOperatorSpec.java | 4 +-
.../apache/samza/system/SystemConsumers.scala | 14 +-
.../samza/example/KeyValueStoreExample.java | 2 +-
.../samza/example/OrderShipmentJoinExample.java | 2 +-
.../samza/example/PageViewCounterExample.java | 2 +-
.../samza/example/RepartitionExample.java | 5 +-
.../org/apache/samza/example/WindowExample.java | 5 +-
.../samza/execution/TestExecutionPlanner.java | 44 +-
.../execution/TestJobGraphJsonGenerator.java | 16 +-
.../org/apache/samza/execution/TestJobNode.java | 34 +-
.../samza/operators/TestJoinOperator.java | 33 +-
.../samza/operators/TestMessageStreamImpl.java | 26 +-
.../samza/operators/TestStreamGraphImpl.java | 61 +--
.../samza/operators/TestWindowOperator.java | 102 ++++-
.../samza/operators/impl/TestOperatorImpl.java | 2 +-
.../operators/impl/TestOperatorImplGraph.java | 47 ++-
.../operators/spec/TestWindowOperatorSpec.java | 4 +-
.../samza/system/TestSystemConsumers.scala | 60 ++-
.../samza/storage/kv/RocksDbKeyValueStore.scala | 90 +++--
.../storage/kv/TestRocksDbKeyValueStore.scala | 36 +-
.../rest/model/yarn/YarnApplicationInfo.java | 12 +-
.../proxy/job/YarnRestJobStatusProvider.java | 41 +-
.../job/TestYarnRestJobStatusProvider.java | 77 ++++
.../src/main/config/perf/kv-perf.properties | 34 +-
.../performance/TestKeyValuePerformance.scala | 37 +-
.../EndOfStreamIntegrationTest.java | 2 +-
.../WatermarkIntegrationTest.java | 2 +-
.../test/operator/RepartitionJoinWindowApp.java | 13 +-
.../samza/test/operator/SessionWindowApp.java | 4 +-
.../samza/test/operator/TumblingWindowApp.java | 3 +-
.../test/integration/TestStatefulTask.scala | 2 +-
70 files changed, 3595 insertions(+), 326 deletions(-)
----------------------------------------------------------------------
[43/47] samza git commit: Added Test for KafkaConsumerConfig
Posted by bo...@apache.org.
Added Test for KafkaConsumerConfig
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/32c92828
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/32c92828
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/32c92828
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 32c92828eaff98f4c2e6691533ece9f502ef1f98
Parents: 2480aa3
Author: Boris S <bo...@apache.org>
Authored: Wed Sep 12 14:06:41 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Sep 12 14:06:41 2018 -0700
----------------------------------------------------------------------
.../clients/consumer/KafkaConsumerConfig.java | 23 ++--
.../org/apache/samza/config/KafkaConfig.scala | 5 +-
.../samza/system/kafka/KafkaConsumerProxy.java | 14 ++-
.../consumer/TestKafkaConsumerConfig.java | 121 +++++++++++++++++++
4 files changed, 149 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/32c92828/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index 1a97ec7..8ada1b4 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -54,7 +54,7 @@ public class KafkaConsumerConfig extends ConsumerConfig {
* By default, KafkaConsumer will fetch ALL available messages for all the partitions.
* This may cause memory issues. That's why we will limit the number of messages per partition we get on EACH poll().
*/
- private static final String DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS = "100";
+ static final String DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS = "100";
private KafkaConsumerConfig(Properties props) {
super(props);
@@ -83,6 +83,11 @@ public class KafkaConsumerConfig extends ConsumerConfig {
//Kafka client configuration
+ // put overrides
+ consumerProps.putAll(injectProps);
+
+ // These are values we enforce in samza, and they cannot be overwritten.
+
// Disable consumer auto-commit because Samza controls commits
consumerProps.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
@@ -106,28 +111,24 @@ public class KafkaConsumerConfig extends ConsumerConfig {
// the consumer is fully typed, and deserialization can be too. But in case it is not provided we should
// default to byte[]
- if (!config.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
+ if (!consumerProps.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
LOG.info("setting default key serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
}
- if (!config.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
+ if (!consumerProps.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
LOG.info("setting default value serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
}
- // NOT SURE THIS IS NEEDED TODO
- final String maxPollRecords =
- subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS);
- consumerProps.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
-
- // put overrides
- consumerProps.putAll(injectProps);
+ // Override default max poll config if there is no value
+ consumerProps.computeIfAbsent(ConsumerConfig.MAX_POLL_RECORDS_CONFIG,
+ (k) -> DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS);
return new KafkaConsumerConfig(consumerProps);
}
// group id should be unique per job
- private static String getConsumerGroupId(Config config) {
+ static String getConsumerGroupId(Config config) {
JobConfig jobConfig = new JobConfig(config);
Option<String> jobIdOption = jobConfig.getJobId();
Option<String> jobNameOption = jobConfig.getName();
http://git-wip-us.apache.org/repos/asf/samza/blob/32c92828/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala b/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala
index 26664ea..ef43e72 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala
@@ -289,7 +289,10 @@ class KafkaConfig(config: Config) extends ScalaMapConfig(config) {
properties
}
- // kafka config
+ /**
+ * @deprecated Use KafkaConsumerConfig
+ */
+ @Deprecated
def getKafkaSystemConsumerConfig( systemName: String,
clientId: String,
groupId: String = "undefined-samza-consumer-group-%s" format UUID.randomUUID.toString,
http://git-wip-us.apache.org/repos/asf/samza/blob/32c92828/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 4b99fcc..83e7a58 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -431,12 +431,22 @@ public class KafkaConsumerProxy<K, V> {
return failureCause;
}
- public void stop(long timeout) {
+ /**
+ * stop the thread and wait for it to stop
+ * @param timeoutMs how long to wait in join
+ */
+ public void stop(long timeoutMs) {
LOG.info("Shutting down KafkaConsumerProxy poll thread:" + consumerPollThread.getName());
isRunning = false;
try {
- consumerPollThread.join(timeout);
+ consumerPollThread.join(timeoutMs);
+ // join returns even if the thread didn't finish
+ // in this case we should interrupt it and wait again
+ if (consumerPollThread.isAlive()) {
+ consumerPollThread.interrupt();
+ consumerPollThread.join(timeoutMs);
+ }
} catch (InterruptedException e) {
LOG.warn("Join in KafkaConsumerProxy has failed", e);
consumerPollThread.interrupt();
http://git-wip-us.apache.org/repos/asf/samza/blob/32c92828/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java b/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
new file mode 100644
index 0000000..ee300d0
--- /dev/null
+++ b/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
@@ -0,0 +1,121 @@
+package org.apache.kafka.clients.consumer;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.apache.samza.SamzaException;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.MapConfig;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+
+public class TestKafkaConsumerConfig {
+ private final Map<String, String> props = new HashMap<>();
+ public final static String SYSTEM_NAME = "testSystem";
+ public final static String KAFKA_PRODUCER_PROPERTY_PREFIX = "systems." + SYSTEM_NAME + ".producer.";
+ public final static String KAFKA_CONSUMER_PROPERTY_PREFIX = "systems." + SYSTEM_NAME + ".consumer.";
+ private final static String CLIENT_ID = "clientId";
+
+ @Before
+ public void setProps() {
+
+ }
+
+ @Test
+ public void testDefaultsAndOverrides() {
+
+ Map<String, String> overrides = new HashMap<>();
+ overrides.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true"); // should be ignored
+ overrides.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG, "Ignore"); // should be ignored
+ overrides.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "100"); // should NOT be ignored
+
+ // if KAFKA_CONSUMER_PROPERTY_PREFIX is set, then PRODUCER should be ignored
+ props.put(KAFKA_PRODUCER_PROPERTY_PREFIX + "bootstrap.servers", "ignroeThis:9092");
+ props.put(KAFKA_CONSUMER_PROPERTY_PREFIX + "bootstrap.servers", "useThis:9092");
+
+ // should be overridden
+ props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "true"); //ignore
+ props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "1000"); // ignore
+
+
+ // should be overridden
+ props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "200");
+
+ Config config = new MapConfig(props);
+ KafkaConsumerConfig kafkaConsumerConfig = KafkaConsumerConfig.getKafkaSystemConsumerConfig(
+ config, SYSTEM_NAME, CLIENT_ID, overrides);
+
+ Assert.assertEquals(kafkaConsumerConfig.getBoolean(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG), false);
+
+ Assert.assertEquals(
+ kafkaConsumerConfig.getInt(ConsumerConfig.MAX_POLL_RECORDS_CONFIG),
+ Integer.valueOf(KafkaConsumerConfig.DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS));
+
+ Assert.assertEquals(
+ kafkaConsumerConfig.getList(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG).get(0),
+ RangeAssignor.class.getName());
+
+ Assert.assertEquals(
+ kafkaConsumerConfig.getList(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG).get(0),
+ "useThis:9092");
+ Assert.assertEquals(
+ kafkaConsumerConfig.getInt(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG).longValue(),
+ 100);
+
+ Assert.assertEquals(
+ kafkaConsumerConfig.getClass(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG),
+ ByteArrayDeserializer.class);
+
+ Assert.assertEquals(
+ kafkaConsumerConfig.getClass(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG),
+ ByteArrayDeserializer.class);
+
+ Assert.assertEquals(
+ kafkaConsumerConfig.getString(ConsumerConfig.CLIENT_ID_CONFIG),
+ CLIENT_ID);
+
+ Assert.assertEquals(
+ kafkaConsumerConfig.getString(ConsumerConfig.GROUP_ID_CONFIG),
+ KafkaConsumerConfig.getConsumerGroupId(config));
+ }
+
+ @Test
+ // test stuff that should not be overridden
+ public void testNotOverride() {
+
+ // if KAFKA_CONSUMER_PROPERTY_PREFIX is not set, then PRODUCER should be used
+ props.put(KAFKA_PRODUCER_PROPERTY_PREFIX + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "useThis:9092");
+ props.put(KAFKA_CONSUMER_PROPERTY_PREFIX + ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, TestKafkaConsumerConfig.class.getName());
+ props.put(KAFKA_CONSUMER_PROPERTY_PREFIX + ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, TestKafkaConsumerConfig.class.getName());
+
+
+ Config config = new MapConfig(props);
+ KafkaConsumerConfig kafkaConsumerConfig = KafkaConsumerConfig.getKafkaSystemConsumerConfig(
+ config, SYSTEM_NAME, CLIENT_ID, Collections.emptyMap());
+
+ Assert.assertEquals(
+ kafkaConsumerConfig.getList(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG).get(0),
+ "useThis:9092");
+
+ Assert.assertEquals(
+ kafkaConsumerConfig.getClass(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG),
+ TestKafkaConsumerConfig.class);
+
+ Assert.assertEquals(
+ kafkaConsumerConfig.getClass(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG),
+ TestKafkaConsumerConfig.class);
+ }
+
+
+
+ @Test(expected = SamzaException.class)
+ public void testNoBootstrapServers() {
+ KafkaConsumerConfig kafkaConsumerConfig = KafkaConsumerConfig.getKafkaSystemConsumerConfig(
+ new MapConfig(Collections.emptyMap()), SYSTEM_NAME, "clientId", Collections.emptyMap());
+
+ Assert.fail("didn't get exception for the missing config:" + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG);
+ }
+}
[11/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/0edf343b
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/0edf343b
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/0edf343b
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 0edf343b8c5360437fa10cc5a543696a4edcc319
Parents: 67e611e f249e71
Author: Boris S <bo...@apache.org>
Authored: Fri Jun 8 10:15:36 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Jun 8 10:15:36 2018 -0700
----------------------------------------------------------------------
.travis.yml | 3 +-
README.md | 4 +-
bin/check-all.sh | 2 +-
bin/integration-tests.sh | 3 +-
build.gradle | 24 +-
docs/Gemfile | 7 +-
docs/Gemfile.lock | 95 ++-
docs/README.md | 4 +-
docs/community/committers.md | 8 +-
.../documentation/versioned/azure/eventhubs.md | 6 +-
.../versioned/container/metrics-table.html | 8 +
.../versioned/jobs/configuration-table.html | 91 ++-
.../documentation/versioned/jobs/logging.md | 2 +-
.../documentation/versioned/rest/monitors.md | 8 +
.../versioned/deploy-samza-job-from-hdfs.md | 2 +-
.../tutorials/versioned/deploy-samza-to-CDH.md | 4 +-
.../versioned/hello-samza-high-level-yarn.md | 2 +-
.../versioned/hello-samza-high-level-zk.md | 2 +-
.../versioned/remote-debugging-samza.md | 2 +-
.../versioned/run-in-multi-node-yarn.md | 4 +-
.../versioned/samza-rest-getting-started.md | 2 +-
docs/startup/download/index.md | 19 +-
docs/startup/hello-samza/versioned/index.md | 2 +-
gradle.properties | 2 +-
gradle/buildscript.gradle | 2 +-
gradle/dependency-versions-scala-2.10.gradle | 2 +-
gradle/dependency-versions-scala-2.11.gradle | 2 +-
gradle/dependency-versions-scala-2.12.gradle | 2 +-
gradle/dependency-versions.gradle | 7 +-
gradle/release.gradle | 11 +-
.../samza/application/StreamApplication.java | 7 +-
.../samza/checkpoint/CheckpointManager.java | 12 +-
.../java/org/apache/samza/config/MapConfig.java | 9 +-
.../apache/samza/operators/MessageStream.java | 34 +-
.../apache/samza/operators/TimerRegistry.java | 41 ++
.../operators/functions/ClosableFunction.java | 3 +
.../operators/functions/FilterFunction.java | 3 +-
.../operators/functions/FlatMapFunction.java | 3 +-
.../operators/functions/FoldLeftFunction.java | 16 +-
.../operators/functions/InitableFunction.java | 3 +-
.../samza/operators/functions/JoinFunction.java | 3 +-
.../samza/operators/functions/MapFunction.java | 3 +-
.../samza/operators/functions/SinkFunction.java | 3 +-
.../functions/StreamTableJoinFunction.java | 3 +-
.../operators/functions/SupplierFunction.java | 38 ++
.../operators/functions/TimerFunction.java | 65 ++
.../samza/operators/triggers/AnyTrigger.java | 10 +-
.../samza/operators/triggers/Trigger.java | 3 +-
.../apache/samza/operators/windows/Window.java | 3 +-
.../apache/samza/operators/windows/Windows.java | 53 +-
.../windows/internal/WindowInternal.java | 32 +-
.../apache/samza/runtime/ApplicationRunner.java | 19 +
.../samza/serializers/SerializableSerde.java | 2 +-
.../org/apache/samza/sql/udfs/ScalarUdf.java | 4 +-
.../samza/storage/kv/KeyValueSnapshot.java | 42 ++
.../apache/samza/storage/kv/KeyValueStore.java | 14 +
.../org/apache/samza/system/StreamSpec.java | 39 +-
.../org/apache/samza/system/SystemAdmin.java | 23 +-
.../samza/system/SystemStreamPartition.java | 4 +-
.../table/LocalStoreBackedTableProvider.java | 37 --
.../org/apache/samza/table/ReadWriteTable.java | 9 +-
.../org/apache/samza/table/ReadableTable.java | 11 +-
.../org/apache/samza/table/TableProvider.java | 18 +-
.../java/org/apache/samza/table/TableSpec.java | 12 +-
.../java/org/apache/samza/task/TaskContext.java | 19 +
.../org/apache/samza/task/TimerCallback.java | 34 ++
.../java/org/apache/samza/util/RateLimiter.java | 108 ++++
.../samza/operators/windows/TestWindowPane.java | 2 +-
.../autoscaling/deployer/ConfigManager.java | 4 +-
.../samza/system/kinesis/KinesisConfig.java | 28 +-
.../samza/coordinator/AzureJobCoordinator.java | 93 +--
.../samza/coordinator/data/ProcessorEntity.java | 8 +
.../scheduler/JMVersionUpgradeScheduler.java | 9 +-
.../eventhub/EventHubClientManagerFactory.java | 3 +-
.../samza/system/eventhub/EventHubConfig.java | 117 +++-
.../eventhub/SamzaEventHubClientManager.java | 39 +-
.../eventhub/admin/EventHubSystemAdmin.java | 138 +++--
.../consumer/EventHubSystemConsumer.java | 304 ++++++----
.../system/eventhub/metrics/SamzaHistogram.java | 69 ++-
.../eventhub/producer/AsyncSystemProducer.java | 218 +++++++
.../producer/EventHubSystemProducer.java | 378 +++++++-----
.../java/org/apache/samza/util/TableUtils.java | 49 +-
.../samza/system/eventhub/MockEventData.java | 21 +-
.../MockEventHubClientManagerFactory.java | 25 +-
.../eventhub/MockEventHubConfigFactory.java | 18 +-
.../system/eventhub/TestMetricsRegistry.java | 7 +-
.../consumer/TestEventHubSystemConsumer.java | 75 ++-
.../producer/ITestEventHubSystemProducer.java | 10 +-
.../producer/TestEventHubSystemProducer.java | 232 +++++--
samza-azure/src/test/resources/log4j.xml | 43 ++
.../AbstractContainerAllocator.java | 2 +-
.../ClusterBasedJobCoordinator.java | 78 ++-
.../clustermanager/ContainerProcessManager.java | 14 +-
.../HostAwareContainerAllocator.java | 17 +-
.../clustermanager/ResourceRequestState.java | 53 +-
.../apache/samza/config/JavaStorageConfig.java | 18 +-
.../apache/samza/config/JavaSystemConfig.java | 12 +-
.../apache/samza/config/JavaTableConfig.java | 2 +-
.../samza/config/JobCoordinatorConfig.java | 4 +-
.../org/apache/samza/config/TaskConfigJava.java | 35 +-
.../container/ContainerHeartbeatClient.java | 4 +-
.../container/ContainerHeartbeatMonitor.java | 3 +
.../apache/samza/container/LocalityManager.java | 94 ++-
.../apache/samza/container/RunLoopFactory.java | 25 +-
.../SamzaContainerExceptionHandler.java | 57 --
.../apache/samza/container/TaskContextImpl.java | 31 +-
.../disk/PollingScanDiskSpaceMonitor.java | 20 +-
.../grouper/task/TaskAssignmentManager.java | 37 +-
.../container/host/StatisticsMonitorImpl.java | 20 +-
.../StreamPartitionCountMonitor.java | 10 +-
.../AbstractCoordinatorStreamManager.java | 132 ----
.../stream/CoordinatorStreamManager.java | 170 ++++++
.../stream/CoordinatorStreamSystemConsumer.java | 32 +-
.../stream/CoordinatorStreamSystemProducer.java | 37 +-
.../stream/CoordinatorStreamWriter.java | 2 +-
.../samza/execution/ExecutionPlanner.java | 30 +-
.../org/apache/samza/execution/JobGraph.java | 17 +-
.../samza/execution/JobGraphJsonGenerator.java | 4 +-
.../org/apache/samza/execution/JobNode.java | 64 +-
.../org/apache/samza/execution/StreamEdge.java | 1 +
.../apache/samza/execution/StreamManager.java | 32 +-
.../org/apache/samza/job/model/JobModel.java | 4 +-
.../samza/operators/MessageStreamImpl.java | 89 +--
.../samza/operators/OperatorSpecGraph.java | 132 ++++
.../apache/samza/operators/StreamGraphImpl.java | 315 ----------
.../apache/samza/operators/StreamGraphSpec.java | 299 +++++++++
.../org/apache/samza/operators/TableImpl.java | 3 +-
.../operators/impl/BroadcastOperatorImpl.java | 84 +++
.../operators/impl/ControlMessageSender.java | 43 +-
.../samza/operators/impl/EndOfStreamStates.java | 6 +-
.../samza/operators/impl/OperatorImpl.java | 70 ++-
.../samza/operators/impl/OperatorImplGraph.java | 110 ++--
.../operators/impl/OutputOperatorImpl.java | 5 +-
.../operators/impl/PartitionByOperatorImpl.java | 16 +-
.../operators/impl/StreamOperatorImpl.java | 3 +-
.../samza/operators/impl/WatermarkStates.java | 12 +-
.../operators/impl/WindowOperatorImpl.java | 23 +-
.../operators/spec/BroadcastOperatorSpec.java | 49 ++
.../operators/spec/FilterOperatorSpec.java | 74 +++
.../operators/spec/FlatMapOperatorSpec.java | 47 ++
.../samza/operators/spec/InputOperatorSpec.java | 21 +-
.../samza/operators/spec/JoinOperatorSpec.java | 23 +-
.../samza/operators/spec/MapOperatorSpec.java | 77 +++
.../samza/operators/spec/MergeOperatorSpec.java | 51 ++
.../samza/operators/spec/OperatorSpec.java | 29 +-
.../samza/operators/spec/OperatorSpecs.java | 85 +--
.../operators/spec/OutputOperatorSpec.java | 6 +
.../samza/operators/spec/OutputStreamImpl.java | 17 +-
.../operators/spec/PartitionByOperatorSpec.java | 29 +-
.../operators/spec/SendToTableOperatorSpec.java | 15 +-
.../samza/operators/spec/SinkOperatorSpec.java | 6 +
.../operators/spec/StreamOperatorSpec.java | 14 +-
.../spec/StreamTableJoinOperatorSpec.java | 6 +
.../operators/spec/WindowOperatorSpec.java | 22 +-
.../stream/IntermediateMessageStreamImpl.java | 4 +-
.../samza/operators/triggers/Cancellable.java | 2 +-
.../samza/operators/triggers/TriggerImpl.java | 6 +-
.../operators/util/InternalInMemoryStore.java | 6 +
.../apache/samza/operators/util/MathUtils.java | 50 --
.../apache/samza/processor/StreamProcessor.java | 238 ++++----
.../runtime/AbstractApplicationRunner.java | 41 +-
.../samza/runtime/ApplicationRunnerMain.java | 9 +
.../samza/runtime/LocalApplicationRunner.java | 77 ++-
.../samza/runtime/LocalContainerRunner.java | 30 +-
.../samza/runtime/RemoteApplicationRunner.java | 78 ++-
.../serializers/model/SamzaObjectMapper.java | 17 +-
.../standalone/PassthroughJobCoordinator.java | 37 +-
.../storage/ChangelogPartitionManager.java | 89 ---
.../samza/storage/ChangelogStreamManager.java | 154 +++++
.../apache/samza/storage/StorageRecovery.java | 63 +-
.../org/apache/samza/system/SystemAdmins.java | 71 +++
.../samza/system/inmemory/InMemoryManager.java | 179 ++++++
.../system/inmemory/InMemorySystemAdmin.java | 137 +++++
.../system/inmemory/InMemorySystemConsumer.java | 148 +++++
.../system/inmemory/InMemorySystemFactory.java | 50 ++
.../system/inmemory/InMemorySystemProducer.java | 103 ++++
.../org/apache/samza/table/TableManager.java | 53 +-
.../samza/table/caching/CachingTable.java | 199 ++++++
.../table/caching/CachingTableDescriptor.java | 173 ++++++
.../table/caching/CachingTableProvider.java | 138 +++++
.../caching/CachingTableProviderFactory.java | 34 ++
.../samza/table/caching/SupplierGauge.java | 46 ++
.../table/caching/guava/GuavaCacheTable.java | 112 ++++
.../guava/GuavaCacheTableDescriptor.java | 76 +++
.../caching/guava/GuavaCacheTableProvider.java | 92 +++
.../guava/GuavaCacheTableProviderFactory.java | 34 ++
.../samza/table/remote/CreditFunction.java | 36 ++
.../table/remote/RemoteReadWriteTable.java | 184 ++++++
.../samza/table/remote/RemoteReadableTable.java | 181 ++++++
.../table/remote/RemoteTableDescriptor.java | 194 ++++++
.../samza/table/remote/RemoteTableProvider.java | 144 +++++
.../remote/RemoteTableProviderFactory.java | 38 ++
.../samza/table/remote/TableReadFunction.java | 66 ++
.../samza/table/remote/TableWriteFunction.java | 86 +++
.../apache/samza/table/utils/SerdeUtils.java | 66 ++
.../org/apache/samza/task/AsyncRunLoop.java | 151 ++++-
.../apache/samza/task/StreamOperatorTask.java | 48 +-
.../apache/samza/task/SystemTimerScheduler.java | 154 +++++
.../org/apache/samza/task/TaskCallbackImpl.java | 22 +-
.../apache/samza/task/TaskCallbackManager.java | 11 +-
.../task/TaskCallbackTimeoutException.java | 42 --
.../org/apache/samza/task/TaskFactoryUtil.java | 42 +-
.../apache/samza/util/ClassLoaderHelper.java | 48 --
.../samza/util/EmbeddedTaggedRateLimiter.java | 136 +++++
.../java/org/apache/samza/util/MathUtil.java | 77 +++
.../samza/util/MetricsReporterLoader.java | 6 +-
.../util/SamzaUncaughtExceptionHandler.java | 69 +++
.../org/apache/samza/util/ScalaToJavaUtils.java | 41 --
.../org/apache/samza/util/ShutdownUtil.java | 74 +++
.../apache/samza/util/ThrottlingExecutor.java | 4 +-
.../apache/samza/util/ThrottlingScheduler.java | 2 +-
.../samza/zk/ScheduleAfterDebounceTime.java | 96 ++-
.../samza/zk/ZkBarrierForVersionUpgrade.java | 125 +++-
.../org/apache/samza/zk/ZkControllerImpl.java | 22 +-
.../samza/zk/ZkCoordinationUtilsFactory.java | 6 +-
.../org/apache/samza/zk/ZkJobCoordinator.java | 229 ++++---
.../samza/zk/ZkJobCoordinatorFactory.java | 2 +-
.../org/apache/samza/zk/ZkLeaderElector.java | 3 +-
.../main/java/org/apache/samza/zk/ZkUtils.java | 159 +++--
.../org/apache/samza/zk/ZkUtilsMetrics.java | 6 +
.../samza/checkpoint/CheckpointTool.scala | 23 +-
.../apache/samza/checkpoint/OffsetManager.scala | 10 +-
.../org/apache/samza/config/JobConfig.scala | 11 +
.../apache/samza/config/SerializerConfig.scala | 32 +
.../org/apache/samza/config/StorageConfig.scala | 7 +-
.../org/apache/samza/config/StreamConfig.scala | 14 +-
.../org/apache/samza/config/SystemConfig.scala | 7 +
.../org/apache/samza/config/TaskConfig.scala | 58 +-
.../org/apache/samza/container/RunLoop.scala | 4 +-
.../apache/samza/container/SamzaContainer.scala | 215 ++++---
.../samza/container/SamzaContainerMetrics.scala | 2 +
.../apache/samza/container/TaskInstance.scala | 42 +-
.../samza/coordinator/JobModelManager.scala | 169 +-----
.../stream/CoordinatorStreamSystemFactory.scala | 50 --
.../scala/org/apache/samza/job/JobRunner.scala | 52 +-
.../org/apache/samza/job/local/ProcessJob.scala | 167 +++---
.../samza/job/local/ProcessJobFactory.scala | 44 +-
.../samza/job/local/ThreadJobFactory.scala | 49 +-
.../org/apache/samza/metrics/JmxServer.scala | 1 +
.../org/apache/samza/metrics/JvmMetrics.scala | 24 +-
.../reporter/MetricsSnapshotReporter.scala | 31 +-
.../MetricsSnapshotReporterFactory.scala | 6 +-
.../apache/samza/serializers/SerdeManager.scala | 2 -
.../samza/storage/TaskStorageManager.scala | 45 +-
.../samza/system/StreamMetadataCache.scala | 25 +-
.../apache/samza/system/SystemConsumers.scala | 11 +-
.../system/chooser/BootstrappingChooser.scala | 38 +-
.../samza/system/chooser/DefaultChooser.scala | 12 +-
.../filereader/FileReaderSystemConsumer.scala | 27 +-
.../org/apache/samza/util/CommandLine.scala | 4 +-
.../samza/util/CoordinatorStreamUtil.scala | 94 +++
.../apache/samza/util/DaemonThreadFactory.scala | 39 --
.../samza/util/ExponentialSleepStrategy.scala | 17 +-
.../scala/org/apache/samza/util/FileUtil.scala | 104 ++++
.../scala/org/apache/samza/util/HttpUtil.scala | 89 +++
.../samza/util/LexicographicComparator.scala | 39 --
.../org/apache/samza/util/ScalaJavaUtil.scala | 62 ++
.../scala/org/apache/samza/util/TimerUtil.scala | 56 ++
.../org/apache/samza/util/TimerUtils.scala | 56 --
.../main/scala/org/apache/samza/util/Util.scala | 352 ++---------
.../MockClusterResourceManager.java | 39 +-
.../MockHostAwareContainerAllocator.java | 68 +++
.../TestClusterBasedJobCoordinator.java | 7 +-
.../TestContainerProcessManager.java | 138 +++--
.../TestHostAwareContainerAllocator.java | 164 ++++-
.../samza/container/TestLocalityManager.java | 26 +-
.../TestSamzaContainerExceptionHandler.java | 39 --
.../TestSamzaUncaughtExceptionHandler.java | 40 ++
.../grouper/task/TestTaskAssignmentManager.java | 25 +-
.../MockCoordinatorStreamSystemFactory.java | 6 +-
.../apache/samza/example/BroadcastExample.java | 71 ---
.../samza/example/KeyValueStoreExample.java | 131 ----
.../org/apache/samza/example/MergeExample.java | 60 --
.../samza/example/OrderShipmentJoinExample.java | 115 ----
.../samza/example/PageViewCounterExample.java | 95 ---
.../samza/example/RepartitionExample.java | 90 ---
.../org/apache/samza/example/WindowExample.java | 81 ---
.../samza/execution/TestExecutionPlanner.java | 150 ++---
.../apache/samza/execution/TestJobGraph.java | 68 +--
.../execution/TestJobGraphJsonGenerator.java | 59 +-
.../org/apache/samza/execution/TestJobNode.java | 14 +-
.../apache/samza/execution/TestStreamEdge.java | 4 +-
.../samza/execution/TestStreamManager.java | 24 +-
.../samza/operators/TestJoinOperator.java | 152 ++---
.../samza/operators/TestMessageStreamImpl.java | 55 +-
.../samza/operators/TestOperatorSpecGraph.java | 185 ++++++
.../samza/operators/TestStreamGraphImpl.java | 601 -------------------
.../samza/operators/TestStreamGraphSpec.java | 601 +++++++++++++++++++
.../data/TestOutputMessageEnvelope.java | 14 +
.../impl/TestControlMessageSender.java | 32 +-
.../samza/operators/impl/TestOperatorImpl.java | 6 +
.../operators/impl/TestOperatorImplGraph.java | 298 ++++++---
.../operators/impl/TestStreamOperatorImpl.java | 4 +-
.../operators/impl/TestWindowOperator.java | 263 ++++----
.../operators/impl/store/TestInMemoryStore.java | 16 +
.../operators/spec/OperatorSpecTestUtils.java | 141 +++++
.../samza/operators/spec/TestOperatorSpec.java | 465 ++++++++++++++
.../spec/TestPartitionByOperatorSpec.java | 165 +++++
.../operators/spec/TestWindowOperatorSpec.java | 306 +++++++++-
.../runtime/TestAbstractApplicationRunner.java | 36 +-
.../runtime/TestApplicationRunnerMain.java | 2 +
.../runtime/TestLocalApplicationRunner.java | 46 +-
.../runtime/TestRemoteApplicationRunner.java | 53 ++
.../model/TestSamzaObjectMapper.java | 33 +-
.../system/inmemory/TestInMemorySystem.java | 211 +++++++
.../apache/samza/table/TestTableManager.java | 14 +-
.../samza/table/caching/TestCachingTable.java | 299 +++++++++
.../table/remote/TestRemoteTableDescriptor.java | 244 ++++++++
.../apache/samza/task/IdentityStreamTask.java | 55 ++
.../org/apache/samza/task/TestAsyncRunLoop.java | 333 +++++-----
.../samza/task/TestSystemTimerScheduler.java | 176 ++++++
.../apache/samza/task/TestTaskFactoryUtil.java | 64 +-
.../testUtils/InvalidStreamApplication.java | 25 -
.../util/TestEmbeddedTaggedRateLimiter.java | 230 +++++++
.../org/apache/samza/util/TestMathUtils.java | 43 +-
.../org/apache/samza/util/TestShutdownUtil.java | 63 ++
.../samza/zk/TestScheduleAfterDebounceTime.java | 59 +-
.../zk/TestZkBarrierForVersionUpgrade.java | 280 ++++-----
.../apache/samza/zk/TestZkJobCoordinator.java | 39 +-
.../apache/samza/zk/TestZkLeaderElector.java | 2 +-
.../apache/samza/zk/TestZkProcessorLatch.java | 6 +-
.../java/org/apache/samza/zk/TestZkUtils.java | 119 +++-
.../samza/checkpoint/TestOffsetManager.scala | 28 +-
.../samza/config/TestSerializerConfig.scala | 57 ++
.../samza/container/TestSamzaContainer.scala | 29 +-
.../samza/container/TestTaskInstance.scala | 4 +-
.../samza/coordinator/TestJobCoordinator.scala | 77 ++-
.../TestStreamPartitionCountMonitor.scala | 14 +-
.../coordinator/server/TestHttpServer.scala | 7 +-
.../apache/samza/job/local/TestProcessJob.scala | 133 +++-
.../processor/StreamProcessorTestUtils.scala | 6 +-
.../samza/serializers/TestSerdeManager.scala | 3 +
.../samza/storage/TestTaskStorageManager.scala | 46 +-
.../samza/system/TestStreamMetadataCache.scala | 101 ++--
.../chooser/TestBootstrappingChooser.scala | 94 ++-
.../system/chooser/TestDefaultChooser.scala | 17 +-
.../samza/util/TestDaemonThreadFactory.scala | 37 --
.../org/apache/samza/util/TestFileUtil.scala | 84 +++
.../scala/org/apache/samza/util/TestUtil.scala | 100 +--
.../ElasticsearchSystemFactory.java | 2 +-
.../samza/system/hdfs/HdfsSystemAdmin.java | 2 +-
.../samza/system/hdfs/HdfsSystemConsumer.java | 2 +-
.../hdfs/partitioner/HdfsFileSystemAdapter.java | 7 +-
.../apache/samza/system/hdfs/HdfsConfig.scala | 4 +-
.../samza/system/hdfs/HdfsSystemProducer.scala | 4 +-
.../hdfs/writer/AvroDataFileHdfsWriter.scala | 16 +-
.../system/hdfs/TestHdfsSystemConsumer.java | 2 +-
.../partitioner/TestHdfsFileSystemAdapter.java | 2 +-
.../resources/partitioner/subfolder/testfile002 | 16 +
.../kafka/KafkaCheckpointLogKeySerde.java | 5 +
.../samza/system/kafka/KafkaStreamSpec.java | 18 +-
.../kafka/KafkaCheckpointManager.scala | 91 ++-
.../kafka/KafkaCheckpointManagerFactory.scala | 2 +-
.../org/apache/samza/config/KafkaConfig.scala | 24 +-
.../apache/samza/system/kafka/BrokerProxy.scala | 25 +-
.../apache/samza/system/kafka/GetOffset.scala | 4 +-
.../samza/system/kafka/KafkaSystemAdmin.scala | 72 ++-
.../system/kafka/KafkaSystemConsumer.scala | 2 +
.../samza/system/kafka/KafkaSystemFactory.scala | 10 +-
.../system/kafka/KafkaSystemProducer.scala | 4 +-
.../samza/system/kafka/TopicMetadataCache.scala | 2 +-
.../scala/org/apache/samza/util/KafkaUtil.scala | 8 +-
.../kafka/TestKafkaCheckpointManagerJava.java | 45 +-
.../samza/system/kafka/MockKafkaProducer.java | 25 +-
.../samza/system/kafka/TestKafkaStreamSpec.java | 3 +-
.../system/kafka/TestKafkaSystemAdminJava.java | 44 +-
.../kafka/TestKafkaCheckpointManager.scala | 38 +-
.../apache/samza/config/TestKafkaConfig.scala | 10 +-
.../samza/system/kafka/TestBrokerProxy.scala | 9 +-
.../system/kafka/TestKafkaSystemAdmin.scala | 54 +-
.../system/kafka/TestKafkaSystemConsumer.scala | 4 +-
.../system/kafka/TestTopicMetadataCache.scala | 32 +-
.../org/apache/samza/utils/TestKafkaUtil.scala | 7 +-
.../kv/inmemory/InMemoryKeyValueStore.scala | 14 +-
.../kv/inmemory/TestInMemoryKeyValueStore.java | 84 +++
.../samza/storage/kv/RocksDbKeyValueReader.java | 5 +-
.../samza/storage/kv/RocksDbOptionsHelper.java | 12 +-
.../RocksDbKeyValueStorageEngineFactory.scala | 3 +
.../samza/storage/kv/RocksDbKeyValueStore.scala | 76 ++-
.../kv/TestRocksDbKeyValueStoreJava.java | 140 +++++
.../storage/kv/TestRocksDbKeyValueStore.scala | 6 +-
.../kv/BaseLocalStoreBackedTableProvider.java | 54 +-
.../kv/LocalStoreBackedReadWriteTable.java | 10 +-
.../kv/LocalStoreBackedReadableTable.java | 8 +-
.../samza/storage/kv/AccessLoggedStore.scala | 9 +-
.../kv/BaseKeyValueStorageEngineFactory.scala | 8 +-
.../apache/samza/storage/kv/CachedStore.scala | 4 +
.../storage/kv/KeyValueStorageEngine.scala | 52 +-
.../kv/KeyValueStorageEngineMetrics.scala | 21 +-
.../samza/storage/kv/KeyValueStoreMetrics.scala | 5 +-
.../apache/samza/storage/kv/LoggedStore.scala | 3 +
.../storage/kv/NullSafeKeyValueStore.scala | 14 +-
.../storage/kv/SerializedKeyValueStore.scala | 15 +
.../TestLocalBaseStoreBackedTableProvider.java | 6 +-
.../samza/storage/kv/MockKeyValueStore.scala | 4 +
.../storage/kv/TestKeyValueStorageEngine.scala | 6 +-
.../samza/logging/log4j/StreamAppender.java | 69 ++-
.../samza/logging/log4j/MockSystemAdmin.java | 74 +++
.../samza/logging/log4j/MockSystemFactory.java | 2 +-
.../samza/logging/log4j/TestStreamAppender.java | 57 ++
.../samza/monitor/LocalStoreMonitorConfig.java | 4 +-
.../org/apache/samza/monitor/MonitorConfig.java | 8 +
.../org/apache/samza/monitor/MonitorLoader.java | 4 +-
.../samza/monitor/SamzaMonitorService.java | 8 +-
.../apache/samza/rest/SamzaRestApplication.java | 4 +-
.../samza/rest/proxy/job/AbstractJobProxy.java | 8 +-
.../rest/proxy/job/SimpleYarnJobProxy.java | 5 +-
.../samza/rest/proxy/task/SamzaTaskProxy.java | 14 +-
.../rest/proxy/task/SamzaTaskProxyFactory.java | 7 +-
.../samza/rest/resources/TasksResource.java | 9 +-
.../samza/monitor/TestLocalStoreMonitor.java | 84 +--
.../apache/samza/sql/avro/AvroRelConverter.java | 239 +++++---
.../samza/sql/avro/AvroTypeFactoryImpl.java | 9 +-
.../samza/sql/data/SamzaSqlCompositeKey.java | 82 +++
.../sql/data/SamzaSqlExecutionContext.java | 24 +-
.../samza/sql/data/SamzaSqlRelMessage.java | 175 ++++--
.../org/apache/samza/sql/fn/FlattenUdf.java | 2 +-
.../org/apache/samza/sql/fn/RegexMatchUdf.java | 39 ++
.../sql/impl/ConfigBasedIOResolverFactory.java | 125 ++++
.../impl/ConfigBasedSourceResolverFactory.java | 71 ---
.../samza/sql/impl/ConfigBasedUdfResolver.java | 13 +-
.../interfaces/RelSchemaProviderFactory.java | 3 +-
.../interfaces/SamzaRelConverterFactory.java | 7 +-
.../samza/sql/interfaces/SourceResolver.java | 34 --
.../sql/interfaces/SourceResolverFactory.java | 36 --
.../samza/sql/interfaces/SqlIOConfig.java | 136 +++++
.../samza/sql/interfaces/SqlIOResolver.java | 45 ++
.../sql/interfaces/SqlIOResolverFactory.java | 36 ++
.../sql/interfaces/SqlSystemStreamConfig.java | 74 ---
.../apache/samza/sql/planner/QueryPlanner.java | 45 +-
.../sql/planner/SamzaSqlScalarFunctionImpl.java | 7 +-
.../sql/runner/SamzaSqlApplicationConfig.java | 83 +--
.../sql/runner/SamzaSqlApplicationRunner.java | 23 +-
.../SamzaSqlRelMessageSerdeFactory.java | 67 +++
.../SamzaSqlRelRecordSerdeFactory.java | 67 +++
.../samza/sql/testutil/SamzaSqlQueryParser.java | 67 +--
.../samza/sql/translator/FilterTranslator.java | 47 +-
.../samza/sql/translator/JoinTranslator.java | 294 +++++++++
.../translator/LogicalAggregateTranslator.java | 102 ++++
.../samza/sql/translator/ProjectTranslator.java | 71 ++-
.../samza/sql/translator/QueryTranslator.java | 103 +++-
.../SamzaSqlRelMessageJoinFunction.java | 121 ++++
.../samza/sql/translator/ScanTranslator.java | 52 +-
.../samza/sql/translator/TranslatorContext.java | 79 ++-
.../apache/samza/sql/TestQueryTranslator.java | 103 ----
.../sql/TestSamzaSqlApplicationConfig.java | 92 ---
.../samza/sql/TestSamzaSqlFileParser.java | 58 --
.../samza/sql/TestSamzaSqlQueryParser.java | 70 ---
.../samza/sql/TestSamzaSqlRelMessage.java | 46 --
.../samza/sql/TestSamzaSqlRelMessageSerde.java | 102 ++++
.../samza/sql/TestSamzaSqlRelRecordSerde.java | 86 +++
.../samza/sql/avro/TestAvroRelConversion.java | 132 +++-
.../samza/sql/avro/schemas/AddressRecord.java | 52 ++
.../apache/samza/sql/avro/schemas/Company.avsc | 39 ++
.../apache/samza/sql/avro/schemas/Company.java | 52 ++
.../sql/avro/schemas/EnrichedPageView.avsc | 81 +++
.../sql/avro/schemas/EnrichedPageView.java | 60 ++
.../org/apache/samza/sql/avro/schemas/Kind.java | 30 +
.../apache/samza/sql/avro/schemas/PageView.avsc | 39 ++
.../apache/samza/sql/avro/schemas/PageView.java | 52 ++
.../samza/sql/avro/schemas/PageViewCount.avsc | 45 ++
.../samza/sql/avro/schemas/PageViewCount.java | 56 ++
.../samza/sql/avro/schemas/PhoneNumber.java | 50 ++
.../apache/samza/sql/avro/schemas/Profile.avsc | 149 +++++
.../apache/samza/sql/avro/schemas/Profile.java | 72 +++
.../samza/sql/avro/schemas/SimpleRecord.avsc | 2 +-
.../samza/sql/avro/schemas/SimpleRecord.java | 2 +-
.../samza/sql/avro/schemas/StreetNumRecord.java | 48 ++
.../samza/sql/data/TestSamzaSqlRelMessage.java | 46 ++
.../samza/sql/e2e/TestSamzaSqlEndToEnd.java | 137 -----
.../apache/samza/sql/e2e/TestSamzaSqlTable.java | 69 +++
.../runner/TestSamzaSqlApplicationConfig.java | 95 +++
.../runner/TestSamzaSqlApplicationRunner.java | 56 ++
.../samza/sql/system/SimpleSystemAdmin.java | 11 +-
.../samza/sql/system/TestAvroSystemFactory.java | 186 +++++-
.../samza/sql/testutil/MyTestArrayUdf.java | 5 +-
.../apache/samza/sql/testutil/MyTestUdf.java | 2 +-
.../samza/sql/testutil/SamzaSqlTestConfig.java | 97 ++-
.../sql/testutil/TestIOResolverFactory.java | 196 ++++++
.../sql/testutil/TestSamzaSqlFileParser.java | 58 ++
.../sql/testutil/TestSamzaSqlQueryParser.java | 75 +++
.../sql/translator/TestFilterTranslator.java | 136 +++++
.../sql/translator/TestJoinTranslator.java | 191 ++++++
.../sql/translator/TestProjectTranslator.java | 289 +++++++++
.../sql/translator/TestQueryTranslator.java | 596 ++++++++++++++++++
.../TestSamzaSqlRelMessageJoinFunction.java | 118 ++++
.../sql/translator/TranslatorTestBase.java | 72 +++
samza-sql/src/test/resources/log4j.xml | 6 +
samza-test/src/main/config/join/README | 8 +-
.../example/AppWithGlobalConfigExample.java | 86 +++
.../apache/samza/example/BroadcastExample.java | 70 +++
.../samza/example/KeyValueStoreExample.java | 138 +++++
.../org/apache/samza/example/MergeExample.java | 62 ++
.../samza/example/OrderShipmentJoinExample.java | 121 ++++
.../samza/example/PageViewCounterExample.java | 100 +++
.../samza/example/RepartitionExample.java | 96 +++
.../org/apache/samza/example/WindowExample.java | 86 +++
.../samza/test/framework/StreamAssert.java | 181 ++++++
samza-test/src/main/python/configs/tests.json | 2 +-
.../performance/TestKeyValuePerformance.scala | 6 +-
.../samza/processor/TestZkStreamProcessor.java | 11 +-
.../processor/TestZkStreamProcessorBase.java | 8 +-
.../TestZkStreamProcessorFailures.java | 8 +-
.../EndOfStreamIntegrationTest.java | 8 +-
.../WatermarkIntegrationTest.java | 7 +-
.../samza/test/operator/BroadcastAssertApp.java | 59 ++
.../test/operator/RepartitionJoinWindowApp.java | 80 ++-
.../test/operator/RepartitionWindowApp.java | 72 +++
.../samza/test/operator/SessionWindowApp.java | 21 +-
...StreamApplicationIntegrationTestHarness.java | 21 +-
.../operator/TestRepartitionJoinWindowApp.java | 120 +++-
.../test/operator/TestRepartitionWindowApp.java | 90 +++
.../samza/test/operator/TumblingWindowApp.java | 20 +-
.../samza/test/operator/data/PageView.java | 63 +-
.../test/processor/SharedContextFactories.java | 117 ++++
.../test/processor/TestStreamApplication.java | 148 +++++
.../test/processor/TestStreamProcessor.java | 9 +-
.../processor/TestZkLocalApplicationRunner.java | 363 +++++------
.../test/samzasql/TestSamzaSqlEndToEnd.java | 469 +++++++++++++++
.../apache/samza/test/table/TestLocalTable.java | 257 +++++---
.../samza/test/table/TestRemoteTable.java | 248 ++++++++
.../apache/samza/test/timer/TestTimerApp.java | 87 +++
.../org/apache/samza/test/timer/TimerTest.java | 51 ++
.../samza/storage/kv/TestKeyValueStores.scala | 240 ++++----
.../AbstractIntegrationTestHarness.scala | 8 +-
.../AbstractKafkaServerTestHarness.scala | 11 +-
.../harness/AbstractZookeeperTestHarness.scala | 10 +-
.../test/integration/StreamTaskTestUtil.scala | 39 +-
.../integration/TestShutdownStatefulTask.scala | 2 +-
.../test/integration/TestStatefulTask.scala | 4 +-
samza-tools/config/bench-log4j.xml | 35 ++
samza-tools/config/eh-bench.properties | 26 +
samza-tools/scripts/eh-consumer.sh | 2 +-
samza-tools/scripts/generate-kafka-events.sh | 2 +-
samza-tools/scripts/samza-sql-console.sh | 2 +-
samza-tools/scripts/system-consumer-bench.sh | 34 ++
.../scripts/system-consumer-with-samza-bench.sh | 34 ++
samza-tools/scripts/system-producer-bench.sh | 34 ++
.../tools/ConsoleLoggingSystemFactory.java | 27 +-
.../samza/tools/EventHubConsoleConsumer.java | 64 +-
.../apache/samza/tools/GenerateKafkaEvents.java | 4 +-
.../org/apache/samza/tools/SamzaSqlConsole.java | 40 +-
.../tools/avro/AvroSchemaGenRelConverter.java | 4 +-
.../tools/benchmark/AbstractSamzaBench.java | 153 +++++
.../benchmark/ConfigBasedSspGrouperFactory.java | 87 +++
.../tools/benchmark/SystemConsumerBench.java | 91 +++
.../benchmark/SystemConsumerWithSamzaBench.java | 117 ++++
.../tools/benchmark/SystemProducerBench.java | 124 ++++
.../tools/json/JsonRelConverterFactory.java | 4 +-
.../apache/samza/tools/udf/RegexMatchUdf.java | 40 --
.../job/yarn/YarnClusterResourceManager.java | 38 +-
.../samza/validation/YarnJobValidationTool.java | 15 +-
.../webapp/ApplicationMasterRestClient.java | 111 ++++
.../apache/samza/job/yarn/ClientHelper.scala | 54 +-
.../yarn/SamzaAppMasterSecurityManager.scala | 31 +-
.../yarn/SamzaContainerSecurityManager.scala | 34 +-
.../apache/samza/job/yarn/YarnContainer.scala | 4 +-
.../org/apache/samza/job/yarn/YarnJob.scala | 14 +-
.../webapp/ApplicationMasterRestServlet.scala | 76 ++-
.../webapp/TestApplicationMasterRestClient.java | 330 ++++++++++
.../TestYarnContainerHeartbeatServlet.java | 8 +-
.../samza/job/yarn/TestClientHelper.scala | 36 +-
.../yarn/TestSamzaYarnAppMasterService.scala | 16 +-
settings.gradle | 4 +-
sonar-project.properties | 6 +-
565 files changed, 25625 insertions(+), 8630 deletions(-)
----------------------------------------------------------------------
[14/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/7887d884
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/7887d884
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/7887d884
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 7887d884e8a062563a7e4b8b418d817828f51f23
Parents: 88f8559 a8ddede
Author: Boris S <bo...@apache.org>
Authored: Sun Aug 12 23:48:39 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Sun Aug 12 23:48:39 2018 -0700
----------------------------------------------------------------------
.../java/org/apache/samza/config/Config.java | 3 +
.../org/apache/samza/metrics/ListGauge.java | 3 +-
.../org/apache/samza/runtime/LocationId.java | 60 +++
.../samza/runtime/LocationIdProvider.java | 28 ++
.../runtime/LocationIdProviderFactory.java | 28 ++
.../org/apache/samza/table/ReadWriteTable.java | 43 +-
.../org/apache/samza/table/ReadableTable.java | 19 +
.../org/apache/samza/config/TestConfig.java | 41 ++
.../DefaultLocationIdProviderFactory.java | 32 ++
.../samza/runtime/RemoteApplicationRunner.java | 69 +---
.../storage/TaskSideInputStorageManager.java | 38 +-
.../samza/system/inmemory/InMemoryManager.java | 10 +-
.../system/inmemory/InMemorySystemAdmin.java | 6 +-
.../system/inmemory/InMemorySystemFactory.java | 2 +-
.../samza/table/caching/CachingTable.java | 237 ++++++++---
.../table/caching/CachingTableDescriptor.java | 14 -
.../table/caching/CachingTableProvider.java | 4 +-
.../table/caching/guava/GuavaCacheTable.java | 140 ++++++-
.../samza/table/remote/CreditFunction.java | 36 --
.../table/remote/RemoteReadWriteTable.java | 157 ++++---
.../samza/table/remote/RemoteReadableTable.java | 243 ++++++++---
.../table/remote/RemoteTableDescriptor.java | 47 ++-
.../samza/table/remote/RemoteTableProvider.java | 73 +++-
.../samza/table/remote/TableRateLimiter.java | 167 ++++++++
.../samza/table/remote/TableReadFunction.java | 54 ++-
.../samza/table/remote/TableWriteFunction.java | 86 +++-
.../table/utils/DefaultTableReadMetrics.java | 2 +
.../table/utils/DefaultTableWriteMetrics.java | 4 +
.../org/apache/samza/config/JobConfig.scala | 5 +
.../MetricsSnapshotReporterFactory.scala | 4 +-
.../runtime/TestRemoteApplicationRunner.java | 70 ++++
.../TestTaskSideInputStorageManager.java | 295 +++++++++++++
.../samza/table/caching/TestCachingTable.java | 275 +++++++-----
.../samza/table/remote/TestRemoteTable.java | 413 +++++++++++++++++++
.../table/remote/TestRemoteTableDescriptor.java | 55 +--
.../table/remote/TestTableRateLimiter.java | 103 +++++
.../samza/storage/kv/RocksDbTableProvider.java | 4 +
.../kv/LocalStoreBackedReadWriteTable.java | 49 +++
.../kv/LocalStoreBackedReadableTable.java | 23 ++
.../sql/impl/ConfigBasedIOResolverFactory.java | 4 +-
.../sql/interfaces/SqlIOResolverFactory.java | 5 +-
.../sql/runner/SamzaSqlApplicationConfig.java | 2 +-
.../sql/testutil/TestIOResolverFactory.java | 34 +-
.../apache/samza/test/table/TestLocalTable.java | 61 +++
.../table/TestLocalTableWithSideInputs.java | 161 ++++++++
.../samza/test/table/TestRemoteTable.java | 40 +-
.../apache/samza/test/table/TestTableData.java | 22 +-
.../table/TestTableDescriptorsProvider.java | 3 +-
48 files changed, 2722 insertions(+), 552 deletions(-)
----------------------------------------------------------------------
[38/47] samza git commit: debug
Posted by bo...@apache.org.
debug
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/ddada94d
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/ddada94d
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/ddada94d
Branch: refs/heads/NewKafkaSystemConsumer
Commit: ddada94d09a8ac78ec7a88eff9dc77cd39dba32d
Parents: 2655221
Author: Boris S <bo...@apache.org>
Authored: Mon Sep 10 16:28:12 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Mon Sep 10 16:28:12 2018 -0700
----------------------------------------------------------------------
.../org/apache/samza/system/kafka/KafkaConsumerProxy.java | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/ddada94d/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 0825c90..92f9183 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -100,7 +100,7 @@ public class KafkaConsumerProxy<K, V> {
consumerPollThread.start();
// we need to wait until the thread starts
- while (!isRunning) {
+ while (!isRunning && failureCause == null) {
try {
consumerPollThreadStartLatch.await(3000, TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
@@ -378,9 +378,8 @@ public class KafkaConsumerProxy<K, V> {
kafkaConsumerMetrics.incClientReads(metricName);
Map<SystemStreamPartition, List<IncomingMessageEnvelope>> response;
- if (LOG.isDebugEnabled()) {
- LOG.debug("pollConsumer from following SSPs: {}; total#={}", SSPsToFetch, SSPsToFetch.size());
- }
+ LOG.debug("pollConsumer from following SSPs: {}; total#={}", SSPsToFetch, SSPsToFetch.size());
+
response = pollConsumer(SSPsToFetch, 500); // TODO should be default value from ConsumerConfig
// move the responses into the queue
[29/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/add733b8
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/add733b8
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/add733b8
Branch: refs/heads/NewKafkaSystemConsumer
Commit: add733b85f78046badd9af36ebf533d19388151c
Parents: 8ab04b2 b0b2922
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 4 17:23:06 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 4 17:23:06 2018 -0700
----------------------------------------------------------------------
.../org/apache/samza/execution/JobNode.java | 5 +++++
.../runtime/AbstractApplicationRunner.java | 17 ++++++++-------
.../sql/runner/SamzaSqlApplicationConfig.java | 6 ++++--
.../runner/TestSamzaSqlApplicationConfig.java | 22 +++++++++++++++++++-
.../sql/testutil/TestSamzaSqlFileParser.java | 1 -
.../table/TestLocalTableWithSideInputs.java | 3 ++-
6 files changed, 41 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
[35/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/728dc181
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/728dc181
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/728dc181
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 728dc18143618b80df6e74a373c0024ced34544b
Parents: add733b abf49ea
Author: Boris S <bo...@apache.org>
Authored: Fri Sep 7 15:17:47 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Sep 7 15:17:47 2018 -0700
----------------------------------------------------------------------
.../application/ApplicationDescriptor.java | 80 +++
.../samza/application/SamzaApplication.java | 40 ++
.../samza/application/StreamApplication.java | 75 +--
.../StreamApplicationDescriptor.java | 113 ++++
.../samza/application/TaskApplication.java | 86 +++
.../application/TaskApplicationDescriptor.java | 64 ++
.../java/org/apache/samza/config/Config.java | 3 +-
.../samza/metrics/MetricsReporterFactory.java | 5 +-
.../apache/samza/operators/MessageStream.java | 9 +-
.../org/apache/samza/operators/StreamGraph.java | 120 ----
.../operators/functions/ClosableFunction.java | 7 +-
.../operators/functions/InitableFunction.java | 6 +-
.../operators/functions/StreamExpander.java | 16 +-
.../apache/samza/runtime/ApplicationRunner.java | 92 +--
.../samza/runtime/ApplicationRunners.java | 82 +++
.../apache/samza/runtime/ProcessorContext.java | 31 +
.../runtime/ProcessorLifecycleListener.java | 55 ++
.../ProcessorLifecycleListenerFactory.java | 40 ++
.../samza/task/AsyncStreamTaskFactory.java | 10 +-
.../apache/samza/task/StreamTaskFactory.java | 6 +-
.../java/org/apache/samza/task/TaskFactory.java | 38 ++
.../samza/runtime/TestApplicationRunners.java | 88 +++
.../application/ApplicationDescriptorImpl.java | 179 ++++++
.../application/ApplicationDescriptorUtil.java | 51 ++
.../samza/application/ApplicationUtil.java | 63 ++
.../application/LegacyTaskApplication.java | 37 ++
.../StreamApplicationDescriptorImpl.java | 381 ++++++++++++
.../TaskApplicationDescriptorImpl.java | 129 ++++
.../samza/container/SamzaContainerListener.java | 22 +-
.../samza/execution/ExecutionPlanner.java | 7 +-
.../org/apache/samza/execution/JobGraph.java | 6 -
.../org/apache/samza/execution/JobPlanner.java | 188 ++++++
.../apache/samza/execution/LocalJobPlanner.java | 134 +++++
.../samza/execution/RemoteJobPlanner.java | 96 +++
.../samza/operators/MessageStreamImpl.java | 57 +-
.../samza/operators/OperatorSpecGraph.java | 26 +-
.../apache/samza/operators/StreamGraphSpec.java | 336 -----------
.../samza/operators/spec/OperatorSpec.java | 2 +-
.../stream/IntermediateMessageStreamImpl.java | 6 +-
.../apache/samza/processor/StreamProcessor.java | 122 ++--
.../StreamProcessorLifecycleListener.java | 49 --
.../runtime/AbstractApplicationRunner.java | 135 -----
.../samza/runtime/ApplicationRunnerMain.java | 42 +-
.../samza/runtime/LocalApplicationRunner.java | 355 ++++-------
.../samza/runtime/LocalContainerRunner.java | 56 +-
.../samza/runtime/RemoteApplicationRunner.java | 123 ++--
.../apache/samza/task/StreamOperatorTask.java | 5 +-
.../org/apache/samza/task/TaskFactoryUtil.java | 137 ++---
.../apache/samza/container/SamzaContainer.scala | 16 +-
.../scala/org/apache/samza/job/JobRunner.scala | 2 -
.../samza/job/local/ThreadJobFactory.scala | 48 +-
.../application/MockStreamApplication.java | 29 +
.../samza/application/TestApplicationUtil.java | 96 +++
.../TestStreamApplicationDescriptorImpl.java | 584 +++++++++++++++++++
.../TestTaskApplicationDescriptorImpl.java | 144 +++++
.../samza/execution/TestExecutionPlanner.java | 192 +++---
.../execution/TestJobGraphJsonGenerator.java | 120 ++--
.../org/apache/samza/execution/TestJobNode.java | 53 +-
.../samza/execution/TestLocalJobPlanner.java | 211 +++++++
.../samza/execution/TestRemoteJobPlanner.java | 88 +++
.../samza/operators/TestJoinOperator.java | 103 ++--
.../samza/operators/TestMessageStreamImpl.java | 29 +-
.../samza/operators/TestOperatorSpecGraph.java | 19 +-
.../samza/operators/TestStreamGraphSpec.java | 506 ----------------
.../operators/impl/TestOperatorImplGraph.java | 190 +++---
.../operators/impl/TestWindowOperator.java | 147 ++---
.../spec/TestPartitionByOperatorSpec.java | 70 ++-
.../samza/processor/TestStreamProcessor.java | 139 +++--
.../runtime/TestApplicationRunnerMain.java | 47 +-
.../runtime/TestLocalApplicationRunner.java | 311 +++-------
.../runtime/TestRemoteApplicationRunner.java | 35 +-
.../apache/samza/task/MockAsyncStreamTask.java | 31 +
.../org/apache/samza/task/MockStreamTask.java | 31 +
.../apache/samza/task/TestTaskFactoryUtil.java | 215 ++-----
.../samza/testUtils/TestAsyncStreamTask.java | 35 --
.../samza/testUtils/TestStreamApplication.java | 33 --
.../apache/samza/testUtils/TestStreamTask.java | 34 --
.../samza/container/TestSamzaContainer.scala | 76 ++-
.../samza/sql/runner/SamzaSqlApplication.java | 13 +-
.../sql/runner/SamzaSqlApplicationRunner.java | 53 +-
.../samza/sql/translator/JoinTranslator.java | 2 +-
.../samza/sql/translator/QueryTranslator.java | 27 +-
.../samza/sql/translator/ScanTranslator.java | 8 +-
.../samza/sql/translator/TranslatorContext.java | 19 +-
.../apache/samza/sql/e2e/TestSamzaSqlTable.java | 8 +-
.../runner/TestSamzaSqlApplicationRunner.java | 2 -
.../sql/translator/TestFilterTranslator.java | 6 +-
.../sql/translator/TestJoinTranslator.java | 16 +-
.../sql/translator/TestProjectTranslator.java | 14 +-
.../sql/translator/TestQueryTranslator.java | 162 +++--
.../example/AppWithGlobalConfigExample.java | 25 +-
.../apache/samza/example/BroadcastExample.java | 22 +-
.../samza/example/KeyValueStoreExample.java | 19 +-
.../org/apache/samza/example/MergeExample.java | 18 +-
.../samza/example/OrderShipmentJoinExample.java | 19 +-
.../samza/example/PageViewCounterExample.java | 15 +-
.../samza/example/RepartitionExample.java | 19 +-
.../samza/example/TaskApplicationExample.java | 77 +++
.../org/apache/samza/example/WindowExample.java | 18 +-
.../samza/system/mock/MockSystemConsumer.java | 4 +-
.../apache/samza/test/framework/TestRunner.java | 41 +-
.../integration/LocalApplicationRunnerMain.java | 21 +-
.../TestStandaloneIntegrationApplication.java | 9 +-
.../processor/TestZkStreamProcessorBase.java | 20 +-
.../EndOfStreamIntegrationTest.java | 37 +-
.../WatermarkIntegrationTest.java | 62 +-
.../test/framework/BroadcastAssertApp.java | 7 +-
.../StreamApplicationIntegrationTest.java | 9 +-
...StreamApplicationIntegrationTestHarness.java | 42 +-
.../samza/test/framework/TestTimerApp.java | 7 +-
.../apache/samza/test/framework/TimerTest.java | 18 +-
.../test/operator/RepartitionJoinWindowApp.java | 25 +-
.../test/operator/RepartitionWindowApp.java | 20 +-
.../samza/test/operator/SessionWindowApp.java | 17 +-
.../operator/TestRepartitionJoinWindowApp.java | 30 +-
.../test/operator/TestRepartitionWindowApp.java | 10 +-
.../samza/test/operator/TumblingWindowApp.java | 16 +-
.../test/processor/TestStreamApplication.java | 82 +--
.../test/processor/TestStreamProcessor.java | 18 +-
.../processor/TestZkLocalApplicationRunner.java | 317 +++++-----
.../apache/samza/test/table/TestLocalTable.java | 39 +-
.../table/TestLocalTableWithSideInputs.java | 13 +-
.../samza/test/table/TestRemoteTable.java | 27 +-
.../benchmark/SystemConsumerWithSamzaBench.java | 14 +-
124 files changed, 5280 insertions(+), 3631 deletions(-)
----------------------------------------------------------------------
[18/47] samza git commit: Added new KafkaProxy and KafkaConsumer for
default KafkaSystem
Posted by bo...@apache.org.
Added new KafkaProxy and KafkaConsumer for default KafkaSystem
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/72544606
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/72544606
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/72544606
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 72544606bfffc67aeaa7f509ca54cfd6db52e2b4
Parents: 4801709
Author: Boris S <bo...@apache.org>
Authored: Fri Aug 17 18:08:52 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Aug 17 18:08:52 2018 -0700
----------------------------------------------------------------------
.../clients/consumer/KafkaConsumerConfig.java | 152 ++++++
.../samza/system/kafka/KafkaConsumerProxy.java | 463 +++++++++++++++++++
.../samza/system/kafka/KafkaSystemFactory.scala | 54 ++-
.../system/kafka/NewKafkaSystemConsumer.java | 403 ++++++++++++++++
.../kafka/TestKafkaCheckpointManager.scala | 8 +-
5 files changed, 1064 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/72544606/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
new file mode 100644
index 0000000..97360e2
--- /dev/null
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -0,0 +1,152 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.kafka.clients.consumer;
+
+import java.util.Map;
+import java.util.Properties;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.ConfigException;
+import org.apache.samza.config.JobConfig;
+import scala.Option;
+
+
+/**
+ * The configuration class for KafkaConsumer
+ */
+public class KafkaConsumerConfig extends ConsumerConfig {
+
+ private static final String PRODUCER_CLIENT_ID_PREFIX = "kafka-producer";
+ private static final String CONSUMER_CLIENT_ID_PREFIX = "kafka-consumer";
+ private static final String SAMZA_OFFSET_LARGEST = "largest";
+ private static final String SAMZA_OFFSET_SMALLEST = "smallest";
+ private static final String KAFKA_OFFSET_LATEST = "latest";
+ private static final String KAFKA_OFFSET_EARLIEST = "earliest";
+ /*
+ * By default, KafkaConsumer will fetch ALL available messages for all the partitions.
+ * This may cause memory issues. That's why we will limit the number of messages per partition we get on EACH poll().
+ */
+ private static final String KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT = "100";
+
+
+ public KafkaConsumerConfig(Properties props) {
+ super(props);
+ }
+
+ public static KafkaConsumerConfig getKafkaSystemConsumerConfig(Config config,
+ String systemName, String clientId, Map<String, String> injectProps) {
+
+ Config subConf = config.subset(String.format("systems.%s.consumer.", systemName), true);
+
+ String groupId = getConsumerGroupId(config);
+
+ Properties consumerProps = new Properties();
+ consumerProps.putAll(subConf);
+
+ consumerProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
+ consumerProps.setProperty(ConsumerConfig.CLIENT_ID_CONFIG, clientId);
+
+ /********************************************
+ * Open-source Kafka Consumer configuration *
+ *******************************************/
+ consumerProps.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // Disable consumer auto-commit
+
+ consumerProps.setProperty(
+ ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
+ getAutoOffsetResetValue(consumerProps)); // Translate samza config value to kafka config value
+
+ // make sure bootstrap configs are in ?? SHOULD WE FAIL IF THEY ARE NOT?
+ if (! subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
+ // get it from the producer config
+ String bootstrapServer = config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
+ consumerProps.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer);
+ }
+
+ // Always use default partition assignment strategy. Do not allow override.
+ consumerProps.setProperty(
+ ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG,
+ RangeAssignor.class.getName());
+
+
+ // NOT SURE THIS IS NEEDED TODO
+ String maxPollRecords = subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT);;
+ consumerProps.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
+
+ // put overrides
+ consumerProps.putAll(injectProps);
+
+ return new KafkaConsumerConfig(consumerProps);
+ }
+
+ // group id should be unique per job
+ static String getConsumerGroupId(Config config) {
+ JobConfig jobConfig = new JobConfig(config);
+ Option<String> jobIdOption = jobConfig.getJobId();
+ Option<String> jobNameOption = jobConfig.getName();
+ return (jobNameOption.isDefined()? jobNameOption.get() : "undefined_job_name") + "-"
+ + (jobIdOption.isDefined()? jobIdOption.get() : "undefined_job_id");
+ }
+ // client id should be unique per job
+ public static String getClientId(String id, Config config) {
+ if (config.get(JobConfig.JOB_NAME()) == null) {
+ throw new ConfigException("Missing job name");
+ }
+ String jobName = config.get(JobConfig.JOB_NAME());
+ String jobId = "1";
+ if (config.get(JobConfig.JOB_ID()) != null) {
+ jobId = config.get(JobConfig.JOB_ID());
+ }
+ return getClientId(id, jobName, jobId);
+ }
+
+ private static String getClientId(String id, String jobName, String jobId) {
+ return String.format(
+ "%s-%s-%s",
+ id.replaceAll("[^A-Za-z0-9]", "_"),
+ jobName.replaceAll("[^A-Za-z0-9]", "_"),
+ jobId.replaceAll("[^A-Za-z0-9]", "_"));
+ }
+
+ public static String getProducerClientId(Config config) {
+ return getClientId(PRODUCER_CLIENT_ID_PREFIX, config);
+ }
+
+ /**
+ * Settings for auto.reset in samza are different from settings in Kafka (auto.offset.reset) - need to convert
+ * "largest" -> "latest"
+ * "smallest" -> "earliest"
+ * "none" - will fail the kafka consumer, if offset is out of range
+ * @param properties All consumer related {@link Properties} parsed from samza config
+ * @return String representing the config value for "auto.offset.reset" property
+ */
+ static String getAutoOffsetResetValue(Properties properties) {
+ String autoOffsetReset = properties.getProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, KAFKA_OFFSET_LATEST);
+ switch (autoOffsetReset) {
+ case SAMZA_OFFSET_LARGEST:
+ return KAFKA_OFFSET_LATEST;
+ case SAMZA_OFFSET_SMALLEST:
+ return KAFKA_OFFSET_EARLIEST;
+ default:
+ return KAFKA_OFFSET_LATEST;
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/samza/blob/72544606/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
new file mode 100644
index 0000000..66971af
--- /dev/null
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -0,0 +1,463 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.samza.system.kafka;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import kafka.common.KafkaException;
+import kafka.common.TopicAndPartition;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.InvalidOffsetException;
+import org.apache.kafka.common.Metric;
+import org.apache.kafka.common.MetricName;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.samza.SamzaException;
+import org.apache.samza.system.IncomingMessageEnvelope;
+import org.apache.samza.system.SystemStreamPartition;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Separate thread that reads messages from kafka and puts them into the BlockingEnvelopeMap
+ * This class is not thread safe. There will be only one instance of this class per LiKafkaSystemConsumer object
+ * We still need some synchronization around kafkaConsumer. See pollConsumer() method for details.
+ */
+public class KafkaConsumerProxy<K, V> {
+ private static final Logger LOG = LoggerFactory.getLogger(KafkaConsumerProxy.class);
+
+ private static final int SLEEP_MS_WHILE_NO_TOPIC_PARTITION = 100;
+
+ /* package private */ final Thread consumerPollThread;
+ private final Consumer<K, V> kafkaConsumer;
+ private final NewKafkaSystemConsumer.KafkaConsumerMessageSink sink;
+ private final KafkaSystemConsumerMetrics kafkaConsumerMetrics;
+ private final String metricName;
+ private final String systemName;
+ private final String clientId;
+ private final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
+ private final Map<SystemStreamPartition, MetricName> ssp2MetricName = new HashMap<>();
+ // list of all the SSPs we poll from with their next offsets correspondingly.
+ private final Map<SystemStreamPartition, Long> nextOffsets = new ConcurrentHashMap<>();
+ // lags behind the high water mark, as reported by the Kafka consumer.
+ private final Map<SystemStreamPartition, Long> latestLags = new HashMap<>();
+ private final NewKafkaSystemConsumer.ValueUnwrapper<V> valueUnwrapper;
+
+ private volatile boolean isRunning = false;
+ private volatile Throwable failureCause = null;
+ private CountDownLatch consumerPollThreadStartLatch = new CountDownLatch(1);
+
+ public KafkaConsumerProxy(Consumer<K, V> kafkaConsumer, String systemName, String clientId,
+ NewKafkaSystemConsumer.KafkaConsumerMessageSink messageSink, KafkaSystemConsumerMetrics samzaConsumerMetrics,
+ String metricName, NewKafkaSystemConsumer.ValueUnwrapper<V> valueUnwrapper) {
+
+ this.kafkaConsumer = kafkaConsumer;
+ this.systemName = systemName;
+ this.sink = messageSink;
+ this.kafkaConsumerMetrics = samzaConsumerMetrics;
+ this.metricName = metricName;
+ this.clientId = clientId;
+ this.valueUnwrapper = valueUnwrapper;
+
+ // TODO - see if we need new metrics (not host:port based)
+ this.kafkaConsumerMetrics.registerBrokerProxy(metricName, 0);
+
+ consumerPollThread = new Thread(createProxyThreadRunnable());
+ }
+
+ public void start() {
+ if (!consumerPollThread.isAlive()) {
+ LOG.info("Starting LiKafkaConsumerProxy polling thread for system " + systemName + " " + this.toString());
+ consumerPollThread.setDaemon(true);
+ consumerPollThread.setName(
+ "Samza LiKafkaConsumerProxy Poll " + consumerPollThread.getName() + " - " + systemName);
+ consumerPollThread.start();
+
+ // we need to wait until the thread starts
+ while (!isRunning) {
+ try {
+ consumerPollThreadStartLatch.await(3000, TimeUnit.MILLISECONDS);
+ } catch (InterruptedException e) {
+ }
+ }
+ } else {
+ LOG.debug("Tried to start an already started LiKafkaConsumerProxy (%s). Ignoring.", this.toString());
+ }
+ }
+
+ // add new partition to the list of polled partitions
+ // this method is called only at the beginning, before the thread is started
+ public void addTopicPartition(SystemStreamPartition ssp, long nextOffset) {
+ LOG.info(String.format("Adding new topic and partition %s, offset = %s to queue for consumer %s", ssp, nextOffset,
+ this));
+ topicPartitions2SSP.put(NewKafkaSystemConsumer.toTopicPartition(ssp), ssp); //registered SSPs
+
+ // this is already vetted offset so there is no need to validate it
+ LOG.info(String.format("Got offset %s for new topic and partition %s.", nextOffset, ssp));
+
+ nextOffsets.put(ssp, nextOffset);
+
+ // we reuse existing metrics. They assume host and port for the broker
+ // for now fake the port with the consumer name
+ kafkaConsumerMetrics.setTopicPartitionValue(metricName, 0, nextOffsets.size());
+ }
+
+ /**
+ * creates a separate thread for pulling messages
+ */
+ private Runnable createProxyThreadRunnable() {
+ return () -> {
+ isRunning = true;
+
+ try {
+ consumerPollThreadStartLatch.countDown();
+ initializeLags();
+ while (isRunning) {
+ fetchMessages();
+ }
+ } catch (Throwable throwable) {
+ LOG.error(String.format("Error in LiKafkaConsumerProxy poll thread for system: %s.", systemName), throwable);
+ // SamzaLiKafkaSystemConsumer uses the failureCause to propagate the throwable to the container
+ failureCause = throwable;
+ isRunning = false;
+ }
+
+ if (!isRunning) {
+ LOG.info("Stopping the LiKafkaConsumerProxy poll thread for system: {}.", systemName);
+ }
+ };
+ }
+
+ private void initializeLags() {
+ // This is expensive, so only do it once at the beginning. After the first poll, we can rely on metrics for lag.
+ Map<TopicPartition, Long> endOffsets = kafkaConsumer.endOffsets(topicPartitions2SSP.keySet());
+ endOffsets.forEach((tp, offset) -> {
+ SystemStreamPartition ssp = topicPartitions2SSP.get(tp);
+ long startingOffset = nextOffsets.get(ssp);
+ // End offsets are the offset of the newest message + 1
+ // If the message we are about to consume is < end offset, we are starting with a lag.
+ long initialLag = endOffsets.get(tp) - startingOffset;
+
+ LOG.info("Initial lag is {} for SSP {}", initialLag, ssp);
+ latestLags.put(ssp, initialLag);
+ sink.setIsAtHighWatermark(ssp, initialLag == 0);
+ });
+
+ // initialize lag metrics
+ refreshLatencyMetrics();
+ }
+
+ // the actual polling of the messages from kafka
+ public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> pollConsumer(
+ Set<SystemStreamPartition> systemStreamPartitions, long timeout) {
+
+ if (topicPartitions2SSP.size() == 0) {
+ throw new SamzaException("cannot poll empty set of TopicPartitions");
+ }
+
+ // Since we need to poll only from some subset of TopicPartitions (passed as the argument),
+ // we need to pause the rest.
+ List<TopicPartition> topicPartitionsToPause = new ArrayList<>();
+ List<TopicPartition> topicPartitionsToPoll = new ArrayList<>();
+
+ for (Map.Entry<TopicPartition, SystemStreamPartition> e : topicPartitions2SSP.entrySet()) {
+ TopicPartition tp = e.getKey();
+ SystemStreamPartition ssp = e.getValue();
+ if (systemStreamPartitions.contains(ssp)) {
+ topicPartitionsToPoll.add(tp); // consume
+ } else {
+ topicPartitionsToPause.add(tp); // ignore
+ }
+ }
+
+ ConsumerRecords<K, V> records;
+ // make a call on the client
+ try {
+ // Currently, when doing checkpoint we are making a safeOffset request through this client, thus we need to synchronize
+ // them. In the future we may use this client for the actually checkpointing.
+ synchronized (kafkaConsumer) {
+ // Since we are not polling from ALL the subscribed topics, we need to "change" the subscription temporarily
+ kafkaConsumer.pause(topicPartitionsToPause);
+ kafkaConsumer.resume(topicPartitionsToPoll);
+ records = kafkaConsumer.poll(timeout);
+ // resume original set of subscription - may be required for checkpointing
+ kafkaConsumer.resume(topicPartitionsToPause);
+ }
+ } catch (InvalidOffsetException e) {
+ LOG.error("LiKafkaConsumer with invalidOffsetException", e);
+ // If the consumer has thrown this exception it means that auto reset is not set for this consumer.
+ // So we just rethrow.
+ LOG.error("Caught InvalidOffsetException in pollConsumer", e);
+ throw e;
+ } catch (KafkaException e) {
+ // we may get InvalidOffsetException | AuthorizationException | KafkaException exceptions,
+ // but we still just rethrow, and log it up the stack.
+ LOG.error("Caught a Kafka exception in pollConsumer", e);
+ throw e;
+ }
+
+ return processResults(records);
+ }
+
+ private Map<SystemStreamPartition, List<IncomingMessageEnvelope>> processResults(ConsumerRecords<K, V> records) {
+ if (records == null) {
+ return Collections.emptyMap();
+ }
+
+ int capacity = (int) (records.count() / 0.75 + 1); // to avoid rehash, allocate more than 75% of expected capacity.
+ Map<SystemStreamPartition, List<IncomingMessageEnvelope>> results = new HashMap<>(capacity);
+ // Parse the returned records and convert them into the IncomingMessageEnvelope.
+ // Note. They have been already de-serialized by the consumer.
+ for (ConsumerRecord<K, V> r : records) {
+ int partition = r.partition();
+ String topic = r.topic();
+ TopicPartition tp = new TopicPartition(topic, partition);
+
+ updateMetrics(r, tp);
+
+ SystemStreamPartition ssp = topicPartitions2SSP.get(tp);
+ List<IncomingMessageEnvelope> listMsgs = results.get(ssp);
+ if (listMsgs == null) {
+ listMsgs = new ArrayList<>();
+ results.put(ssp, listMsgs);
+ }
+
+ // TODO - add calculation of the size of the message, when available from Kafka
+ int msgSize = 0;
+ // if (fetchLimitByBytesEnabled) {
+ msgSize = getRecordSize(r);
+ //}
+
+ final K key = r.key();
+ final Object value =
+ valueUnwrapper == null ? r.value() : valueUnwrapper.unwrapValue(ssp.getSystemStream(), r.value());
+ IncomingMessageEnvelope imEnvelope =
+ new IncomingMessageEnvelope(ssp, String.valueOf(r.offset()), key, value, msgSize);
+ listMsgs.add(imEnvelope);
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("# records per SSP:");
+ for (Map.Entry<SystemStreamPartition, List<IncomingMessageEnvelope>> e : results.entrySet()) {
+ List<IncomingMessageEnvelope> list = e.getValue();
+ LOG.debug(e.getKey() + " = " + ((list == null) ? 0 : list.size()));
+ }
+ }
+
+ return results;
+ }
+
+ private int getRecordSize(ConsumerRecord<K, V> r) {
+ int keySize = 0; //(r.key() == null) ? 0 : r.key().getSerializedKeySize();
+ return keySize; // + r.getSerializedMsgSize(); // TODO -enable when functionality available from Kafka
+
+ //int getMessageSize (Message message) {
+ // Approximate additional shallow heap overhead per message in addition to the raw bytes
+ // received from Kafka 4 + 64 + 4 + 4 + 4 = 80 bytes overhead.
+ // As this overhead is a moving target, and not very large
+ // compared to the message size it's being ignored in the computation for now.
+ // int MESSAGE_SIZE_OVERHEAD = 4 + 64 + 4 + 4 + 4;
+
+ // return message.size() + MESSAGE_SIZE_OVERHEAD;
+ // }
+ }
+
+ private void updateMetrics(ConsumerRecord<K, V> r, TopicPartition tp) {
+ TopicAndPartition tap = NewKafkaSystemConsumer.toTopicAndPartition(tp);
+ SystemStreamPartition ssp = NewKafkaSystemConsumer.toSystemStreamPartition(systemName, tap);
+ long currentSSPLag = getLatestLag(ssp); // lag between the current offset and the highwatermark
+ if (currentSSPLag < 0) {
+ return;
+ }
+ long recordOffset = r.offset();
+ long highWatermark = recordOffset + currentSSPLag; // derived value for the highwatermark
+
+ int size = getRecordSize(r);
+ kafkaConsumerMetrics.incReads(tap);
+ kafkaConsumerMetrics.incBytesReads(tap, size);
+ kafkaConsumerMetrics.setOffsets(tap, recordOffset);
+ kafkaConsumerMetrics.incBrokerBytesReads(metricName, 0, size);
+ kafkaConsumerMetrics.setHighWatermarkValue(tap, highWatermark);
+ }
+
+ /*
+ This method put messages into blockingEnvelopeMap.
+ */
+ private void moveMessagesToTheirQueue(SystemStreamPartition ssp, List<IncomingMessageEnvelope> envelopes) {
+ long nextOffset = nextOffsets.get(ssp);
+
+ for (IncomingMessageEnvelope env : envelopes) {
+ sink.addMessage(ssp, env); // move message to the BlockingEnvelopeMap's queue
+
+ LOG.trace("IncomingMessageEnvelope. got envelope with offset:{} for ssp={}", env.getOffset(), ssp);
+ nextOffset = Long.valueOf(env.getOffset()) + 1;
+ }
+
+ nextOffsets.put(ssp, nextOffset);
+ }
+
+ private void populateMetricNames(Set<SystemStreamPartition> ssps) {
+ HashMap<String, String> tags = new HashMap<>();
+ tags.put("client-id", clientId);// this is required by the KafkaConsumer to get the metrics
+
+ for (SystemStreamPartition ssp : ssps) {
+ TopicPartition tp = NewKafkaSystemConsumer.toTopicPartition(ssp);
+ ssp2MetricName.put(ssp, new MetricName(tp + ".records-lag", "consumer-fetch-manager-metrics", "", tags));
+ }
+ }
+
+ /*
+ The only way to figure out lag for the LiKafkaConsumer is to look at the metrics after each poll() call.
+ One of the metrics (records-lag) shows how far behind the HighWatermark the consumer is.
+ This method populates the lag information for each SSP into latestLags member variable.
+ */
+ private void populateCurrentLags(Set<SystemStreamPartition> ssps) {
+
+ Map<MetricName, ? extends Metric> consumerMetrics = kafkaConsumer.metrics();
+
+ // populate the MetricNames first time
+ if (ssp2MetricName.isEmpty()) {
+ populateMetricNames(ssps);
+ }
+
+ for (SystemStreamPartition ssp : ssps) {
+ MetricName mn = ssp2MetricName.get(ssp);
+ Metric currentLagM = consumerMetrics.get(mn);
+
+ // In linkedin-kafka-client 5.*, high watermark is fixed to be the offset of last available message,
+ // so the lag is now at least 0, which is the same as Samza's definition.
+ // If the lag is not 0, then isAtHead is not true, and kafkaClient keeps polling.
+ long currentLag = (currentLagM != null) ? (long) currentLagM.value() : -1L;
+ /*
+ Metric averageLagM = consumerMetrics.get(new MetricName(tp + ".records-lag-avg", "consumer-fetch-manager-metrics", "", tags));
+ double averageLag = (averageLagM != null) ? averageLagM.value() : -1.0;
+ Metric maxLagM = consumerMetrics.get(new MetricName(tp + ".records-lag-max", "consumer-fetch-manager-metrics", "", tags));
+ double maxLag = (maxLagM != null) ? maxLagM.value() : -1.0;
+ */
+ latestLags.put(ssp, currentLag);
+
+ // calls the setIsAtHead for the BlockingEnvelopeMap
+ sink.setIsAtHighWatermark(ssp, currentLag == 0);
+ }
+ }
+
+ /*
+ Get the latest lag for a specific SSP.
+ */
+ public long getLatestLag(SystemStreamPartition ssp) {
+ Long lag = latestLags.get(ssp);
+ if (lag == null) {
+ throw new SamzaException("Unknown/unregistered ssp in latestLags request: " + ssp);
+ }
+ return lag;
+ }
+
+ /*
+ Using the consumer to poll the messages from the stream.
+ */
+ private void fetchMessages() {
+ Set<SystemStreamPartition> SSPsToFetch = new HashSet<>();
+ for (SystemStreamPartition ssp : nextOffsets.keySet()) {
+ if (sink.needsMoreMessages(ssp)) {
+ SSPsToFetch.add(ssp);
+ }
+ }
+ LOG.debug("pollConsumer {}", SSPsToFetch.size());
+ if (!SSPsToFetch.isEmpty()) {
+ kafkaConsumerMetrics.incBrokerReads(metricName, 0);
+
+ Map<SystemStreamPartition, List<IncomingMessageEnvelope>> response;
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("pollConsumer from following SSPs: {}; total#={}", SSPsToFetch, SSPsToFetch.size());
+ }
+ response = pollConsumer(SSPsToFetch, 500); // TODO should be default value from ConsumerConfig
+
+ // move the responses into the queue
+ for (Map.Entry<SystemStreamPartition, List<IncomingMessageEnvelope>> e : response.entrySet()) {
+ List<IncomingMessageEnvelope> envelopes = e.getValue();
+ if (envelopes != null) {
+ moveMessagesToTheirQueue(e.getKey(), envelopes);
+ }
+ }
+
+ populateCurrentLags(SSPsToFetch); // find current lags for each SSP
+ } else { // nothing to read
+
+ LOG.debug("No topic/partitions need to be fetched for consumer {} right now. Sleeping {}ms.", kafkaConsumer,
+ SLEEP_MS_WHILE_NO_TOPIC_PARTITION);
+
+ kafkaConsumerMetrics.incBrokerSkippedFetchRequests(metricName, 0);
+
+ try {
+ Thread.sleep(SLEEP_MS_WHILE_NO_TOPIC_PARTITION);
+ } catch (InterruptedException e) {
+ LOG.warn("Sleep in fetchMessages was interrupted");
+ }
+ }
+ refreshLatencyMetrics();
+ }
+
+ private void refreshLatencyMetrics() {
+ for (Map.Entry<SystemStreamPartition, Long> e : nextOffsets.entrySet()) {
+ SystemStreamPartition ssp = e.getKey();
+ Long offset = e.getValue();
+ TopicAndPartition tp = NewKafkaSystemConsumer.toTopicAndPartition(ssp);
+ Long lag = latestLags.get(ssp);
+ LOG.trace("Latest offset of {} is {}; lag = {}", ssp, offset, lag);
+ if (lag != null && offset != null && lag >= 0) {
+ long streamEndOffset = offset.longValue() + lag.longValue();
+ // update the metrics
+ kafkaConsumerMetrics.setHighWatermarkValue(tp, streamEndOffset);
+ kafkaConsumerMetrics.setLagValue(tp, lag.longValue());
+ }
+ }
+ }
+
+ boolean isRunning() {
+ return isRunning;
+ }
+
+ Throwable getFailureCause() {
+ return failureCause;
+ }
+
+ public void stop(long timeout) {
+ LOG.info("Shutting down LiKafkaConsumerProxy poll thread:" + toString());
+
+ isRunning = false;
+ try {
+ consumerPollThread.join(timeout);
+ } catch (InterruptedException e) {
+ LOG.warn("Join in LiKafkaConsumerProxy has failed", e);
+ consumerPollThread.interrupt();
+ }
+ }
+}
+
http://git-wip-us.apache.org/repos/asf/samza/blob/72544606/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index 9f0b5f2..c7f6aed 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -19,16 +19,21 @@
package org.apache.samza.system.kafka
+import java.util
import java.util.Properties
+
+import kafka.consumer.ConsumerConfig
import kafka.utils.ZkUtils
+import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.samza.SamzaException
import org.apache.samza.config.ApplicationConfig.ApplicationMode
-import org.apache.samza.util.{Logging, KafkaUtil, ExponentialSleepStrategy, ClientUtilTopicMetadataStore}
-import org.apache.samza.config.{KafkaConfig, ApplicationConfig, StreamConfig, Config}
+import org.apache.samza.util._
+import org.apache.samza.config.{ApplicationConfig, Config, KafkaConfig, StreamConfig}
import org.apache.samza.metrics.MetricsRegistry
import org.apache.samza.config.KafkaConfig.Config2Kafka
import org.apache.samza.config.TaskConfig.Config2Task
import org.apache.kafka.clients.producer.KafkaProducer
+import org.apache.kafka.common.serialization.ByteArrayDeserializer
import org.apache.samza.system.SystemFactory
import org.apache.samza.config.StorageConfig._
import org.apache.samza.system.SystemProducer
@@ -53,21 +58,35 @@ class KafkaSystemFactory extends SystemFactory with Logging {
// Kind of goofy to need a producer config for consumers, but we need metadata.
val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId)
val bootstrapServers = producerConfig.bootsrapServers
- val consumerConfig = config.getKafkaSystemConsumerConfig(systemName, clientId)
+ //val consumerConfig = config.getKafkaSystemConsumerConfig(systemName, clientId)
- val timeout = consumerConfig.socketTimeoutMs
- val bufferSize = consumerConfig.socketReceiveBufferBytes
- val fetchSize = new StreamFetchSizes(consumerConfig.fetchMessageMaxBytes, config.getFetchMessageMaxBytesTopics(systemName))
- val consumerMinSize = consumerConfig.fetchMinBytes
- val consumerMaxWait = consumerConfig.fetchWaitMaxMs
- val autoOffsetResetDefault = consumerConfig.autoOffsetReset
+ //val kafkaConfig = new KafkaConfig(config)
+
+
+ // val timeout = consumerConfig.socketTimeoutMs
+ //val bufferSize = consumerConfig.socketReceiveBufferBytes
+ //val fetchSize = new StreamFetchSizes(consumerConfig.fetchMessageMaxBytes, config.getFetchMessageMaxBytesTopics(systemName))
+ //val consumerMinSize = consumerConfig.fetchMinBytes
+ //val consumerMaxWait = consumerConfig.fetchWaitMaxMs
+ //val autoOffsetResetDefault = consumerConfig.autoOffsetReset
val autoOffsetResetTopics = config.getAutoOffsetResetTopics(systemName)
val fetchThreshold = config.getConsumerFetchThreshold(systemName).getOrElse("50000").toInt
val fetchThresholdBytes = config.getConsumerFetchThresholdBytes(systemName).getOrElse("-1").toLong
- val offsetGetter = new GetOffset(autoOffsetResetDefault, autoOffsetResetTopics)
- val metadataStore = new ClientUtilTopicMetadataStore(bootstrapServers, clientId, timeout)
+ //val offsetGetter = new GetOffset(autoOffsetResetDefault, autoOffsetResetTopics)
+ //val metadataStore = new ClientUtilTopicMetadataStore(bootstrapServers, clientId, timeout)
- new KafkaSystemConsumer(
+
+ val kafkaConsumer: KafkaConsumer[Array[Byte], Array[Byte]] =
+ NewKafkaSystemConsumer.getKafkaConsumerImpl(systemName, clientId, config)
+
+ def valueUnwrapper: NewKafkaSystemConsumer.ValueUnwrapper[Array[Byte]] = null;// TODO add real unwrapper from
+ val kc = new NewKafkaSystemConsumer (
+ kafkaConsumer, systemName, config, clientId,
+ metrics, new SystemClock, false, valueUnwrapper)
+
+ kc
+ /*
+ new KafkaSystemConsumer(
systemName = systemName,
systemAdmin = getAdmin(systemName, config),
metrics = metrics,
@@ -82,7 +101,18 @@ class KafkaSystemFactory extends SystemFactory with Logging {
fetchThresholdBytes = fetchThresholdBytes,
fetchLimitByBytesEnabled = config.isConsumerFetchThresholdBytesEnabled(systemName),
offsetGetter = offsetGetter)
+ */
+ }
+
+ /*
+ def getKafkaConsumerImpl(systemName: String, config: KafkaConfig) = {
+ info("Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}", systemName, config)
+
+ val byteArrayDeserializer = new ByteArrayDeserializer
+ new KafkaConsumer[Array[Byte], Array[Byte]](config.configForVanillaConsumer(),
+ byteArrayDeserializer, byteArrayDeserializer)
}
+ */
def getProducer(systemName: String, config: Config, registry: MetricsRegistry): SystemProducer = {
val clientId = KafkaUtil.getClientId("samza-producer", config)
http://git-wip-us.apache.org/repos/asf/samza/blob/72544606/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
new file mode 100644
index 0000000..26db610
--- /dev/null
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -0,0 +1,403 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.samza.system.kafka;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
+import kafka.common.TopicAndPartition;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.samza.Partition;
+import org.apache.samza.SamzaException;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.system.IncomingMessageEnvelope;
+import org.apache.samza.system.SystemConsumer;
+import org.apache.samza.system.SystemStream;
+import org.apache.samza.system.SystemStreamPartition;
+import org.apache.samza.util.BlockingEnvelopeMap;
+import org.apache.samza.util.Clock;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.Option;
+
+
+public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements SystemConsumer{
+
+ private static final Logger LOG = LoggerFactory.getLogger(NewKafkaSystemConsumer.class);
+
+ /**
+ * Provides a way to unwrap the value further. It is used for intermediate stream messages.
+ * @param <T> value type
+ */
+ public interface ValueUnwrapper<T> {
+ Object unwrapValue(SystemStream systemStream, T value);
+ }
+
+ private static final long FETCH_THRESHOLD = 50000;
+ private static final long FETCH_THRESHOLD_BYTES = -1L;
+ private final Consumer<K,V> kafkaConsumer;
+ private final String systemName;
+ private final KafkaSystemConsumerMetrics samzaConsumerMetrics;
+ private final String clientId;
+ private final String metricName;
+ private final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
+ private final AtomicBoolean stopped = new AtomicBoolean(false);
+ private final AtomicBoolean started = new AtomicBoolean(false);
+ private final Config config;
+ private final boolean fetchThresholdBytesEnabled;
+ private final ValueUnwrapper<V> valueUnwrapper;
+
+ // This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
+ private KafkaConsumerMessageSink messageSink;
+ // proxy is doing the actual reading
+ private KafkaConsumerProxy proxy;
+
+ /* package private */final Map<TopicPartition, String> topicPartitions2Offset = new HashMap<>();
+ /* package private */long perPartitionFetchThreshold;
+ /* package private */long perPartitionFetchThresholdBytes;
+
+ // TODO - consider new class for KafkaSystemConsumerMetrics
+
+ /**
+ * @param systemName
+ * @param config
+ * @param metrics
+ */
+ public NewKafkaSystemConsumer(
+ Consumer<K,V> kafkaConsumer,
+ String systemName,
+ Config config,
+ String clientId,
+ KafkaSystemConsumerMetrics metrics,
+ Clock clock,
+ boolean fetchThresholdBytesEnabled,
+ ValueUnwrapper<V> valueUnwrapper) {
+
+ super(metrics.registry(),clock, metrics.getClass().getName());
+
+ this.samzaConsumerMetrics = metrics;
+ this.clientId = clientId;
+ this.systemName = systemName;
+ this.config = config;
+ this.fetchThresholdBytesEnabled = fetchThresholdBytesEnabled;
+ this.metricName = systemName + " " + clientId;
+
+ this.kafkaConsumer = kafkaConsumer;
+ this.valueUnwrapper = valueUnwrapper;
+
+ LOG.info(String.format(
+ "Created SamzaLiKafkaSystemConsumer for system=%s, clientId=%s, metricName=%s with liKafkaConsumer=%s",
+ systemName, clientId, metricName, this.kafkaConsumer.toString()));
+ }
+
+ public static KafkaConsumer<byte[], byte[]> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
+
+ Map<String, String> injectProps = new HashMap<>();
+ injectProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ injectProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+
+ KafkaConsumerConfig consumerConfig =
+ KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
+
+ LOG.info("==============>Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}", systemName, consumerConfig.originals());
+ /*
+ Map<String, Object> kafkaConsumerConfig = consumerConfig.originals().entrySet().stream()
+ .collect(Collectors.toMap((kv)->kv.getKey(), (kv)->(Object)kv.getValue()));
+*/
+
+ return new KafkaConsumer<byte[], byte[]>(consumerConfig.originals());
+ }
+
+ /**
+ * return system name for this consumer
+ * @return system name
+ */
+ public String getSystemName() {
+ return systemName;
+ }
+
+ @Override
+ public void start() {
+ if (!started.compareAndSet(false, true)) {
+ LOG.warn("attempting to start the consumer for the second (or more) time.");
+ return;
+ }
+ if(stopped.get()) {
+ LOG.warn("attempting to start a stopped consumer");
+ return;
+ }
+LOG.info("==============>About to start consumer");
+ // initialize the subscriptions for all the registered TopicPartitions
+ startSubscription();
+ LOG.info("==============>subscription started");
+ // needs to be called after all the registrations are completed
+ setFetchThresholds();
+ LOG.info("==============>thresholds ste");
+ // Create the proxy to do the actual message reading. It is a separate thread that reads the messages from the stream
+ // and puts them into the sink.
+ createConsumerProxy();
+ LOG.info("==============>proxy started");
+ startConsumer();
+ LOG.info("==============>consumer started");
+ }
+
+ private void startSubscription() {
+ //subscribe to all the TopicPartitions
+ LOG.info("==============>startSubscription for TP: " + topicPartitions2SSP.keySet());
+ try {
+ synchronized (kafkaConsumer) {
+ // we are using assign (and not subscribe), so we need to specify both topic and partition
+ //topicPartitions2SSP.put(new TopicPartition("FAKE PARTITION", 0), new SystemStreamPartition("Some","Another", new Partition(0)));
+ //topicPartitions2Offset.put(new TopicPartition("FAKE PARTITION", 0), "1234");
+ kafkaConsumer.assign(topicPartitions2SSP.keySet());
+ }
+ } catch (Exception e) {
+ LOG.warn("startSubscription failed.", e);
+ throw new SamzaException(e);
+ }
+ }
+
+ private void createConsumerProxy() {
+ // create a sink for passing the messages between the proxy and the consumer
+ messageSink = new KafkaConsumerMessageSink();
+
+ // create the thread with the consumer
+ proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink,
+ samzaConsumerMetrics, metricName, valueUnwrapper);
+
+ LOG.info("==============>Created consumer proxy: " + proxy);
+ }
+
+ /*
+ Set the offsets to start from.
+ Add the TopicPartitions to the proxy.
+ Start the proxy thread.
+ */
+ private void startConsumer() {
+ //set the offset for each TopicPartition
+ topicPartitions2Offset.forEach((tp, startingOffsetString) -> {
+ long startingOffset = Long.valueOf(startingOffsetString);
+
+ try {
+ synchronized (kafkaConsumer) {
+ // TODO in the future we may need to add special handling here for BEGIN/END_OFFSET
+ // this will call liKafkaConsumer.seekToBegin/End()
+ kafkaConsumer.seek(tp, startingOffset); // this value should already be the 'upcoming' value
+ }
+ } catch (Exception e) {
+ // all other exceptions - non recoverable
+ LOG.error("Got Exception while seeking to " + startingOffsetString + " for " + tp, e);
+ throw new SamzaException(e);
+ }
+
+ LOG.info("==============>Changing Consumer's position for tp = " + tp + " to " + startingOffsetString);
+
+ // add the partition to the proxy
+ proxy.addTopicPartition(topicPartitions2SSP.get(tp), startingOffset);
+ });
+
+ // start the proxy thread
+ if (proxy != null && !proxy.isRunning()) {
+ proxy.start();
+ }
+ }
+
+ private void setFetchThresholds() {
+ // get the thresholds, and set defaults if not defined.
+ KafkaConfig kafkaConfig = new KafkaConfig(config);
+ Option<String> fetchThresholdOption = kafkaConfig.getConsumerFetchThreshold(systemName);
+ long fetchThreshold = FETCH_THRESHOLD;
+ if(fetchThresholdOption.isDefined()) {
+ fetchThreshold = Long.valueOf(fetchThresholdOption.get());
+ LOG.info("fetchThresholdOption is defined. fetchThreshold=" + fetchThreshold);
+ }
+ Option<String> fetchThresholdBytesOption = kafkaConfig.getConsumerFetchThresholdBytes(systemName);
+ long fetchThresholdBytes = FETCH_THRESHOLD_BYTES;
+ if(fetchThresholdBytesOption.isDefined()) {
+ fetchThresholdBytes = Long.valueOf(fetchThresholdBytesOption.get());
+ LOG.info("fetchThresholdBytesOption is defined. fetchThresholdBytes=" + fetchThresholdBytes);
+ }
+ LOG.info("fetchThresholdBytes = " + fetchThresholdBytes + "; fetchThreshold=" + fetchThreshold);
+ LOG.info("topicPartitions2Offset #=" + topicPartitions2Offset.size() + "; topicPartition2SSP #=" + topicPartitions2SSP.size());
+
+ if (topicPartitions2SSP.size() > 0) {
+ perPartitionFetchThreshold = fetchThreshold / topicPartitions2SSP.size();
+ LOG.info("perPartitionFetchThreshold=" + perPartitionFetchThreshold);
+ if(fetchThresholdBytesEnabled) {
+ // currently this feature cannot be enabled, because we do not have the size of the messages available.
+ // messages get double buffered, hence divide by 2
+ perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / topicPartitions2SSP.size();
+ LOG.info("perPartitionFetchThresholdBytes is enabled. perPartitionFetchThresholdBytes=" + perPartitionFetchThresholdBytes);
+ }
+ }
+ }
+
+ @Override
+ public void stop() {
+ if (!stopped.compareAndSet(false, true)) {
+ LOG.warn("attempting to stop stopped consumer.");
+ return;
+ }
+
+ LOG.warn("Stopping SamzaRawLiKafkaConsumer + " + this);
+ // stop the proxy (with 5 minutes timeout)
+ if(proxy != null)
+ proxy.stop(TimeUnit.MINUTES.toMillis(5));
+
+ try {
+ synchronized (kafkaConsumer) {
+ kafkaConsumer.close();
+ }
+ } catch (Exception e) {
+ LOG.warn("failed to stop SamzaRawLiKafkaConsumer + " + this, e);
+ }
+ }
+
+ /*
+ record the ssp and the offset. Do not submit it to the consumer yet.
+ */
+ @Override
+ public void register(SystemStreamPartition systemStreamPartition, String offset) {
+ if (!systemStreamPartition.getSystem().equals(systemName)) {
+ LOG.warn("ignoring SSP " + systemStreamPartition + ", because this consumer's system is " + systemName);
+ return;
+ }
+ super.register(systemStreamPartition, offset);
+
+ TopicPartition tp = toTopicPartition(systemStreamPartition);
+
+ topicPartitions2SSP.put(tp, systemStreamPartition);
+
+ LOG.info("==============>registering ssp = " + systemStreamPartition + " with offset " + offset);
+
+ String existingOffset = topicPartitions2Offset.get(tp);
+ // register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
+ if (existingOffset == null || compareOffsets(existingOffset, offset) > 0) {
+ topicPartitions2Offset.put(tp, offset);
+ }
+
+ samzaConsumerMetrics.registerTopicAndPartition(toTopicAndPartition(tp));
+ }
+
+ /**
+ * Compare two String offsets.
+ * Note: there is a method in KafkaAdmin that does this, but using it would require instantiating a SystemAdmin for each consumer.
+ * @param off1
+ * @param off2
+ * @return see {@link Long#compareTo(Long)}
+ */
+ public static int compareOffsets(String off1, String off2) {
+ return Long.valueOf(off1).compareTo(Long.valueOf(off2));
+ }
+
+ @Override
+ public String toString() {
+ return systemName + " " + clientId + "/" + super.toString();
+ }
+
+ @Override
+ public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
+ Set<SystemStreamPartition> systemStreamPartitions, long timeout)
+ throws InterruptedException {
+
+ // check if the proxy is running
+ if(!proxy.isRunning()) {
+ stop();
+ if (proxy.getFailureCause() != null) {
+ String message = "LiKafkaConsumerProxy has stopped";
+ if(proxy.getFailureCause() instanceof org.apache.kafka.common.errors.TopicAuthorizationException)
+ message += " due to TopicAuthorizationException Please refer to go/samzaacluserguide to correctly set up acls for your topic";
+ throw new SamzaException(message, proxy.getFailureCause());
+ } else {
+ LOG.warn("Failure cause not populated for LiKafkaConsumerProxy");
+ throw new SamzaException("LiKafkaConsumerProxy has stopped");
+ }
+ }
+
+ return super.poll(systemStreamPartitions, timeout);
+ }
+
+ public static TopicAndPartition toTopicAndPartition(TopicPartition tp) {
+ return new TopicAndPartition(tp.topic(), tp.partition());
+ }
+
+ public static TopicAndPartition toTopicAndPartition(SystemStreamPartition ssp) {
+ return new TopicAndPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
+ }
+
+ public static TopicPartition toTopicPartition(SystemStreamPartition ssp) {
+ return new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
+ }
+
+ public static SystemStreamPartition toSystemStreamPartition(String systemName, TopicAndPartition tp) {
+ return new SystemStreamPartition(systemName, tp.topic(), new Partition(tp.partition()));
+ }
+
+ ////////////////////////////////////
+ // inner class for the message sink
+ ////////////////////////////////////
+ public class KafkaConsumerMessageSink {
+
+ public void setIsAtHighWatermark(SystemStreamPartition ssp, boolean isAtHighWatermark) {
+ setIsAtHead(ssp, isAtHighWatermark);
+ }
+
+ boolean needsMoreMessages(SystemStreamPartition ssp) {
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("needsMoreMessages from following SSP: {}. fetchLimitByBytes enabled={}; messagesSizeInQueue={};"
+ + "(limit={}); messagesNumInQueue={}(limit={};", ssp, fetchThresholdBytesEnabled, getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes,
+ getNumMessagesInQueue(ssp), perPartitionFetchThreshold);
+ }
+
+ if (fetchThresholdBytesEnabled) {
+ return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes; // TODO Validate
+ } else {
+ return getNumMessagesInQueue(ssp) < perPartitionFetchThreshold;
+ }
+ }
+
+ void addMessage(SystemStreamPartition ssp, IncomingMessageEnvelope envelope) {
+ LOG.info("==============>Incoming message ssp = {}: envelope = {}.", ssp, envelope);
+
+ try {
+ put(ssp, envelope);
+ } catch (InterruptedException e) {
+ throw new SamzaException(
+ String.format("Interrupted while trying to add message with offset %s for ssp %s",
+ envelope.getOffset(),
+ ssp));
+ }
+ }
+ } // end of KafkaMessageSink class
+ ///////////////////////////////////////////////////////////////////////////
+}
http://git-wip-us.apache.org/repos/asf/samza/blob/72544606/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala b/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
index 065170c..8544dbf 100644
--- a/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
+++ b/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
@@ -88,12 +88,12 @@ class TestKafkaCheckpointManager extends KafkaServerTestHarness {
zkClient.close
// read before topic exists should result in a null checkpoint
- //val readCp = readCheckpoint(checkpointTopic, taskName)
- //assertNull(readCp)
+ val readCp = readCheckpoint(checkpointTopic, taskName)
+ assertNull(readCp)
writeCheckpoint(checkpointTopic, taskName, checkpoint1)
+
assertEquals(checkpoint1, readCheckpoint(checkpointTopic, taskName))
-try {Thread.sleep(20000)} catch { case e:Exception =>() }
// writing a second message and reading it returns a more recent checkpoint
writeCheckpoint(checkpointTopic, taskName, checkpoint2)
assertEquals(checkpoint2, readCheckpoint(checkpointTopic, taskName))
@@ -194,7 +194,7 @@ try {Thread.sleep(20000)} catch { case e:Exception =>() }
val systemFactory = Util.getObj(systemFactoryClassName, classOf[SystemFactory])
val spec = new KafkaStreamSpec("id", cpTopic, checkpointSystemName, 1, 1, props)
- System.out.println("CONFIG:" + config)
+ System.out.println("CONFIG = " + config)
new KafkaCheckpointManager(spec, systemFactory, failOnTopicValidation, config, new NoOpMetricsRegistry, serde)
}
[06/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/5e6f5fb5
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/5e6f5fb5
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/5e6f5fb5
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 5e6f5fb5f9a9ee12ce35ee8eb1836a058521df20
Parents: 410ce78 5f81b8d
Author: Boris Shkolnik <bs...@linkedin.com>
Authored: Wed Oct 25 09:50:37 2017 -0700
Committer: Boris Shkolnik <bs...@linkedin.com>
Committed: Wed Oct 25 09:50:37 2017 -0700
----------------------------------------------------------------------
build.gradle | 1 +
.../apache/samza/storage/kv/KeyValueStore.java | 69 ++++---------
.../apache/samza/task/StreamOperatorTask.java | 2 +-
.../samza/execution/TestExecutionPlanner.java | 8 +-
.../samza/system/kafka/KafkaStreamSpec.java | 9 ++
.../kafka/KafkaCheckpointManager.scala | 103 +++++++------------
.../kafka/KafkaCheckpointManagerFactory.scala | 21 +---
.../org/apache/samza/config/KafkaConfig.scala | 37 ++++++-
.../samza/system/kafka/KafkaSystemAdmin.scala | 9 +-
.../samza/system/kafka/KafkaSystemFactory.scala | 22 +++-
.../TestKafkaCheckpointManagerFactory.java | 51 +++++++++
.../kafka/TestKafkaSystemFactoryJava.java | 60 +++++++++++
.../kafka/TestKafkaCheckpointManager.scala | 6 +-
.../apache/samza/config/TestKafkaConfig.scala | 13 +++
.../kv/inmemory/InMemoryKeyValueStore.scala | 8 --
.../samza/storage/kv/RocksDbKeyValueStore.scala | 86 ++++++----------
.../storage/kv/TestRocksDbKeyValueStore.scala | 4 +-
.../apache/samza/storage/kv/CachedStore.scala | 2 +-
.../samza/storage/kv/MockKeyValueStore.scala | 8 --
19 files changed, 303 insertions(+), 216 deletions(-)
----------------------------------------------------------------------
[31/47] samza git commit: Replaced KafkaSystemConsumer,
based on SimpleConsumer, with NewKafkaSystemConsumer,
based on high level Kafka consumer
Posted by bo...@apache.org.
Replaced KafkaSystemConsumer, based on SimpleConsumer, with NewKafkaSystemConsumer, based on high level Kafka consumer
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/332a0481
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/332a0481
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/332a0481
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 332a04815bbc5d526b736d82e5f05262b0922d57
Parents: bab5bdd
Author: Boris S <bo...@apache.org>
Authored: Wed Sep 5 11:51:58 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Sep 5 11:51:58 2018 -0700
----------------------------------------------------------------------
.../samza/system/IncomingMessageEnvelope.java | 3 +-
.../ClusterBasedJobCoordinator.java | 2 +-
.../stream/CoordinatorStreamSystemConsumer.java | 4 +-
.../apache/samza/storage/StorageRecovery.java | 2 +-
.../samza/checkpoint/CheckpointTool.scala | 2 +-
.../apache/samza/checkpoint/OffsetManager.scala | 4 +-
.../samza/coordinator/JobModelManager.scala | 5 +-
.../samza/job/local/ProcessJobFactory.scala | 3 +-
.../samza/job/local/ThreadJobFactory.scala | 14 +-
.../samza/coordinator/TestJobCoordinator.scala | 4 +-
.../clients/consumer/KafkaConsumerConfig.java | 81 ++--
.../samza/system/kafka/KafkaConsumerProxy.java | 32 +-
.../kafka/KafkaSystemConsumerMetrics.scala | 69 ++-
.../samza/system/kafka/KafkaSystemFactory.scala | 47 +-
.../system/kafka/NewKafkaSystemConsumer.java | 93 ++--
.../samza/system/kafka/TestBrokerProxy.scala | 437 -------------------
.../test/integration/StreamTaskTestUtil.scala | 8 +-
17 files changed, 170 insertions(+), 640 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-api/src/main/java/org/apache/samza/system/IncomingMessageEnvelope.java
----------------------------------------------------------------------
diff --git a/samza-api/src/main/java/org/apache/samza/system/IncomingMessageEnvelope.java b/samza-api/src/main/java/org/apache/samza/system/IncomingMessageEnvelope.java
index 4d0ce2f..c5aed31 100644
--- a/samza-api/src/main/java/org/apache/samza/system/IncomingMessageEnvelope.java
+++ b/samza-api/src/main/java/org/apache/samza/system/IncomingMessageEnvelope.java
@@ -59,7 +59,8 @@ public class IncomingMessageEnvelope {
* @param message A deserialized message received from the partition offset.
* @param size size of the message and key in bytes.
*/
- public IncomingMessageEnvelope(SystemStreamPartition systemStreamPartition, String offset, Object key, Object message, int size) {
+ public IncomingMessageEnvelope(SystemStreamPartition systemStreamPartition, String offset,
+ Object key, Object message, int size) {
this.systemStreamPartition = systemStreamPartition;
this.offset = offset;
this.key = key;
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/java/org/apache/samza/clustermanager/ClusterBasedJobCoordinator.java
----------------------------------------------------------------------
diff --git a/samza-core/src/main/java/org/apache/samza/clustermanager/ClusterBasedJobCoordinator.java b/samza-core/src/main/java/org/apache/samza/clustermanager/ClusterBasedJobCoordinator.java
index 016d171..12e26f7 100644
--- a/samza-core/src/main/java/org/apache/samza/clustermanager/ClusterBasedJobCoordinator.java
+++ b/samza-core/src/main/java/org/apache/samza/clustermanager/ClusterBasedJobCoordinator.java
@@ -174,7 +174,7 @@ public class ClusterBasedJobCoordinator {
// build a JobModelManager and ChangelogStreamManager and perform partition assignments.
changelogStreamManager = new ChangelogStreamManager(coordinatorStreamManager);
- jobModelManager = JobModelManager.apply(coordinatorStreamManager, changelogStreamManager.readPartitionMapping());
+ jobModelManager = JobModelManager.apply(coordinatorStreamManager.getConfig(), changelogStreamManager.readPartitionMapping());
config = jobModelManager.jobModel().getConfig();
hasDurableStores = new StorageConfig(config).hasDurableStores();
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java b/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
index 0bdb874..38255a2 100644
--- a/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
+++ b/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
@@ -176,7 +176,7 @@ public class CoordinatorStreamSystemConsumer {
valueMap = messageSerde.fromBytes((byte[]) envelope.getMessage());
}
CoordinatorStreamMessage coordinatorStreamMessage = new CoordinatorStreamMessage(keyArray, valueMap);
- log.info("Received coordinator stream message: {}", coordinatorStreamMessage);
+ log.debug("Received coordinator stream message: {}", coordinatorStreamMessage);
// Remove any existing entry. Set.add() does not add if the element already exists.
if (bootstrappedMessages.remove(coordinatorStreamMessage)) {
log.debug("Removed duplicate message: {}", coordinatorStreamMessage);
@@ -194,7 +194,7 @@ public class CoordinatorStreamSystemConsumer {
}
bootstrappedStreamSet = Collections.unmodifiableSet(bootstrappedMessages);
- log.info("Bootstrapped configuration: {}", configMap);
+ log.debug("Bootstrapped configuration: {}", configMap);
isBootstrapped = true;
} catch (Exception e) {
throw new SamzaException(e);
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/java/org/apache/samza/storage/StorageRecovery.java
----------------------------------------------------------------------
diff --git a/samza-core/src/main/java/org/apache/samza/storage/StorageRecovery.java b/samza-core/src/main/java/org/apache/samza/storage/StorageRecovery.java
index f9c6c0c..c6dd9a7 100644
--- a/samza-core/src/main/java/org/apache/samza/storage/StorageRecovery.java
+++ b/samza-core/src/main/java/org/apache/samza/storage/StorageRecovery.java
@@ -131,7 +131,7 @@ public class StorageRecovery extends CommandLine {
coordinatorStreamManager.start();
coordinatorStreamManager.bootstrap();
ChangelogStreamManager changelogStreamManager = new ChangelogStreamManager(coordinatorStreamManager);
- JobModel jobModel = JobModelManager.apply(coordinatorStreamManager, changelogStreamManager.readPartitionMapping()).jobModel();
+ JobModel jobModel = JobModelManager.apply(coordinatorStreamManager.getConfig(), changelogStreamManager.readPartitionMapping()).jobModel();
containers = jobModel.getContainers();
coordinatorStreamManager.stop();
}
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/scala/org/apache/samza/checkpoint/CheckpointTool.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/checkpoint/CheckpointTool.scala b/samza-core/src/main/scala/org/apache/samza/checkpoint/CheckpointTool.scala
index 0ca8a3d..65fb419 100644
--- a/samza-core/src/main/scala/org/apache/samza/checkpoint/CheckpointTool.scala
+++ b/samza-core/src/main/scala/org/apache/samza/checkpoint/CheckpointTool.scala
@@ -170,7 +170,7 @@ class CheckpointTool(config: Config, newOffsets: TaskNameToCheckpointMap, manage
coordinatorStreamManager.start
coordinatorStreamManager.bootstrap
val changelogManager = new ChangelogStreamManager(coordinatorStreamManager)
- val jobModelManager = JobModelManager(coordinatorStreamManager, changelogManager.readPartitionMapping())
+ val jobModelManager = JobModelManager(coordinatorStreamManager.getConfig, changelogManager.readPartitionMapping())
val taskNames = jobModelManager
.jobModel
.getContainers
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala b/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
index d2b6667..53d5e98 100644
--- a/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
+++ b/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
@@ -304,7 +304,7 @@ class OffsetManager(
*/
private def loadOffsetsFromCheckpointManager {
if (checkpointManager != null) {
- info("Loading offsets from checkpoint manager.")
+ debug("Loading offsets from checkpoint manager.")
checkpointManager.start
val result = systemStreamPartitions
@@ -332,7 +332,7 @@ class OffsetManager(
* Loads last processed offsets for a single taskName.
*/
private def restoreOffsetsFromCheckpoint(taskName: TaskName): Map[TaskName, Map[SystemStreamPartition, String]] = {
- info("Loading checkpoints for taskName: %s." format taskName)
+ debug("Loading checkpoints for taskName: %s." format taskName)
val checkpoint = checkpointManager.readLastCheckpoint(taskName)
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala b/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
index f939736..f7ffd4e 100644
--- a/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
+++ b/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
@@ -64,12 +64,11 @@ object JobModelManager extends Logging {
* a) Reads the jobModel from coordinator stream using the job's configuration.
* b) Recomputes changelog partition mapping based on jobModel and job's configuration.
* c) Builds JobModelManager using the jobModel read from coordinator stream.
- * @param coordinatorStreamManager Coordinator stream manager.
+ * @param config Coordinator stream manager config
* @param changelogPartitionMapping The changelog partition-to-task mapping.
* @return JobModelManager
*/
- def apply(coordinatorStreamManager: CoordinatorStreamManager, changelogPartitionMapping: util.Map[TaskName, Integer]) = {
- val config = coordinatorStreamManager.getConfig
+ def apply(config: Config, changelogPartitionMapping: util.Map[TaskName, Integer]) = {
val localityManager = new LocalityManager(config, new MetricsRegistryMap())
// Map the name of each system to the corresponding SystemAdmin
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/scala/org/apache/samza/job/local/ProcessJobFactory.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ProcessJobFactory.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ProcessJobFactory.scala
index 642a484..64f516b 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ProcessJobFactory.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ProcessJobFactory.scala
@@ -50,7 +50,7 @@ class ProcessJobFactory extends StreamJobFactory with Logging {
coordinatorStreamManager.bootstrap
val changelogStreamManager = new ChangelogStreamManager(coordinatorStreamManager)
- val coordinator = JobModelManager(coordinatorStreamManager, changelogStreamManager.readPartitionMapping())
+ val coordinator = JobModelManager(coordinatorStreamManager.getConfig, changelogStreamManager.readPartitionMapping())
val jobModel = coordinator.jobModel
val taskPartitionMappings: util.Map[TaskName, Integer] = new util.HashMap[TaskName, Integer]
@@ -61,6 +61,7 @@ class ProcessJobFactory extends StreamJobFactory with Logging {
}
changelogStreamManager.writePartitionMapping(taskPartitionMappings)
+ coordinatorStreamManager.stop()
//create necessary checkpoint and changelog streams
val checkpointManager = new TaskConfigJava(jobModel.getConfig).getCheckpointManager(metricsRegistry)
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
index 34cc2a0..15aa5a6 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
@@ -19,11 +19,9 @@
package org.apache.samza.job.local
-import java.util.concurrent.{CountDownLatch, TimeUnit}
-
-import org.apache.samza.config.{Config, TaskConfigJava}
import org.apache.samza.config.JobConfig._
import org.apache.samza.config.ShellCommandConfig._
+import org.apache.samza.config.{Config, TaskConfigJava}
import org.apache.samza.container.{SamzaContainer, SamzaContainerListener, TaskName}
import org.apache.samza.coordinator.JobModelManager
import org.apache.samza.coordinator.stream.CoordinatorStreamManager
@@ -38,8 +36,8 @@ import scala.collection.JavaConversions._
import scala.collection.mutable
/**
- * Creates a new Thread job with the given config
- */
+ * Creates a new Thread job with the given config
+ */
class ThreadJobFactory extends StreamJobFactory with Logging {
def getJob(config: Config): StreamJob = {
info("Creating a ThreadJob, which is only meant for debugging.")
@@ -51,7 +49,8 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
coordinatorStreamManager.bootstrap
val changelogStreamManager = new ChangelogStreamManager(coordinatorStreamManager)
- val coordinator = JobModelManager(coordinatorStreamManager, changelogStreamManager.readPartitionMapping())
+ val coordinator = JobModelManager(coordinatorStreamManager.getConfig, changelogStreamManager.readPartitionMapping())
+ coordinatorStreamManager.stop()
val jobModel = coordinator.jobModel
val taskPartitionMappings: mutable.Map[TaskName, Integer] = mutable.Map[TaskName, Integer]()
@@ -85,7 +84,7 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
// Give developers a nice friendly warning if they've specified task.opts and are using a threaded job.
config.getTaskOpts match {
- case Some(taskOpts) => warn("%s was specified in config, but is not being used because job is being executed with ThreadJob. You probably want to run %s=%s." format (TASK_JVM_OPTS, STREAM_JOB_FACTORY_CLASS, classOf[ProcessJobFactory].getName))
+ case Some(taskOpts) => warn("%s was specified in config, but is not being used because job is being executed with ThreadJob. You probably want to run %s=%s." format(TASK_JVM_OPTS, STREAM_JOB_FACTORY_CLASS, classOf[ProcessJobFactory].getName))
case _ => None
}
@@ -117,7 +116,6 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
threadJob
} finally {
coordinator.stop
- coordinatorStreamManager.stop
jmxServer.stop
}
}
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/test/scala/org/apache/samza/coordinator/TestJobCoordinator.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/test/scala/org/apache/samza/coordinator/TestJobCoordinator.scala b/samza-core/src/test/scala/org/apache/samza/coordinator/TestJobCoordinator.scala
index 42610ae..b85b4a4 100644
--- a/samza-core/src/test/scala/org/apache/samza/coordinator/TestJobCoordinator.scala
+++ b/samza-core/src/test/scala/org/apache/samza/coordinator/TestJobCoordinator.scala
@@ -275,7 +275,9 @@ class TestJobCoordinator extends FlatSpec with PrivateMethodTester {
coordinatorStreamManager.start
coordinatorStreamManager.bootstrap
val changelogPartitionManager = new ChangelogStreamManager(coordinatorStreamManager)
- JobModelManager(coordinatorStreamManager, changelogPartitionManager.readPartitionMapping())
+ val jobModelManager = JobModelManager(coordinatorStreamManager.getConfig, changelogPartitionManager.readPartitionMapping())
+ coordinatorStreamManager.stop()
+ jobModelManager
}
@Before
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index 88437ee..843e03d 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -43,11 +43,13 @@ public class KafkaConsumerConfig extends ConsumerConfig {
private static final String PRODUCER_CLIENT_ID_PREFIX = "kafka-producer";
private static final String CONSUMER_CLIENT_ID_PREFIX = "kafka-consumer";
+ private static final String ADMIN_CLIENT_ID_PREFIX = "samza-admin";
private static final String SAMZA_OFFSET_LARGEST = "largest";
private static final String SAMZA_OFFSET_SMALLEST = "smallest";
private static final String KAFKA_OFFSET_LATEST = "latest";
private static final String KAFKA_OFFSET_EARLIEST = "earliest";
private static final String KAFKA_OFFSET_NONE = "none";
+
/*
* By default, KafkaConsumer will fetch ALL available messages for all the partitions.
* This may cause memory issues. That's why we will limit the number of messages per partition we get on EACH poll().
@@ -59,8 +61,8 @@ public class KafkaConsumerConfig extends ConsumerConfig {
super(props);
}
- public static KafkaConsumerConfig getKafkaSystemConsumerConfig(Config config,
- String systemName, String clientId, Map<String, String> injectProps) {
+ public static KafkaConsumerConfig getKafkaSystemConsumerConfig(Config config, String systemName, String clientId,
+ Map<String, String> injectProps) {
Config subConf = config.subset(String.format("systems.%s.consumer.", systemName), true);
@@ -72,17 +74,20 @@ public class KafkaConsumerConfig extends ConsumerConfig {
consumerProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
consumerProps.setProperty(ConsumerConfig.CLIENT_ID_CONFIG, clientId);
- //Open-source Kafka Consumer configuration
- consumerProps.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // Disable consumer auto-commit
+ //Kafka client configuration
+
+ // Disable consumer auto-commit because Samza controls commits
+ consumerProps.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
- consumerProps.setProperty(
- ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
- getAutoOffsetResetValue(consumerProps)); // Translate samza config value to kafka config value
+ // Translate samza config value to kafka config value
+ consumerProps.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
+ getAutoOffsetResetValue(consumerProps));
// make sure bootstrap configs are in ?? SHOULD WE FAIL IF THEY ARE NOT?
- if (! subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
+ if (!subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
// get it from the producer config
- String bootstrapServer = config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
+ String bootstrapServer =
+ config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
if (StringUtils.isEmpty(bootstrapServer)) {
throw new SamzaException("Missing " + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG + " config for " + systemName);
}
@@ -90,25 +95,22 @@ public class KafkaConsumerConfig extends ConsumerConfig {
}
// Always use default partition assignment strategy. Do not allow override.
- consumerProps.setProperty(
- ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG,
- RangeAssignor.class.getName());
-
+ consumerProps.setProperty(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG, RangeAssignor.class.getName());
// the consumer is fully typed, and deserialization can be too. But in case it is not provided we should
// default to byte[]
- if ( !config.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
- LOG.info("default key serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
+ if (!config.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
+ LOG.info("setting default key serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
}
- if ( !config.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
- LOG.info("default value serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
+ if (!config.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
+ LOG.info("setting default value serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
}
-
// NOT SURE THIS IS NEEDED TODO
- String maxPollRecords = subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT);;
+ String maxPollRecords =
+ subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT);
consumerProps.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
// put overrides
@@ -122,38 +124,37 @@ public class KafkaConsumerConfig extends ConsumerConfig {
JobConfig jobConfig = new JobConfig(config);
Option<String> jobIdOption = jobConfig.getJobId();
Option<String> jobNameOption = jobConfig.getName();
- return (jobNameOption.isDefined()? jobNameOption.get() : "undefined_job_name") + "-"
- + (jobIdOption.isDefined()? jobIdOption.get() : "undefined_job_id");
+ return (jobNameOption.isDefined() ? jobNameOption.get() : "undefined_job_name") + "-" + (jobIdOption.isDefined()
+ ? jobIdOption.get() : "undefined_job_id");
}
+
// client id should be unique per job
- public static String getClientId(String id, Config config) {
+ public static String getClientId(Config config) {
+ return getClientId(CONSUMER_CLIENT_ID_PREFIX, config);
+ }
+ public static String getProducerClientId(Config config) {
+ return getClientId(PRODUCER_CLIENT_ID_PREFIX, config);
+ }
+ public static String getAdminClientId(Config config) {
+ return getClientId(ADMIN_CLIENT_ID_PREFIX, config);
+ }
+
+ private static String getClientId(String id, Config config) {
if (config.get(JobConfig.JOB_NAME()) == null) {
throw new ConfigException("Missing job name");
}
String jobName = config.get(JobConfig.JOB_NAME());
- String jobId = "1";
- if (config.get(JobConfig.JOB_ID()) != null) {
- jobId = config.get(JobConfig.JOB_ID());
- }
- return getClientId(id, jobName, jobId);
- }
+ String jobId = (config.get(JobConfig.JOB_ID()) != null) ? config.get(JobConfig.JOB_ID()) : "1";
- private static String getClientId(String id, String jobName, String jobId) {
- return String.format(
- "%s-%s-%s",
- id.replaceAll("[^A-Za-z0-9]", "_"),
- jobName.replaceAll("[^A-Za-z0-9]", "_"),
+ return String.format("%s-%s-%s", id.replaceAll("[^A-Za-z0-9]", "_"), jobName.replaceAll("[^A-Za-z0-9]", "_"),
jobId.replaceAll("[^A-Za-z0-9]", "_"));
}
- public static String getProducerClientId(Config config) {
- return getClientId(PRODUCER_CLIENT_ID_PREFIX, config);
- }
-
/**
* Settings for auto.reset in samza are different from settings in Kafka (auto.offset.reset) - need to convert
* "largest" -> "latest"
* "smallest" -> "earliest"
+ * "none" -> "none"
* "none" - will fail the kafka consumer, if offset is out of range
* @param properties All consumer related {@link Properties} parsed from samza config
* @return String representing the config value for "auto.offset.reset" property
@@ -162,9 +163,8 @@ public class KafkaConsumerConfig extends ConsumerConfig {
String autoOffsetReset = properties.getProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, KAFKA_OFFSET_LATEST);
// accept kafka values directly
- if (autoOffsetReset.equals(KAFKA_OFFSET_EARLIEST) ||
- autoOffsetReset.equals(KAFKA_OFFSET_LATEST) ||
- autoOffsetReset.equals(KAFKA_OFFSET_NONE)) {
+ if (autoOffsetReset.equals(KAFKA_OFFSET_EARLIEST) || autoOffsetReset.equals(KAFKA_OFFSET_LATEST)
+ || autoOffsetReset.equals(KAFKA_OFFSET_NONE)) {
return autoOffsetReset;
}
@@ -177,5 +177,4 @@ public class KafkaConsumerConfig extends ConsumerConfig {
return KAFKA_OFFSET_LATEST;
}
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index cddfdfd..a6272cd 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -86,7 +86,7 @@ public class KafkaConsumerProxy<K, V> {
this.clientId = clientId;
// TODO - see if we need new metrics (not host:port based)
- this.kafkaConsumerMetrics.registerBrokerProxy(metricName, 0);
+ this.kafkaConsumerMetrics.registerClientProxy(metricName);
consumerPollThread = new Thread(createProxyThreadRunnable());
}
@@ -132,7 +132,7 @@ public class KafkaConsumerProxy<K, V> {
// we reuse existing metrics. They assume host and port for the broker
// for now fake the port with the consumer name
- kafkaConsumerMetrics.setTopicPartitionValue(metricName, 0, nextOffsets.size());
+ kafkaConsumerMetrics.setTopicPartitionValue(metricName, nextOffsets.size());
}
/**
@@ -258,16 +258,10 @@ public class KafkaConsumerProxy<K, V> {
results.put(ssp, listMsgs);
}
- // TODO - add calculation of the size of the message, when available from Kafka
- int msgSize = 0;
- // if (fetchLimitByBytesEnabled) {
- msgSize = getRecordSize(r);
- //}
-
final K key = r.key();
final Object value = r.value();
IncomingMessageEnvelope imEnvelope =
- new IncomingMessageEnvelope(ssp, String.valueOf(r.offset()), key, value, msgSize);
+ new IncomingMessageEnvelope(ssp, String.valueOf(r.offset()), key, value, getRecordSize(r));
listMsgs.add(imEnvelope);
}
if (LOG.isDebugEnabled()) {
@@ -282,18 +276,8 @@ public class KafkaConsumerProxy<K, V> {
}
private int getRecordSize(ConsumerRecord<K, V> r) {
- int keySize = 0; //(r.key() == null) ? 0 : r.key().getSerializedKeySize();
- return keySize; // + r.getSerializedMsgSize(); // TODO -enable when functionality available from Kafka
-
- //int getMessageSize (Message message) {
- // Approximate additional shallow heap overhead per message in addition to the raw bytes
- // received from Kafka 4 + 64 + 4 + 4 + 4 = 80 bytes overhead.
- // As this overhead is a moving target, and not very large
- // compared to the message size its being ignore in the computation for now.
- // int MESSAGE_SIZE_OVERHEAD = 4 + 64 + 4 + 4 + 4;
-
- // return message.size() + MESSAGE_SIZE_OVERHEAD;
- // }
+ int keySize = (r.key() == null) ? 0 : r.serializedKeySize();
+ return keySize + r.serializedValueSize();
}
private void updateMetrics(ConsumerRecord<K, V> r, TopicPartition tp) {
@@ -310,7 +294,7 @@ public class KafkaConsumerProxy<K, V> {
kafkaConsumerMetrics.incReads(tap);
kafkaConsumerMetrics.incBytesReads(tap, size);
kafkaConsumerMetrics.setOffsets(tap, recordOffset);
- kafkaConsumerMetrics.incBrokerBytesReads(metricName, 0, size);
+ kafkaConsumerMetrics.incClientBytesReads(metricName, size);
kafkaConsumerMetrics.setHighWatermarkValue(tap, highWatermark);
}
@@ -398,7 +382,7 @@ public class KafkaConsumerProxy<K, V> {
}
LOG.debug("pollConsumer {}", SSPsToFetch.size());
if (!SSPsToFetch.isEmpty()) {
- kafkaConsumerMetrics.incBrokerReads(metricName, 0);
+ kafkaConsumerMetrics.incClientReads(metricName);
Map<SystemStreamPartition, List<IncomingMessageEnvelope>> response;
if (LOG.isDebugEnabled()) {
@@ -420,7 +404,7 @@ public class KafkaConsumerProxy<K, V> {
LOG.debug("No topic/partitions need to be fetched for consumer {} right now. Sleeping {}ms.", kafkaConsumer,
SLEEP_MS_WHILE_NO_TOPIC_PARTITION);
- kafkaConsumerMetrics.incBrokerSkippedFetchRequests(metricName, 0);
+ kafkaConsumerMetrics.incClientSkippedFetchRequests(metricName);
try {
Thread.sleep(SLEEP_MS_WHILE_NO_TOPIC_PARTITION);
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
index 1aa66dc..415bd38 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
@@ -19,13 +19,10 @@
package org.apache.samza.system.kafka
-import org.apache.samza.metrics.MetricsHelper
-import org.apache.samza.metrics.MetricsRegistryMap
-import org.apache.samza.metrics.MetricsRegistry
import java.util.concurrent.ConcurrentHashMap
+
import kafka.common.TopicAndPartition
-import org.apache.samza.metrics.Counter
-import org.apache.samza.metrics.Gauge
+import org.apache.samza.metrics._
class KafkaSystemConsumerMetrics(val systemName: String = "unknown", val registry: MetricsRegistry = new MetricsRegistryMap) extends MetricsHelper {
val offsets = new ConcurrentHashMap[TopicAndPartition, Counter]
@@ -34,68 +31,66 @@ class KafkaSystemConsumerMetrics(val systemName: String = "unknown", val registr
val lag = new ConcurrentHashMap[TopicAndPartition, Gauge[Long]]
val highWatermark = new ConcurrentHashMap[TopicAndPartition, Gauge[Long]]
- /*
- TODO Fix
- * (String, Int) = (host, port) of BrokerProxy.
- */
-
- val reconnects = new ConcurrentHashMap[(String, Int), Counter]
- val brokerBytesRead = new ConcurrentHashMap[(String, Int), Counter]
- val brokerReads = new ConcurrentHashMap[(String, Int), Counter]
- val brokerSkippedFetchRequests = new ConcurrentHashMap[(String, Int), Counter]
- val topicPartitions = new ConcurrentHashMap[(String, Int), Gauge[Int]]
+ val clientBytesRead = new ConcurrentHashMap[String, Counter]
+ val clientReads = new ConcurrentHashMap[String, Counter]
+ val clientSkippedFetchRequests = new ConcurrentHashMap[String, Counter]
+ val topicPartitions = new ConcurrentHashMap[String, Gauge[Int]]
def registerTopicAndPartition(tp: TopicAndPartition) = {
if (!offsets.contains(tp)) {
- offsets.put(tp, newCounter("%s-%s-offset-change" format (tp.topic, tp.partition)))
- bytesRead.put(tp, newCounter("%s-%s-bytes-read" format (tp.topic, tp.partition)))
- reads.put(tp, newCounter("%s-%s-messages-read" format (tp.topic, tp.partition)))
- highWatermark.put(tp, newGauge("%s-%s-high-watermark" format (tp.topic, tp.partition), -1L))
- lag.put(tp, newGauge("%s-%s-messages-behind-high-watermark" format (tp.topic, tp.partition), 0L))
+ offsets.put(tp, newCounter("%s-%s-offset-change" format(tp.topic, tp.partition)))
+ bytesRead.put(tp, newCounter("%s-%s-bytes-read" format(tp.topic, tp.partition)))
+ reads.put(tp, newCounter("%s-%s-messages-read" format(tp.topic, tp.partition)))
+ highWatermark.put(tp, newGauge("%s-%s-high-watermark" format(tp.topic, tp.partition), -1L))
+ lag.put(tp, newGauge("%s-%s-messages-behind-high-watermark" format(tp.topic, tp.partition), 0L))
}
}
- def registerBrokerProxy(host: String, port: Int) {
- reconnects.put((host, port), newCounter("%s-%s-reconnects" format (host, port)))
- brokerBytesRead.put((host, port), newCounter("%s-%s-bytes-read" format (host, port)))
- brokerReads.put((host, port), newCounter("%s-%s-messages-read" format (host, port)))
- brokerSkippedFetchRequests.put((host, port), newCounter("%s-%s-skipped-fetch-requests" format (host, port)))
- topicPartitions.put((host, port), newGauge("%s-%s-topic-partitions" format (host, port), 0))
+ def registerClientProxy(clientName: String) {
+ clientBytesRead.put(clientName, newCounter("%s-bytes-read" format clientName))
+ clientReads.put(clientName, newCounter("%s-messages-read" format clientName))
+ clientSkippedFetchRequests.put(clientName, newCounter("%s-skipped-fetch-requests" format clientName))
+ topicPartitions.put(clientName, newGauge("%s-topic-partitions" format clientName, 0))
}
// java friendlier interfaces
// Gauges
- def setTopicPartitionValue(host: String, port: Int, value: Int) {
- topicPartitions.get((host,port)).set(value)
+ def setTopicPartitionValue(clientName: String, value: Int) {
+ topicPartitions.get(clientName).set(value)
}
+
def setLagValue(topicAndPartition: TopicAndPartition, value: Long) {
lag.get((topicAndPartition)).set(value);
}
+
def setHighWatermarkValue(topicAndPartition: TopicAndPartition, value: Long) {
highWatermark.get((topicAndPartition)).set(value);
}
// Counters
- def incBrokerReads(host: String, port: Int) {
- brokerReads.get((host,port)).inc
+ def incClientReads(clientName: String) {
+ clientReads.get(clientName).inc
}
+
def incReads(topicAndPartition: TopicAndPartition) {
reads.get(topicAndPartition).inc;
}
+
def incBytesReads(topicAndPartition: TopicAndPartition, inc: Long) {
bytesRead.get(topicAndPartition).inc(inc);
}
- def incBrokerBytesReads(host: String, port: Int, incBytes: Long) {
- brokerBytesRead.get((host,port)).inc(incBytes)
+
+ def incClientBytesReads(clientName: String, incBytes: Long) {
+ clientBytesRead.get(clientName).inc(incBytes)
}
- def incBrokerSkippedFetchRequests(host: String, port: Int) {
- brokerSkippedFetchRequests.get((host,port)).inc()
+
+ def incClientSkippedFetchRequests(clientName: String) {
+ clientSkippedFetchRequests.get(clientName).inc()
}
+
def setOffsets(topicAndPartition: TopicAndPartition, offset: Long) {
offsets.get(topicAndPartition).set(offset)
}
- def incReconnects(host: String, port: Int) {
- reconnects.get((host,port)).inc()
- }
+
override def getPrefix = systemName + "-"
}
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index 6a5eda9..892d400 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -19,27 +19,21 @@
package org.apache.samza.system.kafka
-import java.util
import java.util.Properties
-import kafka.consumer.ConsumerConfig
import kafka.utils.ZkUtils
-import org.apache.kafka.clients.consumer.KafkaConsumer
+import org.apache.kafka.clients.consumer.KafkaConsumerConfig
+import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.samza.SamzaException
import org.apache.samza.config.ApplicationConfig.ApplicationMode
-import org.apache.samza.util._
-import org.apache.samza.config.{ApplicationConfig, Config, KafkaConfig, StreamConfig}
-import org.apache.samza.metrics.MetricsRegistry
import org.apache.samza.config.KafkaConfig.Config2Kafka
-import org.apache.samza.config.TaskConfig.Config2Task
-import org.apache.kafka.clients.producer.KafkaProducer
-import org.apache.kafka.common.serialization.ByteArrayDeserializer
-import org.apache.samza.system.SystemFactory
import org.apache.samza.config.StorageConfig._
-import org.apache.samza.system.SystemProducer
-import org.apache.samza.system.SystemAdmin
import org.apache.samza.config.SystemConfig.Config2System
-import org.apache.samza.system.SystemConsumer
+import org.apache.samza.config.TaskConfig.Config2Task
+import org.apache.samza.config.{ApplicationConfig, Config, KafkaConfig, StreamConfig}
+import org.apache.samza.metrics.MetricsRegistry
+import org.apache.samza.system.{SystemAdmin, SystemConsumer, SystemFactory, SystemProducer}
+import org.apache.samza.util._
object KafkaSystemFactory extends Logging {
def getInjectedProducerProperties(systemName: String, config: Config) = if (config.isChangelogSystem(systemName)) {
@@ -51,8 +45,9 @@ object KafkaSystemFactory extends Logging {
}
class KafkaSystemFactory extends SystemFactory with Logging {
+
def getConsumer(systemName: String, config: Config, registry: MetricsRegistry): SystemConsumer = {
- val clientId = KafkaUtil.getClientId("samza-consumer", config)
+ val clientId = KafkaConsumerConfig.getClientId(config)
val metrics = new KafkaSystemConsumerMetrics(systemName, registry)
NewKafkaSystemConsumer.getNewKafkaSystemConsumer(
@@ -60,10 +55,12 @@ class KafkaSystemFactory extends SystemFactory with Logging {
}
def getProducer(systemName: String, config: Config, registry: MetricsRegistry): SystemProducer = {
- val clientId = KafkaUtil.getClientId("samza-producer", config)
+ val clientId = KafkaConsumerConfig.getProducerClientId(config)
val injectedProps = KafkaSystemFactory.getInjectedProducerProperties(systemName, config)
val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId, injectedProps)
- val getProducer = () => { new KafkaProducer[Array[Byte], Array[Byte]](producerConfig.getProducerProperties) }
+ val getProducer = () => {
+ new KafkaProducer[Array[Byte], Array[Byte]](producerConfig.getProducerProperties)
+ }
val metrics = new KafkaSystemProducerMetrics(systemName, registry)
// Unlike consumer, no need to use encoders here, since they come for free
@@ -79,7 +76,7 @@ class KafkaSystemFactory extends SystemFactory with Logging {
}
def getAdmin(systemName: String, config: Config): SystemAdmin = {
- val clientId = KafkaUtil.getClientId("samza-admin", config)
+ val clientId = KafkaConsumerConfig.getAdminClientId(config)
val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId)
val bootstrapServers = producerConfig.bootsrapServers
val consumerConfig = config.getKafkaSystemConsumerConfig(systemName, clientId)
@@ -94,13 +91,13 @@ class KafkaSystemFactory extends SystemFactory with Logging {
val coordinatorStreamReplicationFactor = config.getCoordinatorReplicationFactor.toInt
val storeToChangelog = config.getKafkaChangelogEnabledStores()
// Construct the meta information for each topic, if the replication factor is not defined, we use 2 as the number of replicas for the change log stream.
- val topicMetaInformation = storeToChangelog.map{case (storeName, topicName) =>
- {
- val replicationFactor = config.getChangelogStreamReplicationFactor(storeName).toInt
- val changelogInfo = ChangelogInfo(replicationFactor, config.getChangelogKafkaProperties(storeName))
- info("Creating topic meta information for topic: %s with replication factor: %s" format (topicName, replicationFactor))
- (topicName, changelogInfo)
- }}
+ val topicMetaInformation = storeToChangelog.map { case (storeName, topicName) => {
+ val replicationFactor = config.getChangelogStreamReplicationFactor(storeName).toInt
+ val changelogInfo = ChangelogInfo(replicationFactor, config.getChangelogKafkaProperties(storeName))
+ info("Creating topic meta information for topic: %s with replication factor: %s" format(topicName, replicationFactor))
+ (topicName, changelogInfo)
+ }
+ }
val deleteCommittedMessages = config.deleteCommittedMessages(systemName).exists(isEnabled => isEnabled.toBoolean)
val intermediateStreamProperties: Map[String, Properties] = getIntermediateStreamProperties(config)
@@ -125,7 +122,7 @@ class KafkaSystemFactory extends SystemFactory with Logging {
"segment.bytes" -> segmentBytes)) { case (props, (k, v)) => props.put(k, v); props }
}
- def getIntermediateStreamProperties(config : Config): Map[String, Properties] = {
+ def getIntermediateStreamProperties(config: Config): Map[String, Properties] = {
val appConfig = new ApplicationConfig(config)
if (appConfig.getAppMode == ApplicationMode.BATCH) {
val streamConfig = new StreamConfig(config)
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index b33db42..717b45d 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -53,12 +53,12 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
private static final long FETCH_THRESHOLD = 50000;
private static final long FETCH_THRESHOLD_BYTES = -1L;
+
private final Consumer<K, V> kafkaConsumer;
private final String systemName;
private final KafkaSystemConsumerMetrics samzaConsumerMetrics;
private final String clientId;
private final String metricName;
- /* package private */final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
private final AtomicBoolean stopped = new AtomicBoolean(false);
private final AtomicBoolean started = new AtomicBoolean(false);
private final Config config;
@@ -66,15 +66,16 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
// This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
/* package private */ KafkaConsumerMessageSink messageSink;
+
// proxy is doing the actual reading
private KafkaConsumerProxy proxy;
/* package private */final Map<TopicPartition, String> topicPartitions2Offset = new HashMap<>();
+ /* package private */final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
+
/* package private */ long perPartitionFetchThreshold;
/* package private */ long perPartitionFetchThresholdBytes;
- // TODO - consider new class for KafkaSystemConsumerMetrics
-
/**
* @param systemName
* @param config
@@ -85,32 +86,28 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
super(metrics.registry(), clock, metrics.getClass().getName());
+ this.kafkaConsumer = kafkaConsumer;
this.samzaConsumerMetrics = metrics;
this.clientId = clientId;
this.systemName = systemName;
this.config = config;
this.metricName = systemName + " " + clientId;
- this.kafkaConsumer = kafkaConsumer;
-
this.fetchThresholdBytesEnabled = new KafkaConfig(config).isConsumerFetchThresholdBytesEnabled(systemName);
- LOG.info(String.format(
- "Created SamzaKafkaSystemConsumer for system=%s, clientId=%s, metricName=%s with KafkaConsumer=%s", systemName,
- clientId, metricName, this.kafkaConsumer.toString()));
+ LOG.info("Created SamzaKafkaSystemConsumer for system={}, clientId={}, metricName={}, KafkaConsumer={}", systemName,
+ clientId, metricName, this.kafkaConsumer.toString());
}
public static <K, V> NewKafkaSystemConsumer getNewKafkaSystemConsumer(String systemName, Config config,
String clientId, KafkaSystemConsumerMetrics metrics, Clock clock) {
-
-
// extract consumer configs and create kafka consumer
KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
-
+ LOG.info("Created kafka consumer for system {}, clientId {}: {}", systemName, clientId, kafkaConsumer);
NewKafkaSystemConsumer kc = new NewKafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
- System.out.println("kc=" + kc + "!!!!!!!!!!!!!!!!!GETTING FOR NKC for " + systemName);
+ LOG.info("Created samza system consumer {}", kc.toString());
return kc;
}
@@ -126,12 +123,11 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
Map<String, String> injectProps = new HashMap<>();
- // extract kafka consumer configs
+ // extract kafka client configs
KafkaConsumerConfig consumerConfig =
KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
- LOG.info("==============>Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}",
- systemName, consumerConfig.originals());
+ LOG.info("KafkaClient properties for systemName {}: {}", systemName, consumerConfig.originals());
return new KafkaConsumer<>(consumerConfig.originals());
}
@@ -146,29 +142,23 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
LOG.warn("attempting to start a stopped consumer");
return;
}
- LOG.info("==============>About to start consumer");
// initialize the subscriptions for all the registered TopicPartitions
startSubscription();
- LOG.info("==============>subscription started");
// needs to be called after all the registrations are completed
setFetchThresholds();
- LOG.info("==============>thresholds ste");
// Create the proxy to do the actual message reading. It is a separate thread that reads the messages from the stream
// and puts them into the sink.
createConsumerProxy();
- LOG.info("==============>proxy started");
startConsumer();
- LOG.info("==============>consumer started");
+ LOG.info("consumer {} started", this);
}
private void startSubscription() {
- //subscribe to all the TopicPartitions
- LOG.info("==============>startSubscription for TP: " + topicPartitions2SSP.keySet());
+ //subscribe to all the registered TopicPartitions
+ LOG.info("consumer {}, subscribes to {} ", this, topicPartitions2SSP.keySet());
try {
synchronized (kafkaConsumer) {
// we are using assign (and not subscribe), so we need to specify both topic and partition
- //topicPartitions2SSP.put(new TopicPartition("FAKE PARTITION", 0), new SystemStreamPartition("Some","Another", new Partition(0)));
- //topicPartitions2Offset.put(new TopicPartition("FAKE PARTITION", 0), "1234");
kafkaConsumer.assign(topicPartitions2SSP.keySet());
}
} catch (Exception e) {
@@ -184,7 +174,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
// create the thread with the consumer
proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink, samzaConsumerMetrics, metricName);
- LOG.info("==============>Created consumer proxy: " + proxy);
+ LOG.info("Created consumer proxy: " + proxy);
}
/*
@@ -194,6 +184,10 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
*/
void startConsumer() {
//set the offset for each TopicPartition
+ if (topicPartitions2Offset.size() <= 0) {
+ LOG.warn("Consumer {} is not subscribed to any SSPs", this);
+ }
+
topicPartitions2Offset.forEach((tp, startingOffsetString) -> {
long startingOffset = Long.valueOf(startingOffsetString);
@@ -209,16 +203,15 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
throw new SamzaException(e);
}
- LOG.info("==============>Changing Consumer's position for tp = " + tp + " to " + startingOffsetString);
+ LOG.info("Changing consumer's starting offset for tp = " + tp + " to " + startingOffsetString);
// add the partition to the proxy
proxy.addTopicPartition(topicPartitions2SSP.get(tp), startingOffset);
});
- System.out.println("#####################started " + this + "; kc=" + kafkaConsumer);
// start the proxy thread
if (proxy != null && !proxy.isRunning()) {
- System.out.println("#####################starting proxy " + proxy);
+ LOG.info("Starting proxy: " + proxy);
proxy.start();
}
}
@@ -226,29 +219,34 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
private void setFetchThresholds() {
// get the thresholds, and set defaults if not defined.
KafkaConfig kafkaConfig = new KafkaConfig(config);
+
Option<String> fetchThresholdOption = kafkaConfig.getConsumerFetchThreshold(systemName);
long fetchThreshold = FETCH_THRESHOLD;
if (fetchThresholdOption.isDefined()) {
fetchThreshold = Long.valueOf(fetchThresholdOption.get());
- LOG.info("fetchThresholdOption is defined. fetchThreshold=" + fetchThreshold);
+ LOG.info("fetchThresholdOption is configured. fetchThreshold=" + fetchThreshold);
}
+
Option<String> fetchThresholdBytesOption = kafkaConfig.getConsumerFetchThresholdBytes(systemName);
long fetchThresholdBytes = FETCH_THRESHOLD_BYTES;
if (fetchThresholdBytesOption.isDefined()) {
fetchThresholdBytes = Long.valueOf(fetchThresholdBytesOption.get());
- LOG.info("fetchThresholdBytesOption is defined. fetchThresholdBytes=" + fetchThresholdBytes);
+ LOG.info("fetchThresholdBytesOption is configured. fetchThresholdBytes=" + fetchThresholdBytes);
}
+
+ int numTPs = topicPartitions2SSP.size();
+ assert (numTPs == topicPartitions2Offset.size());
+
LOG.info("fetchThresholdBytes = " + fetchThresholdBytes + "; fetchThreshold=" + fetchThreshold);
- LOG.info("topicPartitions2Offset #=" + topicPartitions2Offset.size() + "; topicPartition2SSP #="
- + topicPartitions2SSP.size());
+ LOG.info("number of topicPartitions " + numTPs);
- if (topicPartitions2SSP.size() > 0) {
- perPartitionFetchThreshold = fetchThreshold / topicPartitions2SSP.size();
+ if (numTPs > 0) {
+ perPartitionFetchThreshold = fetchThreshold / numTPs;
LOG.info("perPartitionFetchThreshold=" + perPartitionFetchThreshold);
if (fetchThresholdBytesEnabled) {
// currently this feature cannot be enabled, because we do not have the size of the messages available.
// messages get double buffered, hence divide by 2
- perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / topicPartitions2SSP.size();
+ perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / numTPs;
LOG.info("perPartitionFetchThresholdBytes is enabled. perPartitionFetchThresholdBytes="
+ perPartitionFetchThresholdBytes);
}
@@ -257,23 +255,22 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
@Override
public void stop() {
- System.out.println("kc=" + this + "!!!!!!!!!!!!!!!!!!!!!! stopping "+ "; kc=" + kafkaConsumer);
- System.out.println("kc=" + this + "!!!!!!!!!!!!!!!!!!!!!!TPs = " + topicPartitions2Offset);
+ LOG.info("Stopping Samza kafkaConsumer " + this);
if (!stopped.compareAndSet(false, true)) {
LOG.warn("attempting to stop stopped consumer.");
return;
}
- LOG.warn("Stopping SamzaRawLiKafkaConsumer + " + this);
// stop the proxy (with 5 minutes timeout)
if (proxy != null) {
- System.out.println("##################### stopping proxy " + proxy);
+ LOG.info("Stopping proxy " + proxy);
proxy.stop(TimeUnit.MINUTES.toMillis(5));
}
try {
synchronized (kafkaConsumer) {
+ LOG.info("Closing kafka consumer " + kafkaConsumer);
kafkaConsumer.close();
}
} catch (Exception e) {
@@ -304,7 +301,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
topicPartitions2SSP.put(tp, systemStreamPartition);
- LOG.info("============>registering ssp = " + systemStreamPartition + " with offset " + offset + "; kc=" + this);
+ LOG.info("Registering ssp = " + systemStreamPartition + " with offset " + offset);
String existingOffset = topicPartitions2Offset.get(tp);
// register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
@@ -328,7 +325,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
@Override
public String toString() {
- return systemName + " " + clientId + "/" + super.toString();
+ return systemName + "/" + clientId + "/" + super.toString();
}
@Override
@@ -339,21 +336,15 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
if (!proxy.isRunning()) {
stop();
if (proxy.getFailureCause() != null) {
- String message = "LiKafkaConsumerProxy has stopped";
- if (proxy.getFailureCause() instanceof org.apache.kafka.common.errors.TopicAuthorizationException) {
- message +=
- " due to TopicAuthorizationException Please refer to go/samzaacluserguide to correctly set up acls for your topic";
- }
+ String message = "KafkaConsumerProxy has stopped";
throw new SamzaException(message, proxy.getFailureCause());
} else {
- LOG.warn("Failure cause not populated for LiKafkaConsumerProxy");
+ LOG.warn("Failure cause is not populated for KafkaConsumerProxy");
throw new SamzaException("LiKafkaConsumerProxy has stopped");
}
}
Map<SystemStreamPartition, List<IncomingMessageEnvelope>> res = super.poll(systemStreamPartitions, timeout);
- //LOG.info("=============================>. Res for " + systemStreamPartitions);
- //LOG.info("=============================>. Res:" + res.toString());
return res;
}
@@ -399,14 +390,14 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
}
if (fetchThresholdBytesEnabled) {
- return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes; // TODO Validate
+ return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes;
} else {
return getNumMessagesInQueue(ssp) < perPartitionFetchThreshold;
}
}
void addMessage(SystemStreamPartition ssp, IncomingMessageEnvelope envelope) {
- LOG.info("==============>Incoming message ssp = {}: envelope = {}.", ssp, envelope);
+ LOG.trace("Incoming message ssp = {}: envelope = {}.", ssp, envelope);
try {
put(ssp, envelope);
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
deleted file mode 100644
index a3f76e7..0000000
--- a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- */
-package org.apache.samza.system.kafka
-
-import java.nio.ByteBuffer
-import java.util.concurrent.CountDownLatch
-
-import kafka.api.{PartitionOffsetsResponse, _}
-import kafka.common.TopicAndPartition
-import kafka.consumer.SimpleConsumer
-import kafka.message.{ByteBufferMessageSet, Message, MessageAndOffset, MessageSet}
-import org.apache.kafka.common.protocol.Errors
-import org.apache.samza.SamzaException
-import org.apache.samza.util.Logging
-import org.junit.Assert._
-import org.junit._
-import org.mockito.Matchers._
-import org.mockito.Mockito._
-import org.mockito.invocation.InvocationOnMock
-import org.mockito.stubbing.Answer
-import org.mockito.{Matchers, Mockito}
-
-import scala.collection.JavaConverters._
-
-class TestBrokerProxy extends Logging {
- /*
- val tp2 = new TopicAndPartition("Redbird", 2013)
- var fetchTp1 = true // control whether fetching tp1 messages or not
-
- @Test def brokerProxyRetrievesMessagesCorrectly() = {
- val (bp, tp, sink) = getMockBrokerProxy()
-
- bp.start
- bp.addTopicPartition(tp, Option("0"))
- // Add tp2, which should never receive messages since sink disables it.
- bp.addTopicPartition(tp2, Option("0"))
- Thread.sleep(1000)
- assertEquals(2, sink.receivedMessages.size)
- assertEquals(42, sink.receivedMessages(0)._2.offset)
- assertEquals(84, sink.receivedMessages(1)._2.offset)
- }
-
- @Test def brokerProxySkipsFetchForEmptyRequests() = {
- val (bp, tp, sink) = getMockBrokerProxy()
-
- bp.start
- // Only add tp2, which should never receive messages since sink disables it.
- bp.addTopicPartition(tp2, Option("0"))
- Thread.sleep(1000)
- assertEquals(0, sink.receivedMessages.size)
- assertTrue(bp.metrics.brokerSkippedFetchRequests.get((bp.host, bp.port)).getCount > 0)
- assertEquals(0, bp.metrics.brokerReads.get((bp.host, bp.port)).getCount)
- }
-
- @Test def brokerProxyThrowsExceptionOnDuplicateTopicPartitions() = {
- val (bp, tp, _) = getMockBrokerProxy()
- bp.start
- bp.addTopicPartition(tp, Option("0"))
-
- try {
- bp.addTopicPartition(tp, Option("1"))
- fail("Should have thrown an exception")
- } catch {
- case se: SamzaException => assertEquals(se.getMessage, "Already consuming TopicPartition [Redbird,2012]")
- case other: Exception => fail("Got some other exception than what we were expecting: " + other)
- }
- }
-
- def getMockBrokerProxy() = {
- val sink = new MessageSink {
- val receivedMessages = new scala.collection.mutable.ListBuffer[(TopicAndPartition, MessageAndOffset, Boolean)]()
-
- def abdicate(tp: TopicAndPartition, nextOffset: Long) {}
-
- def refreshDropped() {}
-
- def addMessage(tp: TopicAndPartition, msg: MessageAndOffset, highWatermark: Long) {
- receivedMessages += ((tp, msg, msg.offset.equals(highWatermark)))
- }
-
- def setIsAtHighWatermark(tp: TopicAndPartition, isAtHighWatermark: Boolean) {
- }
-
- // Never need messages for tp2.
- def needsMoreMessages(tp: TopicAndPartition): Boolean = !tp.equals(tp2) && fetchTp1
- }
-
- val system = "daSystem"
- val host = "host"
- val port = 2222
- val tp = new TopicAndPartition("Redbird", 2012)
- val metrics = new KafkaSystemConsumerMetrics(system)
-
- metrics.registerBrokerProxy(host, port)
- metrics.registerTopicAndPartition(tp)
- metrics.topicPartitions.get((host, port)).set(1)
-
- val bp = new BrokerProxy(
- host,
- port,
- system,
- "daClientId",
- metrics,
- sink,
- offsetGetter = new GetOffset("fail", Map("Redbird" -> "largest"))) {
-
- override val sleepMSWhileNoTopicPartitions = 100
- // Speed up for test
- var alreadyCreatedConsumer = false
-
- // Scala traits and Mockito mocks don't mix, unfortunately.
- override def createSimpleConsumer() = {
- if (alreadyCreatedConsumer) {
- System.err.println("Should only be creating one consumer in this test!")
- throw new InterruptedException("Should only be creating one consumer in this test!")
- }
- alreadyCreatedConsumer = true
-
- new DefaultFetchSimpleConsumer("a", 1, 2, 3, "b", new StreamFetchSizes(42)) {
- val sc = Mockito.mock(classOf[SimpleConsumer])
- val mockOffsetResponse = {
- val offsetResponse = Mockito.mock(classOf[OffsetResponse])
- val partitionOffsetResponse = {
- val por = Mockito.mock(classOf[PartitionOffsetsResponse])
- when(por.offsets).thenReturn(List(1l).toSeq)
- por
- }
-
- val map = scala.Predef.Map[TopicAndPartition, PartitionOffsetsResponse](tp -> partitionOffsetResponse, tp2 -> partitionOffsetResponse)
- when(offsetResponse.partitionErrorAndOffsets).thenReturn(map)
- offsetResponse
- }
-
- when(sc.getOffsetsBefore(any(classOf[OffsetRequest]))).thenReturn(mockOffsetResponse)
-
- val fetchResponse = {
- val fetchResponse = Mockito.mock(classOf[FetchResponse])
-
- val messageSet = {
- val messageSet = Mockito.mock(classOf[ByteBufferMessageSet])
-
- def getMessage() = new Message(Mockito.mock(classOf[ByteBuffer]))
- val messages = List(new MessageAndOffset(getMessage, 42), new MessageAndOffset(getMessage, 84))
-
- when(messageSet.sizeInBytes).thenReturn(43)
- when(messageSet.size).thenReturn(44)
- when(messageSet.iterator).thenReturn(messages.iterator)
- when(messageSet.head).thenReturn(messages.head)
- messageSet
- }
-
- val fetchResponsePartitionData = FetchResponsePartitionData(Errors.NONE, 500, messageSet)
- val map = scala.Predef.Map[TopicAndPartition, FetchResponsePartitionData](tp -> fetchResponsePartitionData)
-
- when(fetchResponse.data).thenReturn(map.toSeq)
- when(fetchResponse.messageSet(any(classOf[String]), any(classOf[Int]))).thenReturn(messageSet)
- fetchResponse
- }
- when(sc.fetch(any(classOf[FetchRequest]))).thenReturn(fetchResponse)
-
- override def close() = sc.close()
-
- override def send(request: TopicMetadataRequest): TopicMetadataResponse = sc.send(request)
-
- override def fetch(request: FetchRequest): FetchResponse = {
- // Verify that we only get fetch requests for one tp, even though
- // two were registered. This is to verify that
- // sink.needsMoreMessages works.
- assertEquals(1, request.requestInfo.size)
- sc.fetch(request)
- }
-
- when(sc.earliestOrLatestOffset(any(classOf[TopicAndPartition]), any(classOf[Long]), any(classOf[Int]))).thenReturn(100)
-
- override def getOffsetsBefore(request: OffsetRequest): OffsetResponse = sc.getOffsetsBefore(request)
-
- override def commitOffsets(request: OffsetCommitRequest): OffsetCommitResponse = sc.commitOffsets(request)
-
- override def fetchOffsets(request: OffsetFetchRequest): OffsetFetchResponse = sc.fetchOffsets(request)
-
- override def earliestOrLatestOffset(topicAndPartition: TopicAndPartition, earliestOrLatest: Long, consumerId: Int): Long = sc.earliestOrLatestOffset(topicAndPartition, earliestOrLatest, consumerId)
- }
- }
-
- }
-
- (bp, tp, sink)
- }
-
- @Test def brokerProxyUpdateLatencyMetrics() = {
- val (bp, tp, _) = getMockBrokerProxy()
-
- bp.start
- bp.addTopicPartition(tp, Option("0"))
- Thread.sleep(1000)
- // update when fetching messages
- assertEquals(500, bp.metrics.highWatermark.get(tp).getValue)
- assertEquals(415, bp.metrics.lag.get(tp).getValue)
-
- fetchTp1 = false
- Thread.sleep(1000)
- // update when not fetching messages
- assertEquals(100, bp.metrics.highWatermark.get(tp).getValue)
- assertEquals(15, bp.metrics.lag.get(tp).getValue)
-
- fetchTp1 = true
- }
-
- @Test def brokerProxyCorrectlyHandlesOffsetOutOfRange(): Unit = {
- // Need to wait for the thread to do some work before ending the test
- val countdownLatch = new CountDownLatch(1)
- var failString: String = null
-
- val mockMessageSink = mock(classOf[MessageSink])
- when(mockMessageSink.needsMoreMessages(any())).thenReturn(true)
-
- val doNothingMetrics = new KafkaSystemConsumerMetrics()
-
- val tp = new TopicAndPartition("topic", 42)
-
- val mockOffsetGetter = mock(classOf[GetOffset])
- // This will be used by the simple consumer below, and this is the response that simple consumer needs
- when(mockOffsetGetter.isValidOffset(any(classOf[DefaultFetchSimpleConsumer]), Matchers.eq(tp), Matchers.eq("0"))).thenReturn(true)
- when(mockOffsetGetter.getResetOffset(any(classOf[DefaultFetchSimpleConsumer]), Matchers.eq(tp))).thenReturn(1492l)
-
- var callsToCreateSimpleConsumer = 0
- val mockSimpleConsumer = mock(classOf[DefaultFetchSimpleConsumer])
-
- // Create an answer that first indicates offset out of range on first invocation and on second
- // verifies that the parameters have been updated to what we expect them to be
- val answer = new Answer[FetchResponse]() {
- var invocationCount = 0
-
- def answer(invocation: InvocationOnMock): FetchResponse = {
- val arguments = invocation.getArguments()(0).asInstanceOf[List[Object]](0).asInstanceOf[(String, Long)]
-
- if (invocationCount == 0) {
- if (arguments !=(tp, 0)) {
- failString = "First invocation did not have the right arguments: " + arguments
- countdownLatch.countDown()
- }
- val mfr = mock(classOf[FetchResponse])
- when(mfr.hasError).thenReturn(true)
- when(mfr.error("topic", 42)).thenReturn(Errors.OFFSET_OUT_OF_RANGE)
-
- val messageSet = mock(classOf[MessageSet])
- when(messageSet.iterator).thenReturn(Iterator.empty)
- val response = mock(classOf[FetchResponsePartitionData])
- when(response.error).thenReturn(Errors.OFFSET_OUT_OF_RANGE)
- val responseMap = Map(tp -> response)
- when(mfr.data).thenReturn(responseMap.toSeq)
- invocationCount += 1
- mfr
- } else {
- if (arguments !=(tp, 1492)) {
- failString = "On second invocation, arguments were not correct: " + arguments
- }
- countdownLatch.countDown()
- Thread.currentThread().interrupt()
- null
- }
- }
- }
-
- when(mockSimpleConsumer.defaultFetch(any())).thenAnswer(answer)
-
- // So now we have a fetch response that will fail. Prime the mockGetOffset to send us to a new offset
-
- val bp = new BrokerProxy("host", 423, "system", "clientID", doNothingMetrics, mockMessageSink, Int.MaxValue, 1024000, new StreamFetchSizes(256 * 1024), 524288, 1000, mockOffsetGetter) {
-
- override def createSimpleConsumer() = {
- if (callsToCreateSimpleConsumer > 1) {
- failString = "Tried to create more than one simple consumer"
- countdownLatch.countDown()
- }
- callsToCreateSimpleConsumer += 1
- mockSimpleConsumer
- }
- }
-
- bp.addTopicPartition(tp, Option("0"))
- bp.start
- countdownLatch.await()
- bp.stop
- if (failString != null) {
- fail(failString)
- }
- }
-
- /**
- * TODO fix
- * Test that makes sure that BrokerProxy abdicates all TopicAndPartitions
- * that it owns when a consumer failure occurs.
- */
- @Test def brokerProxyAbdicatesOnConnectionFailure(): Unit = {
- val countdownLatch = new CountDownLatch(1)
- var abdicated: Option[TopicAndPartition] = None
- @volatile var refreshDroppedCount = 0
- val mockMessageSink = new MessageSink {
- override def setIsAtHighWatermark(tp: TopicAndPartition, isAtHighWatermark: Boolean) {
- }
-
- override def addMessage(tp: TopicAndPartition, msg: MessageAndOffset, highWatermark: Long) {
- }
-
- override def abdicate(tp: TopicAndPartition, nextOffset: Long) {
- abdicated = Some(tp)
- countdownLatch.countDown
- }
-
- override def refreshDropped() {
- refreshDroppedCount += 1
- }
-
- override def needsMoreMessages(tp: TopicAndPartition): Boolean = {
- true
- }
- }
-
- val doNothingMetrics = new KafkaSystemConsumerMetrics()
- val tp = new TopicAndPartition("topic", 42)
- val mockOffsetGetter = mock(classOf[GetOffset])
- val mockSimpleConsumer = mock(classOf[DefaultFetchSimpleConsumer])
-
- when(mockOffsetGetter.isValidOffset(any(classOf[DefaultFetchSimpleConsumer]), Matchers.eq(tp), Matchers.eq("0"))).thenReturn(true)
- when(mockOffsetGetter.getResetOffset(any(classOf[DefaultFetchSimpleConsumer]), Matchers.eq(tp))).thenReturn(1492l)
- when(mockSimpleConsumer.defaultFetch(any())).thenThrow(new SamzaException("Pretend this is a ClosedChannelException. Can't use ClosedChannelException because it's checked, and Mockito doesn't like that."))
-
- val bp = new BrokerProxy("host", 567, "system", "clientID", doNothingMetrics, mockMessageSink, Int.MaxValue, 1024000, new StreamFetchSizes(256 * 1024), 524288, 1000, mockOffsetGetter) {
- override def createSimpleConsumer() = {
- mockSimpleConsumer
- }
- }
-
- val waitForRefresh = () => {
- val currentRefreshDroppedCount = refreshDroppedCount
- while (refreshDroppedCount == currentRefreshDroppedCount) {
- Thread.sleep(100)
- }
- }
-
- bp.addTopicPartition(tp, Option("0"))
- bp.start
- // BP should refresh on startup.
- waitForRefresh()
- countdownLatch.await()
- // BP should continue refreshing after it's abdicated all TopicAndPartitions.
- waitForRefresh()
- bp.stop
- assertEquals(tp, abdicated.getOrElse(null))
- }
-
- @Test def brokerProxyAbdicatesHardErrors(): Unit = {
- val doNothingMetrics = new KafkaSystemConsumerMetrics
- val mockMessageSink = new MessageSink {
- override def needsMoreMessages(tp: TopicAndPartition): Boolean = true
- override def abdicate(tp: TopicAndPartition, nextOffset: Long) {}
- override def addMessage(tp: TopicAndPartition, msg: MessageAndOffset, highWatermark: Long) {}
- override def refreshDropped() {throw new OutOfMemoryError("Test - OOME")}
- override def setIsAtHighWatermark(tp: TopicAndPartition, isAtHighWatermark: Boolean): Unit = {}
- }
- val mockOffsetGetter = mock(classOf[GetOffset])
- val mockSimpleConsumer = mock(classOf[DefaultFetchSimpleConsumer])
-
- val bp = new BrokerProxy("host", 658, "system", "clientID", doNothingMetrics, mockMessageSink, Int.MaxValue, 1024000, new StreamFetchSizes(256 * 1024), 524288, 1000, mockOffsetGetter) {
- override def createSimpleConsumer() = {
- mockSimpleConsumer
- }
- }
- var caughtError = false
- try {
- bp.thread.run
- } catch {
- case e: SamzaException => {
- assertEquals(e.getMessage, "Got out of memory error in broker proxy thread.")
- info("Received OutOfMemoryError in broker proxy.")
- caughtError = true
- }
- }
- assertEquals(true, caughtError)
- val mockMessageSink2 = new MessageSink {
- override def needsMoreMessages(tp: TopicAndPartition): Boolean = true
- override def abdicate(tp: TopicAndPartition, nextOffset: Long): Unit = {}
- override def addMessage(tp: TopicAndPartition, msg: MessageAndOffset, highWatermark: Long): Unit = {}
- override def refreshDropped(): Unit = {throw new StackOverflowError("Test - SOE")}
- override def setIsAtHighWatermark(tp: TopicAndPartition, isAtHighWatermark: Boolean): Unit = {}
- }
- caughtError = false
- val bp2 = new BrokerProxy("host", 689, "system", "clientID2", doNothingMetrics, mockMessageSink2, Int.MaxValue, 1024000, new StreamFetchSizes(256 * 1024), 524288, 1000, mockOffsetGetter) {
- override def createSimpleConsumer() = {
- mockSimpleConsumer
- }
- }
- try {
- bp2.thread.run
- } catch {
- case e: SamzaException => {
- assertEquals(e.getMessage, "Got stack overflow error in broker proxy thread.")
- info("Received StackOverflowError in broker proxy.")
- caughtError = true
- }
- }
- assertEquals(true, caughtError)
- }
-
- @Test
- def brokerProxyStopCloseConsumer: Unit = {
- val mockSimpleConsumer = mock(classOf[DefaultFetchSimpleConsumer])
- val bp = new BrokerProxy("host", 0, "system", "clientID", new KafkaSystemConsumerMetrics(), null){
- override def createSimpleConsumer() = {
- mockSimpleConsumer
- }
- }
- bp.start
- bp.stop
- verify(mockSimpleConsumer).close
- }
- */
-}
http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
----------------------------------------------------------------------
diff --git a/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala b/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
index 2ea9a5f..8405c63 100644
--- a/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
+++ b/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
@@ -223,16 +223,16 @@ class StreamTaskTestUtil {
* interrupt, which is forwarded on to ThreadJob, and marked as a failure).
*/
def stopJob(job: StreamJob) {
- // make sure we don't kill the job before it was started
+ // make sure we don't kill the job before it was started.
+ // eventProcesses guarantees all the consumers have been initialized
val tasks = TestTask.tasks
val task = tasks.values.toList.head
task.eventProcessed.await(60, TimeUnit.SECONDS)
- System.out.println("THREAD: JOB KILL BEFORE")
+ assertEquals(0, task.eventProcessed.getCount)
+
// Shutdown task.
job.kill
- System.out.println("THREAD: JOB KILL")
val status = job.waitForFinish(60000)
- System.out.println("THREAD: JOB KILL WAIT")
assertEquals(ApplicationStatus.UnsuccessfulFinish, status)
}
[44/47] samza git commit: added License message
Posted by bo...@apache.org.
added License message
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/5120740a
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/5120740a
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/5120740a
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 5120740aa04ab5dcb24ffd3ff5f7dc5114a32653
Parents: 32c9282
Author: Boris S <bo...@apache.org>
Authored: Wed Sep 12 15:58:55 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Sep 12 15:58:55 2018 -0700
----------------------------------------------------------------------
.../samza/system/kafka/KafkaSystemConsumer.java | 2 --
.../clients/consumer/TestKafkaConsumerConfig.java | 16 ++++++++++++++++
2 files changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/5120740a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
index 9cdfce1..9101a89 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
@@ -363,12 +363,10 @@ public class KafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements Sy
}
boolean needsMoreMessages(SystemStreamPartition ssp) {
- if (LOG.isDebugEnabled()) {
LOG.debug("needsMoreMessages from following SSP: {}. fetchLimitByBytes enabled={}; messagesSizeInQueue={};"
+ "(limit={}); messagesNumInQueue={}(limit={};", ssp, fetchThresholdBytesEnabled,
getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes, getNumMessagesInQueue(ssp),
perPartitionFetchThreshold);
- }
if (fetchThresholdBytesEnabled) {
return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes;
http://git-wip-us.apache.org/repos/asf/samza/blob/5120740a/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java b/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
index ee300d0..264098b 100644
--- a/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
+++ b/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.kafka.clients.consumer;
import java.util.Collections;
[22/47] samza git commit: added test
Posted by bo...@apache.org.
added test
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/89f79829
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/89f79829
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/89f79829
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 89f79829107ed21dd88058922b6038835af1cfbd
Parents: 34ae8ba
Author: Boris S <bo...@apache.org>
Authored: Thu Aug 30 10:30:55 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Thu Aug 30 10:30:55 2018 -0700
----------------------------------------------------------------------
.../clients/consumer/KafkaConsumerConfig.java | 22 ++
.../apache/samza/system/kafka/BrokerProxy.scala | 332 -------------------
.../samza/system/kafka/KafkaConsumerProxy.java | 6 +-
.../system/kafka/KafkaSystemConsumer.scala | 309 -----------------
.../kafka/KafkaSystemConsumerMetrics.scala | 1 +
.../system/kafka/NewKafkaSystemConsumer.java | 19 +-
.../kafka/TestKafkaCheckpointManager.scala | 3 +-
.../samza/system/kafka/TestBrokerProxy.scala | 3 +
.../system/kafka/TestKafkaSystemConsumer.scala | 191 -----------
.../kafka/TestNewKafkaSystemConsumer.java | 203 ++++++++++++
10 files changed, 237 insertions(+), 852 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index b29a041..88437ee 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -23,9 +23,14 @@ package org.apache.kafka.clients.consumer;
import java.util.Map;
import java.util.Properties;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.apache.samza.SamzaException;
import org.apache.samza.config.Config;
import org.apache.samza.config.ConfigException;
import org.apache.samza.config.JobConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import scala.Option;
@@ -34,6 +39,8 @@ import scala.Option;
*/
public class KafkaConsumerConfig extends ConsumerConfig {
+ public static final Logger LOG = LoggerFactory.getLogger(KafkaConsumerConfig.class);
+
private static final String PRODUCER_CLIENT_ID_PREFIX = "kafka-producer";
private static final String CONSUMER_CLIENT_ID_PREFIX = "kafka-consumer";
private static final String SAMZA_OFFSET_LARGEST = "largest";
@@ -76,6 +83,9 @@ public class KafkaConsumerConfig extends ConsumerConfig {
if (! subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
// get it from the producer config
String bootstrapServer = config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
+ if (StringUtils.isEmpty(bootstrapServer)) {
+ throw new SamzaException("Missing " + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG + " config for " + systemName);
+ }
consumerProps.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer);
}
@@ -85,6 +95,18 @@ public class KafkaConsumerConfig extends ConsumerConfig {
RangeAssignor.class.getName());
+ // the consumer is fully typed, and deserialization can be too. But in case it is not provided we should
+ // default to byte[]
+ if ( !config.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
+ LOG.info("default key serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
+ consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ }
+ if ( !config.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
+ LOG.info("default value serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
+ consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ }
+
+
// NOT SURE THIS IS NEEDED TODO
String maxPollRecords = subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT);;
consumerProps.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/main/scala/org/apache/samza/system/kafka/BrokerProxy.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/BrokerProxy.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/BrokerProxy.scala
deleted file mode 100644
index 423b68a..0000000
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/BrokerProxy.scala
+++ /dev/null
@@ -1,332 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- */
-
-package org.apache.samza.system.kafka
-
-import java.lang.Thread.UncaughtExceptionHandler
-import java.nio.channels.ClosedByInterruptException
-import java.util.concurrent.{ConcurrentHashMap, CountDownLatch}
-
-import kafka.api._
-import kafka.common.{ErrorMapping, NotLeaderForPartitionException, TopicAndPartition, UnknownTopicOrPartitionException}
-import kafka.consumer.ConsumerConfig
-import kafka.message.MessageSet
-import org.apache.samza.SamzaException
-import org.apache.samza.util.ExponentialSleepStrategy
-import org.apache.samza.util.KafkaUtil
-import org.apache.samza.util.Logging
-
-import scala.collection.JavaConverters._
-import scala.collection.concurrent
-
-/**
- * A BrokerProxy consolidates Kafka fetches meant for a particular broker and retrieves them all at once, providing
- * a way for consumers to retrieve those messages by topic and partition.
- */
-class BrokerProxy(
- val host: String,
- val port: Int,
- val system: String,
- val clientID: String,
- val metrics: KafkaSystemConsumerMetrics,
- val messageSink: MessageSink,
- val timeout: Int = ConsumerConfig.SocketTimeout,
- val bufferSize: Int = ConsumerConfig.SocketBufferSize,
- val fetchSize: StreamFetchSizes = new StreamFetchSizes,
- val consumerMinSize:Int = ConsumerConfig.MinFetchBytes,
- val consumerMaxWait:Int = ConsumerConfig.MaxFetchWaitMs,
- offsetGetter: GetOffset = new GetOffset("fail")) extends Toss with Logging {
-
- /**
- * How long should the fetcher thread sleep before checking if any TopicPartitions has been added to its purview
- */
- val sleepMSWhileNoTopicPartitions = 100
-
- /** What's the next offset for a particular partition? **/
- val nextOffsets:concurrent.Map[TopicAndPartition, Long] = new ConcurrentHashMap[TopicAndPartition, Long]().asScala
-
- /** Block on the first call to get message if the fetcher has not yet returned its initial results **/
- // TODO: It should be sufficient to just use the count down latch and await on it for each of the calls, but
- // VisualVM was showing the consumer thread spending all its time in the await method rather than returning
- // immediately, even though the process was proceeding normally. Hence the extra boolean. Should be investigated.
- val firstCallBarrier = new CountDownLatch(1)
- var firstCall = true
-
- var simpleConsumer = createSimpleConsumer()
-
- metrics.registerBrokerProxy(host, port)
-
- def createSimpleConsumer() = {
- val hostString = "%s:%d" format (host, port)
- info("Creating new SimpleConsumer for host %s for system %s" format (hostString, system))
-
- val sc = new DefaultFetchSimpleConsumer(host, port, timeout, bufferSize, clientID, fetchSize, consumerMinSize, consumerMaxWait)
- sc
- }
-
- def addTopicPartition(tp: TopicAndPartition, nextOffset: Option[String]) = {
- debug("Adding new topic and partition %s to queue for %s" format (tp, host))
-
- if (nextOffsets.asJava.containsKey(tp)) {
- toss("Already consuming TopicPartition %s" format tp)
- }
-
- val offset = if (nextOffset.isDefined && offsetGetter.isValidOffset(simpleConsumer, tp, nextOffset.get)) {
- nextOffset
- .get
- .toLong
- } else {
- warn("It appears that we received an invalid or empty offset %s for %s. Attempting to use Kafka's auto.offset.reset setting. This can result in data loss if processing continues." format (nextOffset, tp))
-
- offsetGetter.getResetOffset(simpleConsumer, tp)
- }
-
- debug("Got offset %s for new topic and partition %s." format (offset, tp))
-
- nextOffsets += tp -> offset
-
- metrics.topicPartitions.get((host, port)).set(nextOffsets.size)
- }
-
- def removeTopicPartition(tp: TopicAndPartition) = {
- if (nextOffsets.asJava.containsKey(tp)) {
- val offset = nextOffsets.remove(tp)
- metrics.topicPartitions.get((host, port)).set(nextOffsets.size)
- debug("Removed %s" format tp)
- offset
- } else {
- warn("Asked to remove topic and partition %s, but not in map (keys = %s)" format (tp, nextOffsets.keys.mkString(",")))
- None
- }
- }
-
- val thread = new Thread(new Runnable {
- def run {
- var reconnect = false
-
- try {
- (new ExponentialSleepStrategy).run(
- loop => {
- if (reconnect) {
- metrics.reconnects.get((host, port)).inc
- simpleConsumer.close()
- simpleConsumer = createSimpleConsumer()
- }
-
- while (!Thread.currentThread.isInterrupted) {
- messageSink.refreshDropped
- if (nextOffsets.size == 0) {
- debug("No TopicPartitions to fetch. Sleeping.")
- Thread.sleep(sleepMSWhileNoTopicPartitions)
- } else {
- fetchMessages
-
- // If we got here, fetchMessages didn't throw an exception, i.e. it was successful.
- // In that case, reset the loop delay, so that the next time an error occurs,
- // we start with a short retry delay.
- loop.reset
- }
- }
- },
-
- (exception, loop) => {
- warn("Restarting consumer due to %s. Releasing ownership of all partitions, and restarting consumer. Turn on debugging to get a full stack trace." format exception)
- debug("Exception detail:", exception)
- abdicateAll
- reconnect = true
- })
- } catch {
- case e: InterruptedException => info("Got interrupt exception in broker proxy thread.")
- case e: ClosedByInterruptException => info("Got closed by interrupt exception in broker proxy thread.")
- case e: OutOfMemoryError => throw new SamzaException("Got out of memory error in broker proxy thread.")
- case e: StackOverflowError => throw new SamzaException("Got stack overflow error in broker proxy thread.")
- }
-
- if (Thread.currentThread.isInterrupted) info("Shutting down due to interrupt.")
- }
- }, "BrokerProxy thread pointed at %s:%d for client %s" format (host, port, clientID))
-
- private def fetchMessages(): Unit = {
- val topicAndPartitionsToFetch = nextOffsets.filterKeys(messageSink.needsMoreMessages(_)).toList
-
- if (topicAndPartitionsToFetch.size > 0) {
- metrics.brokerReads.get((host, port)).inc
- val response: FetchResponse = simpleConsumer.defaultFetch(topicAndPartitionsToFetch: _*)
- firstCall = false
- firstCallBarrier.countDown()
-
- // Split response into errors and non errors, processing the errors first
- val (nonErrorResponses, errorResponses) = response.data.toSet.partition(_._2.error.code() == ErrorMapping.NoError)
-
- handleErrors(errorResponses, response)
-
- nonErrorResponses.foreach { case (tp, data) => moveMessagesToTheirQueue(tp, data) }
- } else {
- refreshLatencyMetrics
-
- debug("No topic/partitions need to be fetched for %s:%s right now. Sleeping %sms." format (host, port, sleepMSWhileNoTopicPartitions))
-
- metrics.brokerSkippedFetchRequests.get((host, port)).inc
-
- Thread.sleep(sleepMSWhileNoTopicPartitions)
- }
- }
-
- /**
- * Releases ownership for a single TopicAndPartition. The
- * KafkaSystemConsumer will try and find a new broker for the
- * TopicAndPartition.
- */
- def abdicate(tp: TopicAndPartition) = removeTopicPartition(tp) match {
- // Need to be mindful of a tp that was removed by another thread
- case Some(offset) => messageSink.abdicate(tp, offset)
- case None => warn("Tried to abdicate for topic partition not in map. Removed in interim?")
- }
-
- /**
- * Releases all TopicAndPartition ownership for this BrokerProxy thread. The
- * KafkaSystemConsumer will try and find a new broker for the
- * TopicAndPartition.
- */
- def abdicateAll {
- info("Abdicating all topic partitions.")
- val immutableNextOffsetsCopy = nextOffsets.toMap
- immutableNextOffsetsCopy.keySet.foreach(abdicate(_))
- }
-
- def handleErrors(errorResponses: Set[(TopicAndPartition, FetchResponsePartitionData)], response: FetchResponse) = {
- // FetchResponse should really return Option and a list of the errors so we don't have to find them ourselves
- case class Error(tp: TopicAndPartition, code: Short, exception: Exception)
-
- // Now subdivide the errors into three types: non-recoverable, not leader (== abdicate) and offset out of range (== get new offset)
-
- // Convert FetchResponse into easier-to-work-with Errors
- val errors = for (
- (topicAndPartition, responseData) <- errorResponses;
- error <- Option(response.error(topicAndPartition.topic, topicAndPartition.partition)) // Scala's being cranky about referring to error.getKey values...
- ) yield new Error(topicAndPartition, error.code(), error.exception())
-
- val (notLeaderOrUnknownTopic, otherErrors) = errors.partition { case (e) => e.code == ErrorMapping.NotLeaderForPartitionCode || e.code == ErrorMapping.UnknownTopicOrPartitionCode }
- val (offsetOutOfRangeErrors, remainingErrors) = otherErrors.partition(_.code == ErrorMapping.OffsetOutOfRangeCode)
-
- // Can recover from two types of errors: not leader (go find the new leader) and offset out of range (go get the new offset)
- // However, we want to bail as quickly as possible if there are non recoverable errors so that the state of the other
- // topic-partitions remains the same. That way, when we've rebuilt the simple consumer, we can come around and
- // handle the recoverable errors.
- remainingErrors.foreach(e => {
- warn("Got non-recoverable error codes during multifetch. Throwing an exception to trigger reconnect. Errors: %s" format remainingErrors.mkString(","))
- KafkaUtil.maybeThrowException(e.exception) })
-
- notLeaderOrUnknownTopic.foreach(e => {
- warn("Received (UnknownTopicOr|NotLeaderFor)Partition exception %s for %s. Abdicating" format(e.code, e.tp))
- abdicate(e.tp)
- })
-
- offsetOutOfRangeErrors.foreach(e => {
- warn("Received OffsetOutOfRange exception for %s. Current offset = %s" format (e.tp, nextOffsets.getOrElse(e.tp, "not found in map, likely removed in the interim")))
-
- try {
- val newOffset = offsetGetter.getResetOffset(simpleConsumer, e.tp)
- // Put the new offset into the map (if the tp still exists). Will catch it on the next go-around
- nextOffsets.replace(e.tp, newOffset)
- } catch {
- // UnknownTopic or NotLeader are routine events and handled via abdication. All others, bail.
- case _ @ (_:UnknownTopicOrPartitionException | _: NotLeaderForPartitionException) => warn("Received (UnknownTopicOr|NotLeaderFor)Partition exception %s for %s. Abdicating" format(e.code, e.tp))
- abdicate(e.tp)
- }
- })
- }
-
- def moveMessagesToTheirQueue(tp: TopicAndPartition, data: FetchResponsePartitionData) = {
- val messageSet: MessageSet = data.messages
- var nextOffset = nextOffsets(tp)
-
- messageSink.setIsAtHighWatermark(tp, data.hw == 0 || data.hw == nextOffset)
- require(messageSet != null)
- for (message <- messageSet.iterator) {
- messageSink.addMessage(tp, message, data.hw) // TODO: Verify this is correct
-
- nextOffset = message.nextOffset
-
- val bytesSize = message.message.payloadSize + message.message.keySize
- metrics.reads.get(tp).inc
- metrics.bytesRead.get(tp).inc(bytesSize)
- metrics.brokerBytesRead.get((host, port)).inc(bytesSize)
- metrics.offsets.get(tp).set(nextOffset)
- }
-
- nextOffsets.replace(tp, nextOffset) // use replace rather than put in case this tp was removed while we were fetching.
-
- // Update high water mark
- val hw = data.hw
- if (hw >= 0) {
- metrics.highWatermark.get(tp).set(hw)
- metrics.lag.get(tp).set(hw - nextOffset)
- } else {
- debug("Got a high water mark less than 0 (%d) for %s, so skipping." format (hw, tp))
- }
- }
- override def toString() = "BrokerProxy for %s:%d" format (host, port)
-
- def start {
- if (!thread.isAlive) {
- info("Starting " + toString)
- thread.setDaemon(true)
- thread.setName("Samza BrokerProxy " + thread.getName)
- thread.setUncaughtExceptionHandler(new UncaughtExceptionHandler {
- override def uncaughtException(t: Thread, e: Throwable) = error("Uncaught exception in broker proxy:", e)
- })
- thread.start
- } else {
- debug("Tried to start an already started broker proxy (%s). Ignoring." format toString)
- }
- }
-
- def stop {
- info("Shutting down " + toString)
-
- if (simpleConsumer != null) {
- info("closing simple consumer...")
- simpleConsumer.close
- }
-
- thread.interrupt
- thread.join
- }
-
- private def refreshLatencyMetrics {
- nextOffsets.foreach{
- case (topicAndPartition, offset) => {
- val latestOffset = simpleConsumer.earliestOrLatestOffset(topicAndPartition, -1, Request.OrdinaryConsumerId)
- trace("latest offset of %s is %s" format (topicAndPartition, latestOffset))
- if (latestOffset >= 0) {
- // only update the registered topicAndpartitions
- if(metrics.highWatermark.containsKey(topicAndPartition)) {
- metrics.highWatermark.get(topicAndPartition).set(latestOffset)
- }
- if(metrics.lag.containsKey(topicAndPartition)) {
- metrics.lag.get(topicAndPartition).set(latestOffset - offset)
- }
- }
- }
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 01b345a..e61e0ff 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -47,8 +47,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * Separate thread that reads messages from kafka and puts them int the BlockingEnvelopeMap
- * This class is not thread safe. There will be only one instance of this class per LiKafkaSystemConsumer object
+ * Separate thread that reads messages from kafka and puts them into the BlockingEnvelopeMap.
+ * This class is not thread safe. There will be only one instance of this class per LiKafkaSystemConsumer object.
* We still need some synchronization around kafkaConsumer. See pollConsumer() method for details.
*/
public class KafkaConsumerProxy<K, V> {
@@ -65,7 +65,7 @@ public class KafkaConsumerProxy<K, V> {
private final String clientId;
private final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
private final Map<SystemStreamPartition, MetricName> ssp2MetricName = new HashMap<>();
- // list of all the SSPs we poll from with their next offsets correspondingly.
+ // list of all the SSPs we poll from, with their next offsets correspondingly.
private final Map<SystemStreamPartition, Long> nextOffsets = new ConcurrentHashMap<>();
// lags behind the high water mark, as reported by the Kafka consumer.
private final Map<SystemStreamPartition, Long> latestLags = new HashMap<>();
http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.scala
deleted file mode 100644
index fd84c4a..0000000
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.scala
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.samza.system.kafka
-
-import kafka.common.TopicAndPartition
-import org.apache.samza.util.Logging
-import kafka.message.Message
-import kafka.message.MessageAndOffset
-import org.apache.samza.Partition
-import org.apache.kafka.common.utils.Utils
-import org.apache.samza.util.Clock
-import kafka.serializer.DefaultDecoder
-import kafka.serializer.Decoder
-import org.apache.samza.util.BlockingEnvelopeMap
-import org.apache.samza.system.SystemStreamPartition
-import org.apache.samza.system.IncomingMessageEnvelope
-import kafka.consumer.ConsumerConfig
-import org.apache.samza.util.TopicMetadataStore
-import kafka.api.PartitionMetadata
-import kafka.api.TopicMetadata
-import org.apache.samza.util.ExponentialSleepStrategy
-import java.util.concurrent.ConcurrentHashMap
-import scala.collection.JavaConverters._
-import org.apache.samza.system.SystemAdmin
-
-object KafkaSystemConsumer {
-
- // Approximate additional shallow heap overhead per message in addition to the raw bytes
- // received from Kafka 4 + 64 + 4 + 4 + 4 = 80 bytes overhead.
- // As this overhead is a moving target, and not very large
- // compared to the message size its being ignore in the computation for now.
- val MESSAGE_SIZE_OVERHEAD = 4 + 64 + 4 + 4 + 4;
-
- def toTopicAndPartition(systemStreamPartition: SystemStreamPartition) = {
- val topic = systemStreamPartition.getStream
- val partitionId = systemStreamPartition.getPartition.getPartitionId
- TopicAndPartition(topic, partitionId)
- }
-}
-
-/**
- * Maintain a cache of BrokerProxies, returning the appropriate one for the
- * requested topic and partition.
- */
-private[kafka] class KafkaSystemConsumer(
- systemName: String,
- systemAdmin: SystemAdmin,
- metrics: KafkaSystemConsumerMetrics,
- metadataStore: TopicMetadataStore,
- clientId: String,
- timeout: Int = ConsumerConfig.ConsumerTimeoutMs,
- bufferSize: Int = ConsumerConfig.SocketBufferSize,
- fetchSize: StreamFetchSizes = new StreamFetchSizes,
- consumerMinSize: Int = ConsumerConfig.MinFetchBytes,
- consumerMaxWait: Int = ConsumerConfig.MaxFetchWaitMs,
-
- /**
- * Defines a low water mark for how many messages we buffer before we start
- * executing fetch requests against brokers to get more messages. This value
- * is divided equally among all registered SystemStreamPartitions. For
- * example, if fetchThreshold is set to 50000, and there are 50
- * SystemStreamPartitions registered, then the per-partition threshold is
- * 1000. As soon as a SystemStreamPartition's buffered message count drops
- * below 1000, a fetch request will be executed to get more data for it.
- *
- * Increasing this parameter will decrease the latency between when a queue
- * is drained of messages and when new messages are enqueued, but also leads
- * to an increase in memory usage since more messages will be held in memory.
- */
- fetchThreshold: Int = 50000,
- /**
- * Defines a low water mark for how many bytes we buffer before we start
- * executing fetch requests against brokers to get more messages. This
- * value is divided by 2 because the messages are buffered twice, once in
- * KafkaConsumer and then in SystemConsumers. This value
- * is divided equally among all registered SystemStreamPartitions.
- * However this is a soft limit per partition, as the
- * bytes are cached at the message boundaries, and the actual usage can be
- * 1000 bytes + size of max message in the partition for a given stream.
- * The bytes if the size of the bytebuffer in Message. Hence, the
- * Object overhead is not taken into consideration. In this codebase
- * it seems to be quite small. Hence, even for 500000 messages this is around 4MB x 2 = 8MB,
- * which is not considerable.
- *
- * For example,
- * if fetchThresholdBytes is set to 100000 bytes, and there are 50
- * SystemStreamPartitions registered, then the per-partition threshold is
- * (100000 / 2) / 50 = 1000 bytes.
- * As this is a soft limit, the actual usage can be 1000 bytes + size of max message.
- * As soon as a SystemStreamPartition's buffered messages bytes drops
- * below 1000, a fetch request will be executed to get more data for it.
- *
- * Increasing this parameter will decrease the latency between when a queue
- * is drained of messages and when new messages are enqueued, but also leads
- * to an increase in memory usage since more messages will be held in memory.
- *
- * The default value is -1, which means this is not used. When the value
- * is > 0, then the fetchThreshold which is count based is ignored.
- */
- fetchThresholdBytes: Long = -1,
- /**
- * if(fetchThresholdBytes > 0) true else false
- */
- fetchLimitByBytesEnabled: Boolean = false,
- offsetGetter: GetOffset = new GetOffset("fail"),
- deserializer: Decoder[Object] = new DefaultDecoder().asInstanceOf[Decoder[Object]],
- keyDeserializer: Decoder[Object] = new DefaultDecoder().asInstanceOf[Decoder[Object]],
- retryBackoff: ExponentialSleepStrategy = new ExponentialSleepStrategy,
- clock: () => Long = { System.currentTimeMillis }) extends BlockingEnvelopeMap(
- metrics.registry,
- new Clock {
- def currentTimeMillis = clock()
- },
- classOf[KafkaSystemConsumerMetrics].getName) with Toss with Logging {
-
- type HostPort = (String, Int)
- val brokerProxies = scala.collection.mutable.Map[HostPort, BrokerProxy]()
- val topicPartitionsAndOffsets: scala.collection.concurrent.Map[TopicAndPartition, String] = new ConcurrentHashMap[TopicAndPartition, String]().asScala
- var perPartitionFetchThreshold = fetchThreshold
- var perPartitionFetchThresholdBytes = 0L
-
- def start() {
- if (topicPartitionsAndOffsets.size > 0) {
- perPartitionFetchThreshold = fetchThreshold / topicPartitionsAndOffsets.size
- // messages get double buffered, hence divide by 2
- if(fetchLimitByBytesEnabled) {
- perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / topicPartitionsAndOffsets.size
- }
- }
-
- systemAdmin.start()
- refreshBrokers
- }
-
- override def register(systemStreamPartition: SystemStreamPartition, offset: String) {
- super.register(systemStreamPartition, offset)
-
- val topicAndPartition = KafkaSystemConsumer.toTopicAndPartition(systemStreamPartition)
- val existingOffset = topicPartitionsAndOffsets.getOrElseUpdate(topicAndPartition, offset)
- // register the older offset in the consumer
- if (systemAdmin.offsetComparator(existingOffset, offset) >= 0) {
- topicPartitionsAndOffsets.replace(topicAndPartition, offset)
- }
-
- metrics.registerTopicAndPartition(KafkaSystemConsumer.toTopicAndPartition(systemStreamPartition))
- }
-
- def stop() {
- systemAdmin.stop()
- brokerProxies.values.foreach(_.stop)
- }
-
- protected def createBrokerProxy(host: String, port: Int): BrokerProxy = {
- info("Creating new broker proxy for host: %s and port: %s" format(host, port))
- new BrokerProxy(host, port, systemName, clientId, metrics, sink, timeout, bufferSize, fetchSize, consumerMinSize, consumerMaxWait, offsetGetter)
- }
-
- protected def getPartitionMetadata(topicMetadata: TopicMetadata, partition: Int): Option[PartitionMetadata] = {
- topicMetadata.partitionsMetadata.find(_.partitionId == partition)
- }
-
- protected def getLeaderHostPort(partitionMetadata: Option[PartitionMetadata]): Option[(String, Int)] = {
- // Whatever we do, we can't say Broker, even though we're
- // manipulating it here. Broker is a private type and Scala doesn't seem
- // to care about that as long as you don't explicitly declare its type.
- val brokerOption = partitionMetadata.flatMap(_.leader)
-
- brokerOption match {
- case Some(broker) => Some(broker.host, broker.port)
- case _ => None
- }
- }
-
- def refreshBrokers {
- var tpToRefresh = topicPartitionsAndOffsets.keySet.toList
- info("Refreshing brokers for: %s" format topicPartitionsAndOffsets)
- retryBackoff.run(
- loop => {
- val topics = tpToRefresh.map(_.topic).toSet
- val topicMetadata = TopicMetadataCache.getTopicMetadata(topics, systemName, (topics: Set[String]) => metadataStore.getTopicInfo(topics))
-
- // addTopicPartition one at a time, leaving the to-be-done list intact in case of exceptions.
- // This avoids trying to re-add the same topic partition repeatedly
- def refresh() = {
- val head = tpToRefresh.head
- // refreshBrokers can be called from abdicate and refreshDropped,
- // both of which are triggered from BrokerProxy threads. To prevent
- // accidentally creating multiple objects for the same broker, or
- // accidentally not updating the topicPartitionsAndOffsets variable,
- // we need to lock.
- this.synchronized {
- // Check if we still need this TopicAndPartition inside the
- // critical section. If we don't, then notAValidEvent it.
- topicPartitionsAndOffsets.get(head) match {
- case Some(nextOffset) =>
- val partitionMetadata = getPartitionMetadata(topicMetadata(head.topic), head.partition)
- getLeaderHostPort(partitionMetadata) match {
- case Some((host, port)) =>
- debug("Got partition metadata for %s: %s" format(head, partitionMetadata.get))
- val brokerProxy = brokerProxies.getOrElseUpdate((host, port), createBrokerProxy(host, port))
- brokerProxy.addTopicPartition(head, Option(nextOffset))
- brokerProxy.start
- debug("Claimed topic-partition (%s) for (%s)".format(head, brokerProxy))
- topicPartitionsAndOffsets -= head
- case None => info("No metadata available for: %s. Will try to refresh and add to a consumer thread later." format head)
- }
- case _ => debug("Ignoring refresh for %s because we already added it from another thread." format head)
- }
- }
- tpToRefresh.tail
- }
-
- while (!tpToRefresh.isEmpty) {
- tpToRefresh = refresh()
- }
-
- loop.done
- },
-
- (exception, loop) => {
- warn("While refreshing brokers for %s: %s. Retrying." format (tpToRefresh.head, exception))
- debug("Exception detail:", exception)
- })
- }
-
- val sink = new MessageSink {
- var lastDroppedRefresh = clock()
-
- def refreshDropped() {
- if (topicPartitionsAndOffsets.size > 0 && clock() - lastDroppedRefresh > 10000) {
- refreshBrokers
- lastDroppedRefresh = clock()
- }
- }
-
- def setIsAtHighWatermark(tp: TopicAndPartition, isAtHighWatermark: Boolean) {
- setIsAtHead(toSystemStreamPartition(tp), isAtHighWatermark)
- }
-
- def needsMoreMessages(tp: TopicAndPartition) = {
- if(fetchLimitByBytesEnabled) {
- getMessagesSizeInQueue(toSystemStreamPartition(tp)) < perPartitionFetchThresholdBytes
- } else {
- getNumMessagesInQueue(toSystemStreamPartition(tp)) < perPartitionFetchThreshold
- }
- }
-
- def getMessageSize(message: Message): Integer = {
- message.size + KafkaSystemConsumer.MESSAGE_SIZE_OVERHEAD
- }
-
- def addMessage(tp: TopicAndPartition, msg: MessageAndOffset, highWatermark: Long) = {
- trace("Incoming message %s: %s." format (tp, msg))
-
- val systemStreamPartition = toSystemStreamPartition(tp)
- val isAtHead = highWatermark == msg.offset
- val offset = msg.offset.toString
- val key = if (msg.message.key != null) {
- keyDeserializer.fromBytes(Utils.readBytes(msg.message.key))
- } else {
- null
- }
- val message = if (!msg.message.isNull) {
- deserializer.fromBytes(Utils.readBytes(msg.message.payload))
- } else {
- null
- }
-
- if(fetchLimitByBytesEnabled ) {
- val ime = new IncomingMessageEnvelope(systemStreamPartition, offset, key, message, getMessageSize(msg.message))
- ime.setTimestamp(if (!msg.message.isNull) msg.message.timestamp else 0L)
- put(systemStreamPartition, ime)
- } else {
- val ime = new IncomingMessageEnvelope(systemStreamPartition, offset, key, message)
- ime.setTimestamp(if (!msg.message.isNull) msg.message.timestamp else 0L)
- put(systemStreamPartition, ime)
- }
-
- setIsAtHead(systemStreamPartition, isAtHead)
- }
-
- def abdicate(tp: TopicAndPartition, nextOffset: Long) {
- info("Abdicating for %s" format (tp))
- topicPartitionsAndOffsets += tp -> nextOffset.toString
- refreshBrokers
- }
-
- private def toSystemStreamPartition(tp: TopicAndPartition) = {
- new SystemStreamPartition(systemName, tp.topic, new Partition(tp.partition))
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
index 51545a0..1aa66dc 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
@@ -35,6 +35,7 @@ class KafkaSystemConsumerMetrics(val systemName: String = "unknown", val registr
val highWatermark = new ConcurrentHashMap[TopicAndPartition, Gauge[Long]]
/*
+ TODO Fix
* (String, Int) = (host, port) of BrokerProxy.
*/
http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index dd7e584..b745628 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -66,14 +66,14 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
private final KafkaSystemConsumerMetrics samzaConsumerMetrics;
private final String clientId;
private final String metricName;
- private final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
+ /* package private */final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
private final AtomicBoolean stopped = new AtomicBoolean(false);
private final AtomicBoolean started = new AtomicBoolean(false);
private final Config config;
private final boolean fetchThresholdBytesEnabled;
// This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
- private KafkaConsumerMessageSink messageSink;
+ /* package private */ KafkaConsumerMessageSink messageSink;
// proxy is doing the actual reading
private KafkaConsumerProxy proxy;
@@ -142,17 +142,6 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
Map<String, String> injectProps = new HashMap<>();
- // the consumer is fully typed, and deserialization can be too. But in case it is not provided we should
- // default to byte[]
- if ( !config.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
- LOG.info("default key serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
- injectProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
- }
- if ( !config.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
- LOG.info("default value serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
- injectProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
- }
-
// extract kafka consumer configs
KafkaConsumerConfig consumerConfig =
KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
@@ -203,7 +192,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
}
}
- private void createConsumerProxy() {
+ void createConsumerProxy() {
// create a sink for passing the messages between the proxy and the consumer
messageSink = new KafkaConsumerMessageSink();
@@ -219,7 +208,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
Add the TopicPartitions to the proxy.
Start the proxy thread.
*/
- private void startConsumer() {
+ void startConsumer() {
//set the offset for each TopicPartition
topicPartitions2Offset.forEach((tp, startingOffsetString) -> {
long startingOffset = Long.valueOf(startingOffsetString);
http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala b/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
index 8544dbf..8d92f4d 100644
--- a/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
+++ b/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
@@ -92,8 +92,8 @@ class TestKafkaCheckpointManager extends KafkaServerTestHarness {
assertNull(readCp)
writeCheckpoint(checkpointTopic, taskName, checkpoint1)
-
assertEquals(checkpoint1, readCheckpoint(checkpointTopic, taskName))
+
// writing a second message and reading it returns a more recent checkpoint
writeCheckpoint(checkpointTopic, taskName, checkpoint2)
assertEquals(checkpoint2, readCheckpoint(checkpointTopic, taskName))
@@ -194,7 +194,6 @@ class TestKafkaCheckpointManager extends KafkaServerTestHarness {
val systemFactory = Util.getObj(systemFactoryClassName, classOf[SystemFactory])
val spec = new KafkaStreamSpec("id", cpTopic, checkpointSystemName, 1, 1, props)
- System.out.println("CONFIG = " + config)
new KafkaCheckpointManager(spec, systemFactory, failOnTopicValidation, config, new NoOpMetricsRegistry, serde)
}
http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
index d510076..a3f76e7 100644
--- a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
+++ b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
@@ -41,6 +41,7 @@ import org.mockito.{Matchers, Mockito}
import scala.collection.JavaConverters._
class TestBrokerProxy extends Logging {
+ /*
val tp2 = new TopicAndPartition("Redbird", 2013)
var fetchTp1 = true // control whether fetching tp1 messages or not
@@ -305,6 +306,7 @@ class TestBrokerProxy extends Logging {
}
/**
+ * TODO fix
* Test that makes sure that BrokerProxy abdicates all TopicAndPartitions
* that it owns when a consumer failure occurs.
*/
@@ -431,4 +433,5 @@ class TestBrokerProxy extends Logging {
bp.stop
verify(mockSimpleConsumer).close
}
+ */
}
http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.scala b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.scala
deleted file mode 100644
index 8656d10..0000000
--- a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.scala
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.samza.system.kafka
-
-import kafka.api.TopicMetadata
-import kafka.api.PartitionMetadata
-import kafka.cluster.Broker
-import kafka.common.TopicAndPartition
-import kafka.message.Message
-import kafka.message.MessageAndOffset
-import org.apache.kafka.common.protocol.Errors
-import org.apache.samza.system.IncomingMessageEnvelope
-import org.apache.samza.system.SystemStreamPartition
-import org.apache.samza.Partition
-import org.apache.samza.util.TopicMetadataStore
-import org.junit.Test
-import org.junit.Assert._
-import org.apache.samza.system.SystemAdmin
-import org.mockito.Mockito._
-import org.mockito.Matchers._
-
-class TestKafkaSystemConsumer {
- val systemAdmin: SystemAdmin = mock(classOf[KafkaSystemAdmin])
- private val SSP: SystemStreamPartition = new SystemStreamPartition("test", "test", new Partition(0))
- private val envelope: IncomingMessageEnvelope = new IncomingMessageEnvelope(SSP, null, null, null)
- private val envelopeWithSize: IncomingMessageEnvelope = new IncomingMessageEnvelope(SSP, null, null, null, 100)
- private val clientId = "TestClientId"
-
- @Test
- def testFetchThresholdShouldDivideEvenlyAmongPartitions {
- val metadataStore = new MockMetadataStore
- val consumer = new KafkaSystemConsumer("", systemAdmin, new KafkaSystemConsumerMetrics, metadataStore, clientId, fetchThreshold = 50000) {
- override def refreshBrokers {
- }
- }
-
- for (i <- 0 until 50) {
- consumer.register(new SystemStreamPartition("test-system", "test-stream", new Partition(i)), "0")
- }
-
- consumer.start
-
- assertEquals(1000, consumer.perPartitionFetchThreshold)
- }
-
- @Test
- def testBrokerCreationShouldTriggerStart {
- val systemName = "test-system"
- val streamName = "test-stream"
- val metrics = new KafkaSystemConsumerMetrics
- // Lie and tell the store that the partition metadata is empty. We can't
- // use partition metadata because it has Broker in its constructor, which
- // is package private to Kafka.
- val metadataStore = new MockMetadataStore(Map(streamName -> TopicMetadata(streamName, Seq.empty, Errors.NONE)))
- var hosts = List[String]()
- var getHostPortCount = 0
- val consumer = new KafkaSystemConsumer(systemName, systemAdmin, metrics, metadataStore, clientId) {
- override def getLeaderHostPort(partitionMetadata: Option[PartitionMetadata]): Option[(String, Int)] = {
- // Generate a unique host every time getHostPort is called.
- getHostPortCount += 1
- Some("localhost-%s" format getHostPortCount, 0)
- }
-
- override def createBrokerProxy(host: String, port: Int): BrokerProxy = {
- new BrokerProxy(host, port, systemName, "", metrics, sink) {
- override def addTopicPartition(tp: TopicAndPartition, nextOffset: Option[String]) = {
- // Skip this since we normally do verification of offsets, which
- // tries to connect to Kafka. Rather than mock that, just forget it.
- nextOffsets.size
- }
-
- override def start {
- hosts :+= host
- }
- }
- }
- }
-
- consumer.register(new SystemStreamPartition(systemName, streamName, new Partition(0)), "1")
- assertEquals(0, hosts.size)
- consumer.start
- assertEquals(List("localhost-1"), hosts)
- // Should trigger a refresh with a new host.
- consumer.sink.abdicate(new TopicAndPartition(streamName, 0), 2)
- assertEquals(List("localhost-1", "localhost-2"), hosts)
- }
-
- @Test
- def testConsumerRegisterOlderOffsetOfTheSamzaSSP {
- when(systemAdmin.offsetComparator(anyString, anyString)).thenCallRealMethod()
-
- val metadataStore = new MockMetadataStore
- val consumer = new KafkaSystemConsumer("", systemAdmin, new KafkaSystemConsumerMetrics, metadataStore, clientId, fetchThreshold = 50000)
- val ssp0 = new SystemStreamPartition("test-system", "test-stream", new Partition(0))
- val ssp1 = new SystemStreamPartition("test-system", "test-stream", new Partition(1))
- val ssp2 = new SystemStreamPartition("test-system", "test-stream", new Partition(2))
-
- consumer.register(ssp0, "0")
- consumer.register(ssp0, "5")
- consumer.register(ssp1, "2")
- consumer.register(ssp1, "3")
- consumer.register(ssp2, "0")
-
- assertEquals("0", consumer.topicPartitionsAndOffsets(KafkaSystemConsumer.toTopicAndPartition(ssp0)))
- assertEquals("2", consumer.topicPartitionsAndOffsets(KafkaSystemConsumer.toTopicAndPartition(ssp1)))
- assertEquals("0", consumer.topicPartitionsAndOffsets(KafkaSystemConsumer.toTopicAndPartition(ssp2)))
- }
-
- @Test
- def testFetchThresholdBytesShouldDivideEvenlyAmongPartitions {
- val metadataStore = new MockMetadataStore
- val consumer = new KafkaSystemConsumer("", systemAdmin, new KafkaSystemConsumerMetrics, metadataStore, clientId,
- fetchThreshold = 50000, fetchThresholdBytes = 60000L, fetchLimitByBytesEnabled = true) {
- override def refreshBrokers {
- }
- }
-
- for (i <- 0 until 10) {
- consumer.register(new SystemStreamPartition("test-system", "test-stream", new Partition(i)), "0")
- }
-
- consumer.start
-
- assertEquals(5000, consumer.perPartitionFetchThreshold)
- assertEquals(3000, consumer.perPartitionFetchThresholdBytes)
- }
-
- @Test
- def testFetchThresholdBytes {
- val metadataStore = new MockMetadataStore
- val consumer = new KafkaSystemConsumer("test-system", systemAdmin, new KafkaSystemConsumerMetrics, metadataStore, clientId,
- fetchThreshold = 50000, fetchThresholdBytes = 60000L, fetchLimitByBytesEnabled = true) {
- override def refreshBrokers {
- }
- }
-
- for (i <- 0 until 10) {
- consumer.register(new SystemStreamPartition("test-system", "test-stream", new Partition(i)), "0")
- }
-
- consumer.start
-
- val msg = Array[Byte](5, 112, 9, 126)
- val msgAndOffset: MessageAndOffset = MessageAndOffset(new Message(msg), 887654)
- // 4 data + 18 Message overhead + 80 IncomingMessageEnvelope overhead
- consumer.sink.addMessage(new TopicAndPartition("test-stream", 0), msgAndOffset, 887354)
-
- assertEquals(106, consumer.getMessagesSizeInQueue(new SystemStreamPartition("test-system", "test-stream", new Partition(0))))
- }
-
- @Test
- def testFetchThresholdBytesDisabled {
- val metadataStore = new MockMetadataStore
- val consumer = new KafkaSystemConsumer("", systemAdmin, new KafkaSystemConsumerMetrics, metadataStore, clientId,
- fetchThreshold = 50000, fetchThresholdBytes = 60000L) {
- override def refreshBrokers {
- }
- }
-
- for (i <- 0 until 10) {
- consumer.register(new SystemStreamPartition("test-system", "test-stream", new Partition(i)), "0")
- }
-
- consumer.start
-
- assertEquals(5000, consumer.perPartitionFetchThreshold)
- assertEquals(0, consumer.perPartitionFetchThresholdBytes)
- assertEquals(0, consumer.getMessagesSizeInQueue(new SystemStreamPartition("test-system", "test-stream", new Partition(0))))
- }
-}
-
-class MockMetadataStore(var metadata: Map[String, TopicMetadata] = Map()) extends TopicMetadataStore {
- def getTopicInfo(topics: Set[String]): Map[String, TopicMetadata] = metadata
-}
http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
new file mode 100644
index 0000000..f7f63f3
--- /dev/null
+++ b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
@@ -0,0 +1,203 @@
+package org.apache.samza.system.kafka;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
+import org.apache.kafka.common.serialization.ByteArraySerializer;
+import org.apache.samza.Partition;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.config.MapConfig;
+import org.apache.samza.system.IncomingMessageEnvelope;
+import org.apache.samza.system.SystemStreamPartition;
+import org.apache.samza.util.Clock;
+import org.apache.samza.util.NoOpMetricsRegistry;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+
+public class TestNewKafkaSystemConsumer {
+ public final String TEST_SYSTEM = "test-system";
+ public final String TEST_STREAM = "test-stream";
+ public final String TEST_CLIENT_ID = "testClientId";
+ public final String BOOTSTRAP_SERVER = "127.0.0.1:8888";
+ public final String FETCH_THRESHOLD_MSGS = "50000";
+ public final String FETCH_THRESHOLD_BYTES = "100000";
+
+ @Before
+ public void setUp() {
+
+ }
+
+ private NewKafkaSystemConsumer setupConsumer(String fetchMsg, String fetchBytes) {
+ final Map<String, String> map = new HashMap<>();
+
+ map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD(), TEST_SYSTEM), fetchMsg);
+ map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD_BYTES(), TEST_SYSTEM), fetchBytes);
+ map.put(String.format("systems.%s.consumer.%s", TEST_SYSTEM, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG),
+ BOOTSTRAP_SERVER);
+
+ Config config = new MapConfig(map);
+ KafkaConsumerConfig consumerConfig =
+ KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, TEST_SYSTEM, TEST_CLIENT_ID, Collections.emptyMap());
+ final KafkaConsumer<byte[], byte[]> kafkaConsumer = new MockKafkaConsumer(consumerConfig.originals());
+
+ MockNewKafkaSystmeCosumer newKafkaSystemConsumer =
+ new MockNewKafkaSystmeCosumer(kafkaConsumer, TEST_SYSTEM, config, TEST_CLIENT_ID,
+ new KafkaSystemConsumerMetrics(TEST_SYSTEM, new NoOpMetricsRegistry()), System::currentTimeMillis);
+
+ return newKafkaSystemConsumer;
+ }
+
+ @Test
+ public void testConfigValidations() {
+
+ final NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+ consumer.start();
+ // should be no failures
+ }
+
+ @Test
+ public void testFetchThresholdShouldDivideEvenlyAmongPartitions() {
+ final NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+ final int partitionsNum = 50;
+ for (int i = 0; i < partitionsNum; i++) {
+ consumer.register(new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(i)), "0");
+ }
+
+ consumer.start();
+
+ Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum, consumer.perPartitionFetchThreshold);
+ Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_BYTES) / 2 / partitionsNum,
+ consumer.perPartitionFetchThresholdBytes);
+ }
+
+ @Test
+ public void testConsumerRegisterOlderOffsetOfTheSamzaSSP() {
+
+ NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+ SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+ SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+ SystemStreamPartition ssp2 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(2));
+
+ consumer.register(ssp0, "0");
+ consumer.register(ssp0, "5");
+ consumer.register(ssp1, "2");
+ consumer.register(ssp1, "3");
+ consumer.register(ssp2, "0");
+
+ assertEquals("0", consumer.topicPartitions2Offset.get(NewKafkaSystemConsumer.toTopicPartition(ssp0)));
+ assertEquals("2", consumer.topicPartitions2Offset.get(NewKafkaSystemConsumer.toTopicPartition(ssp1)));
+ assertEquals("0", consumer.topicPartitions2Offset.get(NewKafkaSystemConsumer.toTopicPartition(ssp2)));
+ }
+
+ @Test
+ public void testFetchThresholdBytes() {
+
+ SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+ SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+ int partitionsNum = 2;
+ int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size
+ int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 1; // fake size
+ int ime11Size = 20;
+ ByteArraySerializer bytesSerde = new ByteArraySerializer();
+ IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
+ bytesSerde.serialize("", "value0".getBytes()), ime0Size);
+ IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
+ bytesSerde.serialize("", "value1".getBytes()), ime1Size);
+ IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
+ bytesSerde.serialize("", "value11".getBytes()), ime11Size);
+ NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+ consumer.register(ssp0, "0");
+ consumer.register(ssp1, "0");
+ consumer.start();
+ consumer.messageSink.addMessage(ssp0, ime0);
+ // queue for ssp0 should be full now, because we added message of size FETCH_THRESHOLD_MSGS/partitionsNum
+ Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp0));
+ consumer.messageSink.addMessage(ssp1, ime1);
+ // queue for ssp1 should be less than full now, because we added a message of size (FETCH_THRESHOLD_MSGS/partitionsNum - 1)
+ Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
+ consumer.messageSink.addMessage(ssp1, ime11);
+ // queue for ssp1 should be full now, because we added a message of size 20 on top
+ Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
+
+ Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
+ Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
+ Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
+ Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
+ }
+
+ @Test
+ public void testFetchThresholdBytesDiabled() {
+ // Pass 0 as fetchThresholdByBytes, which disables checking for limit by size
+
+ SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+ SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+ int partitionsNum = 2;
+ int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size, upto the limit
+ int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 100; // fake size, below the limit
+ int ime11Size = 20;// even with the second message, still below the size limit
+ ByteArraySerializer bytesSerde = new ByteArraySerializer();
+ IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
+ bytesSerde.serialize("", "value0".getBytes()), ime0Size);
+ IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
+ bytesSerde.serialize("", "value1".getBytes()), ime1Size);
+ IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
+ bytesSerde.serialize("", "value11".getBytes()), ime11Size);
+
+ // limit by number of messages 4/2 = 2 per partition
+ // limit by number of bytes - disabled
+ NewKafkaSystemConsumer consumer = setupConsumer("4", "0"); // should disable
+
+ consumer.register(ssp0, "0");
+ consumer.register(ssp1, "0");
+ consumer.start();
+ consumer.messageSink.addMessage(ssp0, ime0);
+ // should be full by size, but not full by number of messages (1 of 2)
+ Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp0));
+ consumer.messageSink.addMessage(ssp1, ime1);
+ // not full neither by size nor by messages
+ Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
+ consumer.messageSink.addMessage(ssp1, ime11);
+ // not full by size, but should be full by messages
+ Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
+
+ Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
+ Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
+ Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
+ Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
+ }
+
+ // mock kafkaConsumer and SystemConsumer
+ static class MockKafkaConsumer extends KafkaConsumer {
+ public MockKafkaConsumer(Map<String, Object> configs) {
+ super(configs);
+ }
+ }
+
+ static class MockNewKafkaSystmeCosumer extends NewKafkaSystemConsumer {
+ public MockNewKafkaSystmeCosumer(Consumer kafkaConsumer, String systemName, Config config, String clientId,
+ KafkaSystemConsumerMetrics metrics, Clock clock) {
+ super(kafkaConsumer, systemName, config, clientId, metrics, clock);
+ }
+
+ @Override
+ void createConsumerProxy() {
+ this.messageSink = new KafkaConsumerMessageSink();
+ }
+
+ @Override
+ void startConsumer() {
+ }
+ }
+}
[20/47] samza git commit: added new samza kafka system consumer using
new kafka consumer
Posted by bo...@apache.org.
added new samza kafka system consumer using new kafka consumer
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/c0ea25cb
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/c0ea25cb
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/c0ea25cb
Branch: refs/heads/NewKafkaSystemConsumer
Commit: c0ea25cbc674a1d67546f7f47a6f36f6ee58bdc6
Parents: 7254460
Author: Boris S <bo...@apache.org>
Authored: Wed Aug 29 10:52:30 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Aug 29 10:52:30 2018 -0700
----------------------------------------------------------------------
.../clients/consumer/KafkaConsumerConfig.java | 15 ++-
.../samza/system/kafka/KafkaConsumerProxy.java | 7 +-
.../samza/system/kafka/KafkaSystemFactory.scala | 59 +-----------
.../system/kafka/NewKafkaSystemConsumer.java | 97 +++++++++++++-------
4 files changed, 80 insertions(+), 98 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/c0ea25cb/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index 97360e2..b29a041 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -40,6 +40,7 @@ public class KafkaConsumerConfig extends ConsumerConfig {
private static final String SAMZA_OFFSET_SMALLEST = "smallest";
private static final String KAFKA_OFFSET_LATEST = "latest";
private static final String KAFKA_OFFSET_EARLIEST = "earliest";
+ private static final String KAFKA_OFFSET_NONE = "none";
/*
* By default, KafkaConsumer will fetch ALL available messages for all the partitions.
* This may cause memory issues. That's why we will limit the number of messages per partition we get on EACH poll().
@@ -64,16 +65,14 @@ public class KafkaConsumerConfig extends ConsumerConfig {
consumerProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
consumerProps.setProperty(ConsumerConfig.CLIENT_ID_CONFIG, clientId);
- /********************************************
- * Open-source Kafka Consumer configuration *
- *******************************************/
+ //Open-source Kafka Consumer configuration
consumerProps.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // Disable consumer auto-commit
consumerProps.setProperty(
ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
getAutoOffsetResetValue(consumerProps)); // Translate samza config value to kafka config value
- // makesure bootstrap configs are in ?? SHOULD WE FAIL IF THEY ARE NOT?
+ // make sure bootstrap configs are in ?? SHOULD WE FAIL IF THEY ARE NOT?
if (! subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
// get it from the producer config
String bootstrapServer = config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
@@ -139,6 +138,14 @@ public class KafkaConsumerConfig extends ConsumerConfig {
*/
static String getAutoOffsetResetValue(Properties properties) {
String autoOffsetReset = properties.getProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, KAFKA_OFFSET_LATEST);
+
+ // accept kafka values directly
+ if (autoOffsetReset.equals(KAFKA_OFFSET_EARLIEST) ||
+ autoOffsetReset.equals(KAFKA_OFFSET_LATEST) ||
+ autoOffsetReset.equals(KAFKA_OFFSET_NONE)) {
+ return autoOffsetReset;
+ }
+
switch (autoOffsetReset) {
case SAMZA_OFFSET_LARGEST:
return KAFKA_OFFSET_LATEST;
http://git-wip-us.apache.org/repos/asf/samza/blob/c0ea25cb/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 66971af..01b345a 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -69,7 +69,6 @@ public class KafkaConsumerProxy<K, V> {
private final Map<SystemStreamPartition, Long> nextOffsets = new ConcurrentHashMap<>();
// lags behind the high water mark, as reported by the Kafka consumer.
private final Map<SystemStreamPartition, Long> latestLags = new HashMap<>();
- private final NewKafkaSystemConsumer.ValueUnwrapper<V> valueUnwrapper;
private volatile boolean isRunning = false;
private volatile Throwable failureCause = null;
@@ -77,7 +76,7 @@ public class KafkaConsumerProxy<K, V> {
public KafkaConsumerProxy(Consumer<K, V> kafkaConsumer, String systemName, String clientId,
NewKafkaSystemConsumer.KafkaConsumerMessageSink messageSink, KafkaSystemConsumerMetrics samzaConsumerMetrics,
- String metricName, NewKafkaSystemConsumer.ValueUnwrapper<V> valueUnwrapper) {
+ String metricName) {
this.kafkaConsumer = kafkaConsumer;
this.systemName = systemName;
@@ -85,7 +84,6 @@ public class KafkaConsumerProxy<K, V> {
this.kafkaConsumerMetrics = samzaConsumerMetrics;
this.metricName = metricName;
this.clientId = clientId;
- this.valueUnwrapper = valueUnwrapper;
// TODO - see if we need new metrics (not host:port based)
this.kafkaConsumerMetrics.registerBrokerProxy(metricName, 0);
@@ -257,8 +255,7 @@ public class KafkaConsumerProxy<K, V> {
//}
final K key = r.key();
- final Object value =
- valueUnwrapper == null ? r.value() : valueUnwrapper.unwrapValue(ssp.getSystemStream(), r.value());
+ final Object value = r.value();
IncomingMessageEnvelope imEnvelope =
new IncomingMessageEnvelope(ssp, String.valueOf(r.offset()), key, value, msgSize);
listMsgs.add(imEnvelope);
http://git-wip-us.apache.org/repos/asf/samza/blob/c0ea25cb/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index c7f6aed..6a5eda9 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -55,64 +55,9 @@ class KafkaSystemFactory extends SystemFactory with Logging {
val clientId = KafkaUtil.getClientId("samza-consumer", config)
val metrics = new KafkaSystemConsumerMetrics(systemName, registry)
- // Kind of goofy to need a producer config for consumers, but we need metadata.
- val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId)
- val bootstrapServers = producerConfig.bootsrapServers
- //val consumerConfig = config.getKafkaSystemConsumerConfig(systemName, clientId)
-
- //val kafkaConfig = new KafkaConfig(config)
-
-
- // val timeout = consumerConfig.socketTimeoutMs
- //val bufferSize = consumerConfig.socketReceiveBufferBytes
- //val fetchSize = new StreamFetchSizes(consumerConfig.fetchMessageMaxBytes, config.getFetchMessageMaxBytesTopics(systemName))
- //val consumerMinSize = consumerConfig.fetchMinBytes
- //val consumerMaxWait = consumerConfig.fetchWaitMaxMs
- //val autoOffsetResetDefault = consumerConfig.autoOffsetReset
- val autoOffsetResetTopics = config.getAutoOffsetResetTopics(systemName)
- val fetchThreshold = config.getConsumerFetchThreshold(systemName).getOrElse("50000").toInt
- val fetchThresholdBytes = config.getConsumerFetchThresholdBytes(systemName).getOrElse("-1").toLong
- //val offsetGetter = new GetOffset(autoOffsetResetDefault, autoOffsetResetTopics)
- //val metadataStore = new ClientUtilTopicMetadataStore(bootstrapServers, clientId, timeout)
-
-
- val kafkaConsumer: KafkaConsumer[Array[Byte], Array[Byte]] =
- NewKafkaSystemConsumer.getKafkaConsumerImpl(systemName, clientId, config)
-
- def valueUnwrapper: NewKafkaSystemConsumer.ValueUnwrapper[Array[Byte]] = null;// TODO add real unrapper from
- val kc = new NewKafkaSystemConsumer (
- kafkaConsumer, systemName, config, clientId,
- metrics, new SystemClock, false, valueUnwrapper)
-
- kc
- /*
- new KafkaSystemConsumer(
- systemName = systemName,
- systemAdmin = getAdmin(systemName, config),
- metrics = metrics,
- metadataStore = metadataStore,
- clientId = clientId,
- timeout = timeout,
- bufferSize = bufferSize,
- fetchSize = fetchSize,
- consumerMinSize = consumerMinSize,
- consumerMaxWait = consumerMaxWait,
- fetchThreshold = fetchThreshold,
- fetchThresholdBytes = fetchThresholdBytes,
- fetchLimitByBytesEnabled = config.isConsumerFetchThresholdBytesEnabled(systemName),
- offsetGetter = offsetGetter)
- */
- }
-
- /*
- def getKafkaConsumerImpl(systemName: String, config: KafkaConfig) = {
- info("Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}", systemName, config)
-
- val byteArrayDeserializer = new ByteArrayDeserializer
- new KafkaConsumer[Array[Byte], Array[Byte]](config.configForVanillaConsumer(),
- byteArrayDeserializer, byteArrayDeserializer)
+ NewKafkaSystemConsumer.getNewKafkaSystemConsumer(
+ systemName, config, clientId, metrics, new SystemClock)
}
- */
def getProducer(systemName: String, config: Config, registry: MetricsRegistry): SystemProducer = {
val clientId = KafkaUtil.getClientId("samza-producer", config)
http://git-wip-us.apache.org/repos/asf/samza/blob/c0ea25cb/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index 26db610..dd7e584 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -21,6 +21,7 @@
package org.apache.samza.system.kafka;
+import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -40,29 +41,24 @@ import org.apache.samza.Partition;
import org.apache.samza.SamzaException;
import org.apache.samza.config.Config;
import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.config.StreamConfig;
import org.apache.samza.system.IncomingMessageEnvelope;
import org.apache.samza.system.SystemConsumer;
import org.apache.samza.system.SystemStream;
import org.apache.samza.system.SystemStreamPartition;
import org.apache.samza.util.BlockingEnvelopeMap;
import org.apache.samza.util.Clock;
+import org.apache.samza.util.KafkaUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Option;
+import scala.collection.JavaConversions;
public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements SystemConsumer{
private static final Logger LOG = LoggerFactory.getLogger(NewKafkaSystemConsumer.class);
- /**
- * Provides a way to unwrap the value further. It is used for intermediate stream messages.
- * @param <T> value type
- */
- public interface ValueUnwrapper<T> {
- Object unwrapValue(SystemStream systemStream, T value);
- }
-
private static final long FETCH_THRESHOLD = 50000;
private static final long FETCH_THRESHOLD_BYTES = -1L;
private final Consumer<K,V> kafkaConsumer;
@@ -75,7 +71,6 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
private final AtomicBoolean started = new AtomicBoolean(false);
private final Config config;
private final boolean fetchThresholdBytesEnabled;
- private final ValueUnwrapper<V> valueUnwrapper;
// This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
private KafkaConsumerMessageSink messageSink;
@@ -99,9 +94,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
Config config,
String clientId,
KafkaSystemConsumerMetrics metrics,
- Clock clock,
- boolean fetchThresholdBytesEnabled,
- ValueUnwrapper<V> valueUnwrapper) {
+ Clock clock) {
super(metrics.registry(),clock, metrics.getClass().getName());
@@ -109,41 +102,64 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
this.clientId = clientId;
this.systemName = systemName;
this.config = config;
- this.fetchThresholdBytesEnabled = fetchThresholdBytesEnabled;
this.metricName = systemName + " " + clientId;
this.kafkaConsumer = kafkaConsumer;
- this.valueUnwrapper = valueUnwrapper;
+
+ this.fetchThresholdBytesEnabled = new KafkaConfig(config).isConsumerFetchThresholdBytesEnabled(systemName);
LOG.info(String.format(
"Created SamzaLiKafkaSystemConsumer for system=%s, clientId=%s, metricName=%s with liKafkaConsumer=%s",
systemName, clientId, metricName, this.kafkaConsumer.toString()));
}
- public static KafkaConsumer<byte[], byte[]> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
+ public static <K, V> NewKafkaSystemConsumer getNewKafkaSystemConsumer(
+ String systemName,
+ Config config,
+ String clientId,
+ KafkaSystemConsumerMetrics metrics,
+ Clock clock) {
+
+ // extract consumer configs and create kafka consumer
+ KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
+
+ return new NewKafkaSystemConsumer(kafkaConsumer,
+ systemName,
+ config,
+ clientId,
+ metrics,
+ clock);
+ }
+
+ /**
+ * create kafka consumer
+ * @param systemName
+ * @param clientId
+ * @param config
+ * @return kafka consumer
+ */
+ private static <K, V> KafkaConsumer<K, V> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
Map<String, String> injectProps = new HashMap<>();
- injectProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
- injectProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ // the consumer is fully typed, and deserialization can be too. But in case it is not provided we should
+ // default to byte[]
+ if ( !config.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
+ LOG.info("default key serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
+ injectProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ }
+ if ( !config.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
+ LOG.info("default value serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
+ injectProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ }
+
+ // extract kafka consumer configs
KafkaConsumerConfig consumerConfig =
KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
LOG.info("==============>Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}", systemName, consumerConfig.originals());
- /*
- Map<String, Object> kafkaConsumerConfig = consumerConfig.originals().entrySet().stream()
- .collect(Collectors.toMap((kv)->kv.getKey(), (kv)->(Object)kv.getValue()));
-*/
-
- return new KafkaConsumer<byte[], byte[]>(consumerConfig.originals());
- }
- /**
- * return system name for this consumer
- * @return system name
- */
- public String getSystemName() {
- return systemName;
+ return new KafkaConsumer<>(consumerConfig.originals());
}
@Override
@@ -156,7 +172,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
LOG.warn("attempting to start a stopped consumer");
return;
}
-LOG.info("==============>About to start consumer");
+ LOG.info("==============>About to start consumer");
// initialize the subscriptions for all the registered TopicPartitions
startSubscription();
LOG.info("==============>subscription started");
@@ -193,7 +209,7 @@ LOG.info("==============>About to start consumer");
// create the thread with the consumer
proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink,
- samzaConsumerMetrics, metricName, valueUnwrapper);
+ samzaConsumerMetrics, metricName);
LOG.info("==============>Created consumer proxy: " + proxy);
}
@@ -363,6 +379,23 @@ LOG.info("==============>About to start consumer");
return new SystemStreamPartition(systemName, tp.topic(), new Partition(tp.partition()));
}
+ /**
+ * return system name for this consumer
+ * @return system name
+ */
+ public String getSystemName() {
+ return systemName;
+ }
+
+ private static Set<SystemStream> getIntermediateStreams(Config config) {
+ StreamConfig streamConfig = new StreamConfig(config);
+ Collection<String> streamIds = JavaConversions.asJavaCollection(streamConfig.getStreamIds());
+ return streamIds.stream()
+ .filter(streamConfig::getIsIntermediateStream)
+ .map(id -> streamConfig.streamIdToSystemStream(id))
+ .collect(Collectors.toSet());
+ }
+
////////////////////////////////////
// inner class for the message sink
////////////////////////////////////
[41/47] samza git commit: make KafkaConsumerProxy package-private
Posted by bo...@apache.org.
make KafkaConsumerProxy package-private
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/053fe3bb
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/053fe3bb
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/053fe3bb
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 053fe3bb3b8914b1f8895abe2be2cf00943395c7
Parents: f81cf14
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 11 11:36:11 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 11 11:36:11 2018 -0700
----------------------------------------------------------------------
.../scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/053fe3bb/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 92f9183..4b99fcc 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -74,7 +74,7 @@ public class KafkaConsumerProxy<K, V> {
private volatile Throwable failureCause = null;
private final CountDownLatch consumerPollThreadStartLatch = new CountDownLatch(1);
- public KafkaConsumerProxy(Consumer<K, V> kafkaConsumer, String systemName, String clientId,
+ /* package private */KafkaConsumerProxy(Consumer<K, V> kafkaConsumer, String systemName, String clientId,
KafkaSystemConsumer.KafkaConsumerMessageSink messageSink, KafkaSystemConsumerMetrics samzaConsumerMetrics,
String metricName) {
[09/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/dd39d089
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/dd39d089
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/dd39d089
Branch: refs/heads/NewKafkaSystemConsumer
Commit: dd39d089437905abb93aa1074474b28a18292a54
Parents: 1ad58d4 7a2e192
Author: Boris S <bo...@apache.org>
Authored: Wed Nov 22 11:46:37 2017 -0800
Committer: Boris S <bo...@apache.org>
Committed: Wed Nov 22 11:46:37 2017 -0800
----------------------------------------------------------------------
.gitignore | 3 +-
NOTICE | 2 -
build.gradle | 35 +-
.../versioned/jobs/configuration-table.html | 14 +-
gradle/dependency-versions.gradle | 4 +-
.../samza/checkpoint/CheckpointManager.java | 6 +-
.../samza/container/SamzaContainerContext.java | 7 +-
.../operators/functions/WatermarkFunction.java | 7 +-
.../org/apache/samza/sql/udfs/ScalarUdf.java | 48 ++
.../org/apache/samza/system/ControlMessage.java | 31 +
.../apache/samza/system/WatermarkMessage.java | 26 +
.../azure/AzureCheckpointManager.java | 236 +++++++
.../azure/AzureCheckpointManagerFactory.java | 33 +
.../checkpoint/azure/TaskCheckpointEntity.java | 43 ++
.../org/apache/samza/config/AzureConfig.java | 2 +-
.../coordinator/AzureCoordinationUtils.java | 2 +-
.../samza/coordinator/AzureJobCoordinator.java | 2 +-
.../producer/EventHubSystemProducer.java | 2 +-
.../azure/ITestAzureCheckpointManager.java | 181 +++++
.../consumer/TestEventHubSystemConsumer.java | 56 +-
.../producer/SwapFirstLastByteInterceptor.java | 36 +
.../producer/TestEventHubSystemProducer.java | 57 ++
.../apache/samza/PartitionChangeException.java | 31 +
.../ClusterBasedJobCoordinator.java | 108 ++-
.../clustermanager/ContainerProcessManager.java | 26 +-
.../clustermanager/SamzaApplicationState.java | 7 +-
.../org/apache/samza/config/TaskConfigJava.java | 2 +-
.../StreamPartitionCountMonitor.java | 116 ++--
.../org/apache/samza/execution/StreamEdge.java | 1 +
.../apache/samza/operators/StreamGraphImpl.java | 7 +
.../samza/operators/impl/OperatorImpl.java | 68 +-
.../operators/impl/PartitionByOperatorImpl.java | 7 +-
.../operators/impl/WindowOperatorImpl.java | 13 +
.../samza/runtime/LocalApplicationRunner.java | 2 +-
.../apache/samza/storage/StorageRecovery.java | 2 +-
.../samza/checkpoint/CheckpointTool.scala | 15 +-
.../org/apache/samza/config/JobConfig.scala | 2 -
.../org/apache/samza/config/StorageConfig.scala | 8 +
.../org/apache/samza/config/StreamConfig.scala | 30 +-
.../org/apache/samza/config/TaskConfig.scala | 5 +
.../apache/samza/container/SamzaContainer.scala | 2 +-
.../samza/coordinator/JobModelManager.scala | 49 +-
.../samza/storage/TaskStorageManager.scala | 54 +-
.../main/scala/org/apache/samza/util/Util.scala | 1 -
.../MockClusterResourceManagerFactory.java | 32 +
.../clustermanager/MockContainerListener.java | 1 +
.../TestClusterBasedJobCoordinator.java | 108 +++
.../clustermanager/TestContainerAllocator.java | 12 +-
.../TestContainerProcessManager.java | 22 +-
.../TestHostAwareContainerAllocator.java | 16 +-
.../apache/samza/config/TestTaskConfigJava.java | 20 +
.../coordinator/JobModelManagerTestUtil.java | 4 +-
.../samza/coordinator/TestJobModelManager.java | 24 +-
.../apache/samza/execution/TestStreamEdge.java | 1 +
.../samza/operators/TestStreamGraphImpl.java | 38 ++
.../samza/operators/TestWindowOperator.java | 565 ----------------
.../operators/impl/TestOperatorImplGraph.java | 10 +-
.../operators/impl/TestWindowOperator.java | 677 +++++++++++++++++++
.../samza/storage/MockSystemConsumer.java | 59 --
.../apache/samza/storage/MockSystemFactory.java | 45 --
.../samza/storage/TestStorageRecovery.java | 37 +-
.../apache/samza/system/MockSystemFactory.java | 181 +++++
.../zk/TestZkBarrierForVersionUpgrade.java | 23 +-
.../samza/checkpoint/TestCheckpointTool.scala | 23 +-
.../samza/container/TestSamzaContainer.scala | 13 +-
.../samza/container/TestTaskInstance.scala | 10 +-
.../samza/coordinator/TestJobCoordinator.scala | 65 +-
.../TestStreamPartitionCountMonitor.scala | 82 ++-
.../processor/StreamProcessorTestUtils.scala | 3 +-
.../samza/serializers/TestCheckpointSerde.scala | 8 +
.../samza/storage/TestTaskStorageManager.scala | 210 ++++--
.../TestRangeSystemStreamPartitionMatcher.scala | 1 -
.../TestRegexSystemStreamPartitionMatcher.scala | 1 -
.../scala/org/apache/samza/util/TestUtil.scala | 16 +
.../checkpoint/kafka/KafkaCheckpointLogKey.java | 110 +++
.../kafka/KafkaCheckpointLogKeySerde.java | 68 ++
.../samza/system/kafka/KafkaStreamSpec.java | 4 +
.../kafka/KafkaCheckpointLogKey.scala | 171 -----
.../kafka/KafkaCheckpointManager.scala | 385 ++++++-----
.../kafka/KafkaCheckpointManagerFactory.scala | 81 +--
.../system/kafka/KafkaSystemConsumer.scala | 8 +-
.../kafka/KafkaSystemConsumerMetrics.scala | 2 -
.../kafka/TestKafkaCheckpointLogKeySerde.java | 53 ++
.../kafka/TestKafkaCheckpointManagerJava.java | 247 +++++++
.../kafka/TeskKafkaCheckpointLogKey.scala | 61 --
.../kafka/TestKafkaCheckpointManager.scala | 388 ++++-------
.../system/kafka/TestKafkaSystemAdmin.scala | 2 +-
.../samza/storage/kv/RocksDbKeyValueReader.java | 3 +-
.../apache/samza/monitor/LocalStoreMonitor.java | 1 +
.../samza/monitor/LocalStoreMonitorMetrics.java | 4 +
.../apache/samza/sql/avro/AvroRelConverter.java | 183 +++++
.../samza/sql/avro/AvroRelConverterFactory.java | 44 ++
.../samza/sql/avro/AvroRelSchemaProvider.java | 28 +
.../samza/sql/avro/AvroTypeFactoryImpl.java | 132 ++++
...ConfigBasedAvroRelSchemaProviderFactory.java | 63 ++
.../org/apache/samza/sql/data/Expression.java | 38 ++
.../samza/sql/data/RexToJavaCompiler.java | 224 ++++++
.../sql/data/SamzaSqlExecutionContext.java | 61 ++
.../samza/sql/data/SamzaSqlRelMessage.java | 123 ++++
.../org/apache/samza/sql/fn/FlattenUdf.java | 36 +
.../impl/ConfigBasedSourceResolverFactory.java | 71 ++
.../samza/sql/impl/ConfigBasedUdfResolver.java | 97 +++
.../samza/sql/interfaces/RelSchemaProvider.java | 36 +
.../interfaces/RelSchemaProviderFactory.java | 33 +
.../samza/sql/interfaces/SamzaRelConverter.java | 46 ++
.../interfaces/SamzaRelConverterFactory.java | 39 ++
.../samza/sql/interfaces/SourceResolver.java | 34 +
.../sql/interfaces/SourceResolverFactory.java | 36 +
.../sql/interfaces/SqlSystemStreamConfig.java | 74 ++
.../samza/sql/interfaces/UdfMetadata.java | 61 ++
.../samza/sql/interfaces/UdfResolver.java | 35 +
.../org/apache/samza/sql/planner/Checker.java | 93 +++
.../apache/samza/sql/planner/QueryPlanner.java | 153 +++++
.../sql/planner/SamzaSqlOperatorTable.java | 101 +++
.../sql/planner/SamzaSqlScalarFunctionImpl.java | 84 +++
.../sql/planner/SamzaSqlUdfOperatorTable.java | 62 ++
.../samza/sql/runner/SamzaSqlApplication.java | 56 ++
.../sql/runner/SamzaSqlApplicationConfig.java | 245 +++++++
.../sql/runner/SamzaSqlApplicationRunner.java | 133 ++++
.../apache/samza/sql/testutil/ConfigUtil.java | 62 ++
.../org/apache/samza/sql/testutil/JsonUtil.java | 91 +++
.../samza/sql/testutil/ReflectionUtils.java | 62 ++
.../samza/sql/testutil/SamzaSqlQueryParser.java | 188 +++++
.../samza/sql/testutil/SqlFileParser.java | 103 +++
.../samza/sql/translator/FilterTranslator.java | 62 ++
.../samza/sql/translator/ProjectTranslator.java | 108 +++
.../samza/sql/translator/QueryTranslator.java | 96 +++
.../samza/sql/translator/ScanTranslator.java | 70 ++
.../samza/sql/translator/TranslatorContext.java | 162 +++++
.../apache/samza/sql/TestQueryTranslator.java | 103 +++
.../sql/TestSamzaSqlApplicationConfig.java | 92 +++
.../samza/sql/TestSamzaSqlFileParser.java | 58 ++
.../samza/sql/TestSamzaSqlQueryParser.java | 70 ++
.../samza/sql/TestSamzaSqlRelMessage.java | 46 ++
.../samza/sql/avro/TestAvroRelConversion.java | 239 +++++++
.../samza/sql/avro/schemas/ComplexRecord.avsc | 143 ++++
.../samza/sql/avro/schemas/ComplexRecord.java | 92 +++
.../apache/samza/sql/avro/schemas/MyFixed.java | 29 +
.../samza/sql/avro/schemas/SimpleRecord.avsc | 39 ++
.../samza/sql/avro/schemas/SimpleRecord.java | 52 ++
.../samza/sql/avro/schemas/SubRecord.java | 53 ++
.../samza/sql/avro/schemas/TestEnumType.java | 31 +
.../samza/sql/e2e/TestSamzaSqlEndToEnd.java | 137 ++++
.../sql/system/ConsoleLoggingSystemFactory.java | 83 +++
.../samza/sql/system/SimpleSystemAdmin.java | 61 ++
.../samza/sql/system/TestAvroSystemFactory.java | 156 +++++
.../samza/sql/testutil/MyTestArrayUdf.java | 37 +
.../apache/samza/sql/testutil/MyTestUdf.java | 45 ++
.../samza/sql/testutil/SamzaSqlTestConfig.java | 103 +++
samza-sql/src/test/resources/log4j.xml | 43 ++
.../performance/TestKeyValuePerformance.scala | 2 +-
.../samza/processor/TestZkStreamProcessor.java | 11 +-
.../processor/TestZkStreamProcessorBase.java | 3 +-
.../TestZkStreamProcessorFailures.java | 8 +-
.../processor/TestZkStreamProcessorSession.java | 5 +-
.../operator/TestRepartitionJoinWindowApp.java | 13 +-
.../processor/TestZkLocalApplicationRunner.java | 15 +-
settings.gradle | 5 +-
158 files changed, 8640 insertions(+), 1843 deletions(-)
----------------------------------------------------------------------
[33/47] samza git commit: cleanup names
Posted by bo...@apache.org.
cleanup names
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/f14d6081
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/f14d6081
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/f14d6081
Branch: refs/heads/NewKafkaSystemConsumer
Commit: f14d6081f25f1738d0a31c9d2798f8bdd52a7c75
Parents: ceb0f6a
Author: Boris S <bo...@apache.org>
Authored: Wed Sep 5 14:26:28 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Sep 5 14:26:28 2018 -0700
----------------------------------------------------------------------
.../samza/system/kafka/KafkaConsumerProxy.java | 16 ++++++++--------
.../samza/system/kafka/NewKafkaSystemConsumer.java | 6 +++---
2 files changed, 11 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/f14d6081/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 7232a0a..5c79017 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -48,7 +48,7 @@ import org.slf4j.LoggerFactory;
/**
* Separate thread that reads messages from kafka and puts them into the BlockingEnvelopeMap.
- * This class is not thread safe. There will be only one instance of this class per LiKafkaSystemConsumer object.
+ * This class is not thread safe. There will be only one instance of this class per KafkaSystemConsumer object.
* We still need some synchronization around kafkaConsumer. See pollConsumer() method for details.
*/
public class KafkaConsumerProxy<K, V> {
@@ -108,7 +108,7 @@ public class KafkaConsumerProxy<K, V> {
}
}
} else {
- LOG.debug("Tried to start an already started LiKafkaConsumerProxy (%s). Ignoring.", this.toString());
+ LOG.debug("Tried to start an already started KafkaConsumerProxy (%s). Ignoring.", this.toString());
}
}
@@ -146,14 +146,14 @@ public class KafkaConsumerProxy<K, V> {
}
System.out.println("THREAD: finished " + consumerPollThread.getName());
} catch (Throwable throwable) {
- LOG.error(String.format("Error in LiKafkaConsumerProxy poll thread for system: %s.", systemName), throwable);
- // SamzaLiKafkaSystemConsumer uses the failureCause to propagate the throwable to the container
+ LOG.error(String.format("Error in KafkaConsumerProxy poll thread for system: %s.", systemName), throwable);
+ // SamzaKafkaSystemConsumer uses the failureCause to propagate the throwable to the container
failureCause = throwable;
isRunning = false;
}
if (!isRunning) {
- LOG.info("Stopping the LiKafkaConsumerProxy poll thread for system: {}.", systemName);
+ LOG.info("Stopping the KafkaConsumerProxy poll thread for system: {}.", systemName);
}
};
}
@@ -318,7 +318,7 @@ public class KafkaConsumerProxy<K, V> {
}
/*
- The only way to figure out lag for the LiKafkaConsumer is to look at the metrics after each poll() call.
+ The only way to figure out lag for the KafkaConsumer is to look at the metrics after each poll() call.
One of the metrics (records-lag) shows how far behind the HighWatermark the consumer is.
This method populates the lag information for each SSP into latestLags member variable.
*/
@@ -335,7 +335,7 @@ public class KafkaConsumerProxy<K, V> {
MetricName mn = ssp2MetricName.get(ssp);
Metric currentLagM = consumerMetrics.get(mn);
- // In linkedin-kafka-client 5.*, high watermark is fixed to be the offset of last available message,
+ // High watermark is fixed to be the offset of last available message,
// so the lag is now at least 0, which is the same as Samza's definition.
// If the lag is not 0, then isAtHead is not true, and kafkaClient keeps polling.
long currentLag = (currentLagM != null) ? (long) currentLagM.value() : -1L;
@@ -433,7 +433,7 @@ public class KafkaConsumerProxy<K, V> {
}
public void stop(long timeout) {
- System.out.println("THREAD: Shutting down LiKafkaConsumerProxy poll thread:" + consumerPollThread.getName());
+ System.out.println("THREAD: Shutting down KafkaConsumerProxy poll thread:" + consumerPollThread.getName());
isRunning = false;
try {
http://git-wip-us.apache.org/repos/asf/samza/blob/f14d6081/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index 717b45d..afec8ad 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -194,7 +194,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
try {
synchronized (kafkaConsumer) {
// TODO in the future we may need to add special handling here for BEGIN/END_OFFSET
- // this will call liKafkaConsumer.seekToBegin/End()
+ // this will call KafkaConsumer.seekToBegin/End()
kafkaConsumer.seek(tp, startingOffset); // this value should already be the 'upcoming' value
}
} catch (Exception e) {
@@ -274,7 +274,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
kafkaConsumer.close();
}
} catch (Exception e) {
- LOG.warn("failed to stop SamzaRawLiKafkaConsumer + " + this, e);
+ LOG.warn("failed to stop SamzaRawKafkaConsumer + " + this, e);
}
}
@@ -340,7 +340,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
throw new SamzaException(message, proxy.getFailureCause());
} else {
LOG.warn("Failure cause is not populated for KafkaConsumerProxy");
- throw new SamzaException("LiKafkaConsumerProxy has stopped");
+ throw new SamzaException("KafkaConsumerProxy has stopped");
}
}
[46/47] samza git commit: rename of a var
Posted by bo...@apache.org.
rename of a var
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/74b6cfab
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/74b6cfab
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/74b6cfab
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 74b6cfabdbb5112488965c2fc3629156e0ff8c4c
Parents: ed0648d
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 18 14:17:58 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 18 14:17:58 2018 -0700
----------------------------------------------------------------------
.../apache/samza/system/kafka/KafkaConsumerProxy.java | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/74b6cfab/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 6fc6491..b67df0a 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -367,20 +367,20 @@ import org.slf4j.LoggerFactory;
Using the consumer to poll the messages from the stream.
*/
private void fetchMessages() {
- Set<SystemStreamPartition> SSPsToFetch = new HashSet<>();
+ Set<SystemStreamPartition> sspsToFetch = new HashSet<>();
for (SystemStreamPartition ssp : nextOffsets.keySet()) {
if (sink.needsMoreMessages(ssp)) {
- SSPsToFetch.add(ssp);
+ sspsToFetch.add(ssp);
}
}
- LOG.debug("pollConsumer {}", SSPsToFetch.size());
- if (!SSPsToFetch.isEmpty()) {
+ LOG.debug("pollConsumer {}", sspsToFetch.size());
+ if (!sspsToFetch.isEmpty()) {
kafkaConsumerMetrics.incClientReads(metricName);
Map<SystemStreamPartition, List<IncomingMessageEnvelope>> response;
- LOG.debug("pollConsumer from following SSPs: {}; total#={}", SSPsToFetch, SSPsToFetch.size());
+ LOG.debug("pollConsumer from following SSPs: {}; total#={}", sspsToFetch, sspsToFetch.size());
- response = pollConsumer(SSPsToFetch, 500); // TODO should be default value from ConsumerConfig
+ response = pollConsumer(sspsToFetch, 500); // TODO should be default value from ConsumerConfig
// move the responses into the queue
for (Map.Entry<SystemStreamPartition, List<IncomingMessageEnvelope>> e : response.entrySet()) {
@@ -390,7 +390,7 @@ import org.slf4j.LoggerFactory;
}
}
- populateCurrentLags(SSPsToFetch); // find current lags for each SSP
+ populateCurrentLags(sspsToFetch); // find current lags for for each SSP
} else { // nothing to read
LOG.debug("No topic/partitions need to be fetched for consumer {} right now. Sleeping {}ms.", kafkaConsumer,
[36/47] samza git commit: merge
Posted by bo...@apache.org.
merge
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/9217644e
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/9217644e
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/9217644e
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 9217644ec05cc9dfe0140b5ee488fcea2fed83b9
Parents: 0b6768f 728dc18
Author: Boris S <bo...@apache.org>
Authored: Fri Sep 7 16:00:02 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Sep 7 16:00:02 2018 -0700
----------------------------------------------------------------------
.../application/ApplicationDescriptor.java | 80 +++
.../samza/application/SamzaApplication.java | 40 ++
.../samza/application/StreamApplication.java | 75 +--
.../StreamApplicationDescriptor.java | 113 ++++
.../samza/application/TaskApplication.java | 86 +++
.../application/TaskApplicationDescriptor.java | 64 ++
.../java/org/apache/samza/config/Config.java | 3 +-
.../samza/metrics/MetricsReporterFactory.java | 5 +-
.../apache/samza/operators/MessageStream.java | 9 +-
.../org/apache/samza/operators/StreamGraph.java | 120 ----
.../operators/functions/ClosableFunction.java | 7 +-
.../operators/functions/InitableFunction.java | 6 +-
.../operators/functions/StreamExpander.java | 16 +-
.../apache/samza/runtime/ApplicationRunner.java | 92 +--
.../samza/runtime/ApplicationRunners.java | 82 +++
.../apache/samza/runtime/ProcessorContext.java | 31 +
.../runtime/ProcessorLifecycleListener.java | 55 ++
.../ProcessorLifecycleListenerFactory.java | 40 ++
.../samza/task/AsyncStreamTaskFactory.java | 10 +-
.../apache/samza/task/StreamTaskFactory.java | 6 +-
.../java/org/apache/samza/task/TaskFactory.java | 38 ++
.../samza/runtime/TestApplicationRunners.java | 88 +++
.../application/ApplicationDescriptorImpl.java | 179 ++++++
.../application/ApplicationDescriptorUtil.java | 51 ++
.../samza/application/ApplicationUtil.java | 63 ++
.../application/LegacyTaskApplication.java | 37 ++
.../StreamApplicationDescriptorImpl.java | 381 ++++++++++++
.../TaskApplicationDescriptorImpl.java | 129 ++++
.../samza/container/SamzaContainerListener.java | 22 +-
.../samza/execution/ExecutionPlanner.java | 7 +-
.../org/apache/samza/execution/JobGraph.java | 6 -
.../org/apache/samza/execution/JobPlanner.java | 188 ++++++
.../apache/samza/execution/LocalJobPlanner.java | 134 +++++
.../samza/execution/RemoteJobPlanner.java | 96 +++
.../samza/operators/MessageStreamImpl.java | 57 +-
.../samza/operators/OperatorSpecGraph.java | 26 +-
.../apache/samza/operators/StreamGraphSpec.java | 336 -----------
.../samza/operators/spec/OperatorSpec.java | 2 +-
.../stream/IntermediateMessageStreamImpl.java | 6 +-
.../apache/samza/processor/StreamProcessor.java | 122 ++--
.../StreamProcessorLifecycleListener.java | 49 --
.../runtime/AbstractApplicationRunner.java | 135 -----
.../samza/runtime/ApplicationRunnerMain.java | 42 +-
.../samza/runtime/LocalApplicationRunner.java | 355 ++++-------
.../samza/runtime/LocalContainerRunner.java | 56 +-
.../samza/runtime/RemoteApplicationRunner.java | 123 ++--
.../apache/samza/task/StreamOperatorTask.java | 5 +-
.../org/apache/samza/task/TaskFactoryUtil.java | 137 ++---
.../apache/samza/container/SamzaContainer.scala | 16 +-
.../scala/org/apache/samza/job/JobRunner.scala | 2 -
.../samza/job/local/ThreadJobFactory.scala | 49 +-
.../application/MockStreamApplication.java | 29 +
.../samza/application/TestApplicationUtil.java | 96 +++
.../TestStreamApplicationDescriptorImpl.java | 584 +++++++++++++++++++
.../TestTaskApplicationDescriptorImpl.java | 144 +++++
.../samza/execution/TestExecutionPlanner.java | 192 +++---
.../execution/TestJobGraphJsonGenerator.java | 120 ++--
.../org/apache/samza/execution/TestJobNode.java | 53 +-
.../samza/execution/TestLocalJobPlanner.java | 211 +++++++
.../samza/execution/TestRemoteJobPlanner.java | 88 +++
.../samza/operators/TestJoinOperator.java | 103 ++--
.../samza/operators/TestMessageStreamImpl.java | 29 +-
.../samza/operators/TestOperatorSpecGraph.java | 19 +-
.../samza/operators/TestStreamGraphSpec.java | 506 ----------------
.../operators/impl/TestOperatorImplGraph.java | 190 +++---
.../operators/impl/TestWindowOperator.java | 147 ++---
.../spec/TestPartitionByOperatorSpec.java | 70 ++-
.../samza/processor/TestStreamProcessor.java | 139 +++--
.../runtime/TestApplicationRunnerMain.java | 47 +-
.../runtime/TestLocalApplicationRunner.java | 311 +++-------
.../runtime/TestRemoteApplicationRunner.java | 35 +-
.../apache/samza/task/MockAsyncStreamTask.java | 31 +
.../org/apache/samza/task/MockStreamTask.java | 31 +
.../apache/samza/task/TestTaskFactoryUtil.java | 215 ++-----
.../samza/testUtils/TestAsyncStreamTask.java | 35 --
.../samza/testUtils/TestStreamApplication.java | 33 --
.../apache/samza/testUtils/TestStreamTask.java | 34 --
.../samza/container/TestSamzaContainer.scala | 76 ++-
.../samza/sql/runner/SamzaSqlApplication.java | 13 +-
.../sql/runner/SamzaSqlApplicationRunner.java | 53 +-
.../samza/sql/translator/JoinTranslator.java | 2 +-
.../samza/sql/translator/QueryTranslator.java | 27 +-
.../samza/sql/translator/ScanTranslator.java | 8 +-
.../samza/sql/translator/TranslatorContext.java | 19 +-
.../apache/samza/sql/e2e/TestSamzaSqlTable.java | 8 +-
.../runner/TestSamzaSqlApplicationRunner.java | 2 -
.../sql/translator/TestFilterTranslator.java | 6 +-
.../sql/translator/TestJoinTranslator.java | 16 +-
.../sql/translator/TestProjectTranslator.java | 14 +-
.../sql/translator/TestQueryTranslator.java | 162 +++--
.../example/AppWithGlobalConfigExample.java | 25 +-
.../apache/samza/example/BroadcastExample.java | 22 +-
.../samza/example/KeyValueStoreExample.java | 19 +-
.../org/apache/samza/example/MergeExample.java | 18 +-
.../samza/example/OrderShipmentJoinExample.java | 19 +-
.../samza/example/PageViewCounterExample.java | 15 +-
.../samza/example/RepartitionExample.java | 19 +-
.../samza/example/TaskApplicationExample.java | 77 +++
.../org/apache/samza/example/WindowExample.java | 18 +-
.../samza/system/mock/MockSystemConsumer.java | 4 +-
.../apache/samza/test/framework/TestRunner.java | 41 +-
.../integration/LocalApplicationRunnerMain.java | 21 +-
.../TestStandaloneIntegrationApplication.java | 9 +-
.../processor/TestZkStreamProcessorBase.java | 20 +-
.../EndOfStreamIntegrationTest.java | 37 +-
.../WatermarkIntegrationTest.java | 62 +-
.../test/framework/BroadcastAssertApp.java | 7 +-
.../StreamApplicationIntegrationTest.java | 9 +-
...StreamApplicationIntegrationTestHarness.java | 42 +-
.../samza/test/framework/TestTimerApp.java | 7 +-
.../apache/samza/test/framework/TimerTest.java | 18 +-
.../test/operator/RepartitionJoinWindowApp.java | 25 +-
.../test/operator/RepartitionWindowApp.java | 20 +-
.../samza/test/operator/SessionWindowApp.java | 17 +-
.../operator/TestRepartitionJoinWindowApp.java | 30 +-
.../test/operator/TestRepartitionWindowApp.java | 10 +-
.../samza/test/operator/TumblingWindowApp.java | 16 +-
.../test/processor/TestStreamApplication.java | 82 +--
.../test/processor/TestStreamProcessor.java | 18 +-
.../processor/TestZkLocalApplicationRunner.java | 317 +++++-----
.../apache/samza/test/table/TestLocalTable.java | 39 +-
.../table/TestLocalTableWithSideInputs.java | 13 +-
.../samza/test/table/TestRemoteTable.java | 27 +-
.../benchmark/SystemConsumerWithSamzaBench.java | 14 +-
124 files changed, 5280 insertions(+), 3632 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/9217644e/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/9217644e/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
----------------------------------------------------------------------
diff --cc samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
index 0d71303,abd7f65..bec4ec0
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
@@@ -19,17 -19,19 +19,18 @@@
package org.apache.samza.job.local
+ import org.apache.samza.application.{ApplicationDescriptorUtil, ApplicationUtil}
-import org.apache.samza.config.{Config, TaskConfigJava}
import org.apache.samza.config.JobConfig._
import org.apache.samza.config.ShellCommandConfig._
+import org.apache.samza.config.{Config, TaskConfigJava}
import org.apache.samza.container.{SamzaContainer, SamzaContainerListener, TaskName}
import org.apache.samza.coordinator.JobModelManager
import org.apache.samza.coordinator.stream.CoordinatorStreamManager
import org.apache.samza.job.{StreamJob, StreamJobFactory}
import org.apache.samza.metrics.{JmxServer, MetricsRegistryMap, MetricsReporter}
- import org.apache.samza.operators.StreamGraphSpec
+ import org.apache.samza.runtime.ProcessorContext
import org.apache.samza.storage.ChangelogStreamManager
-import org.apache.samza.task.TaskFactory
--import org.apache.samza.task.TaskFactoryUtil
++import org.apache.samza.task.{TaskFactory, TaskFactoryUtil}
import org.apache.samza.util.Logging
import scala.collection.JavaConversions._
@@@ -72,32 -72,36 +73,36 @@@ class ThreadJobFactory extends StreamJo
val containerId = "0"
val jmxServer = new JmxServer
- val streamApp = TaskFactoryUtil.createStreamApplication(config)
-
- val taskFactory = if (streamApp != null) {
- val graphSpec = new StreamGraphSpec(config)
- streamApp.init(graphSpec, config)
- TaskFactoryUtil.createTaskFactory(graphSpec.getOperatorSpecGraph(), graphSpec.getContextManager)
- } else {
- TaskFactoryUtil.createTaskFactory(config)
- }
+
+ val appDesc = ApplicationDescriptorUtil.getAppDescriptor(ApplicationUtil.fromConfig(config), config)
- val taskFactory : TaskFactory[_] = TaskFactoryUtil.getTaskFactory(appDesc)
++ val taskFactory: TaskFactory[_] = TaskFactoryUtil.getTaskFactory(appDesc)
// Give developers a nice friendly warning if they've specified task.opts and are using a threaded job.
config.getTaskOpts match {
- case Some(taskOpts) => warn("%s was specified in config, but is not being used because job is being executed with ThreadJob. You probably want to run %s=%s." format(TASK_JVM_OPTS, STREAM_JOB_FACTORY_CLASS, classOf[ProcessJobFactory].getName))
+ case Some(taskOpts) => warn("%s was specified in config, but is not being used because job is being executed with ThreadJob. " +
- "You probably want to run %s=%s." format (TASK_JVM_OPTS, STREAM_JOB_FACTORY_CLASS, classOf[ProcessJobFactory].getName))
++ "You probably want to run %s=%s." format(TASK_JVM_OPTS, STREAM_JOB_FACTORY_CLASS, classOf[ProcessJobFactory].getName))
case _ => None
}
- val containerListener = new SamzaContainerListener {
- override def onContainerFailed(t: Throwable): Unit = {
- error("Container failed.", t)
- throw t
- }
-
- override def onContainerStop(): Unit = {
- }
-
- override def onContainerStart(): Unit = {
+ val containerListener = {
- val processorLifecycleListener = appDesc.getProcessorLifecycleListenerFactory().createInstance(new ProcessorContext() { }, config)
++ val processorLifecycleListener = appDesc.getProcessorLifecycleListenerFactory().createInstance(new ProcessorContext() {}, config)
+ new SamzaContainerListener {
+ override def afterFailure(t: Throwable): Unit = {
+ processorLifecycleListener.afterFailure(t)
+ throw t
+ }
+
+ override def afterStart(): Unit = {
+ processorLifecycleListener.afterStart()
+ }
+
+ override def afterStop(): Unit = {
+ processorLifecycleListener.afterStop()
+ }
+
+ override def beforeStart(): Unit = {
+ processorLifecycleListener.beforeStart()
+ }
}
}
[17/47] samza git commit: test
Posted by bo...@apache.org.
test
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/4801709f
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/4801709f
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/4801709f
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 4801709f3d8d2b50a059abb830de23ffcdaffda5
Parents: 57fca52
Author: Boris S <bo...@apache.org>
Authored: Thu Aug 16 10:38:26 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Thu Aug 16 10:38:26 2018 -0700
----------------------------------------------------------------------
.../samza/checkpoint/kafka/TestKafkaCheckpointManager.scala | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/4801709f/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala b/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
index 8d92f4d..065170c 100644
--- a/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
+++ b/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
@@ -88,12 +88,12 @@ class TestKafkaCheckpointManager extends KafkaServerTestHarness {
zkClient.close
// read before topic exists should result in a null checkpoint
- val readCp = readCheckpoint(checkpointTopic, taskName)
- assertNull(readCp)
+ //val readCp = readCheckpoint(checkpointTopic, taskName)
+ //assertNull(readCp)
writeCheckpoint(checkpointTopic, taskName, checkpoint1)
assertEquals(checkpoint1, readCheckpoint(checkpointTopic, taskName))
-
+try {Thread.sleep(20000)} catch { case e:Exception =>() }
// writing a second message and reading it returns a more recent checkpoint
writeCheckpoint(checkpointTopic, taskName, checkpoint2)
assertEquals(checkpoint2, readCheckpoint(checkpointTopic, taskName))
@@ -194,6 +194,7 @@ class TestKafkaCheckpointManager extends KafkaServerTestHarness {
val systemFactory = Util.getObj(systemFactoryClassName, classOf[SystemFactory])
val spec = new KafkaStreamSpec("id", cpTopic, checkpointSystemName, 1, 1, props)
+ System.out.println("CONFIG:" + config)
new KafkaCheckpointManager(spec, systemFactory, failOnTopicValidation, config, new NoOpMetricsRegistry, serde)
}
[45/47] samza git commit: addressed comments
Posted by bo...@apache.org.
addressed comments
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/ed0648dc
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/ed0648dc
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/ed0648dc
Branch: refs/heads/NewKafkaSystemConsumer
Commit: ed0648dca2b2a902875073861a433238d84ce68f
Parents: 5120740
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 18 13:12:14 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 18 13:12:14 2018 -0700
----------------------------------------------------------------------
.../org/apache/samza/system/kafka/KafkaConsumerProxy.java | 2 +-
.../org/apache/samza/system/kafka/KafkaSystemFactory.scala | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/ed0648dc/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 83e7a58..6fc6491 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -51,7 +51,7 @@ import org.slf4j.LoggerFactory;
* This class is not thread safe. There will be only one instance of this class per KafkaSystemConsumer object.
* We still need some synchronization around kafkaConsumer. See pollConsumer() method for details.
*/
-public class KafkaConsumerProxy<K, V> {
+/*package private */class KafkaConsumerProxy<K, V> {
private static final Logger LOG = LoggerFactory.getLogger(KafkaConsumerProxy.class);
private static final int SLEEP_MS_WHILE_NO_TOPIC_PARTITION = 100;
http://git-wip-us.apache.org/repos/asf/samza/blob/ed0648dc/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index 9f92583..5342b08 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -53,10 +53,10 @@ class KafkaSystemFactory extends SystemFactory with Logging {
val kafkaConsumer = KafkaSystemConsumer.getKafkaConsumerImpl(systemName, clientId, config)
info("Created kafka consumer for system %s, clientId %s: %s" format (systemName, clientId, kafkaConsumer))
- val kc = new KafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, new SystemClock)
- info("Created samza system consumer %s" format (kc.toString))
+ val kafkaSystemConsumer = new KafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, new SystemClock)
+ info("Created samza system consumer %s" format (kafkaSystemConsumer.toString))
- kc
+ kafkaSystemConsumer
}
def getProducer(systemName: String, config: Config, registry: MetricsRegistry): SystemProducer = {
[02/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/410ce78b
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/410ce78b
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/410ce78b
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 410ce78ba1ff8dafa2587481473e62ac9cfa6f4f
Parents: a31a7aa 343712e
Author: Boris S <bo...@apache.org>
Authored: Mon Oct 16 18:20:04 2017 -0700
Committer: Boris S <bo...@apache.org>
Committed: Mon Oct 16 18:20:04 2017 -0700
----------------------------------------------------------------------
.../versioned/jobs/configuration-table.html | 8 +++++
.../samza/storage/kv/RocksDbKeyValueStore.scala | 32 ++++++++++++-----
.../storage/kv/TestRocksDbKeyValueStore.scala | 38 ++++++++++++++++++--
3 files changed, 66 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
[19/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/7f7b5594
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/7f7b5594
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/7f7b5594
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 7f7b5594c21e9e3ebf25d243cbf078dbf6201a0f
Parents: 57fca52 a9ff093
Author: Boris S <bo...@apache.org>
Authored: Wed Aug 22 11:31:26 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Aug 22 11:31:26 2018 -0700
----------------------------------------------------------------------
.../samza/execution/ExecutionPlanner.java | 20 +++++---
.../runtime/AbstractApplicationRunner.java | 20 +++++---
.../samza/runtime/LocalApplicationRunner.java | 41 ++++++++--------
.../samza/runtime/RemoteApplicationRunner.java | 36 +++++++-------
.../org/apache/samza/config/MetricsConfig.scala | 11 +++--
.../diagnostics/DiagnosticsExceptionEvent.java | 6 +--
.../scala/org/apache/samza/job/JobRunner.scala | 2 +-
.../reporter/MetricsSnapshotReporter.scala | 35 +++++++-------
.../MetricsSnapshotReporterFactory.scala | 6 +--
.../runtime/TestLocalApplicationRunner.java | 50 ++++++++++++--------
10 files changed, 129 insertions(+), 98 deletions(-)
----------------------------------------------------------------------
[10/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/67e611ee
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/67e611ee
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/67e611ee
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 67e611ee47f0747b81c0de5d061f9888145b605a
Parents: dd39d08 b00ebd2
Author: Boris S <bo...@apache.org>
Authored: Wed Jan 10 09:50:40 2018 -0800
Committer: Boris S <bo...@apache.org>
Committed: Wed Jan 10 09:50:40 2018 -0800
----------------------------------------------------------------------
RELEASE.md | 4 +-
bin/merge-pull-request.py | 28 +-
build.gradle | 96 +++++-
docs/README.md | 4 +-
docs/_config.yml | 4 +-
docs/_docs/replace-versioned.sh | 3 +
docs/_layouts/default.html | 1 +
docs/archive/index.html | 8 +
.../documentation/hadoop/multi_stage_batch.png | Bin 0 -> 65364 bytes
.../hadoop/unified_batch_streaming.png | Bin 0 -> 17371 bytes
.../azure/eventhub_send_methods.png | Bin 0 -> 52427 bytes
.../documentation/versioned/aws/kinesis.md | 104 +++++++
.../documentation/versioned/azure/eventhubs.md | 197 ++++++++++++
.../versioned/container/metrics-table.html | 12 +
.../documentation/versioned/hadoop/consumer.md | 110 +++++++
.../documentation/versioned/hadoop/overview.md | 46 +++
.../documentation/versioned/hadoop/producer.md | 68 ++++
.../documentation/versioned/hdfs/consumer.md | 110 -------
.../documentation/versioned/hdfs/producer.md | 70 -----
docs/learn/documentation/versioned/index.html | 27 +-
.../versioned/jobs/configuration-table.html | 128 ++++++++
.../versioned/yarn/yarn-host-affinity.md | 2 +-
.../versioned/yarn/yarn-security.md | 1 -
.../versioned/deploy-samza-job-from-hdfs.md | 2 +-
.../tutorials/versioned/deploy-samza-to-CDH.md | 4 +-
.../versioned/hello-samza-high-level-code.md | 87 +++---
.../versioned/hello-samza-high-level-yarn.md | 2 +-
.../versioned/hello-samza-high-level-zk.md | 2 +-
docs/learn/tutorials/versioned/index.md | 3 +
.../versioned/remote-debugging-samza.md | 2 +-
.../versioned/run-in-multi-node-yarn.md | 4 +-
.../versioned/samza-rest-getting-started.md | 2 +-
docs/learn/tutorials/versioned/samza-sql.md | 123 ++++++++
docs/learn/tutorials/versioned/samza-tools.md | 109 +++++++
docs/startup/download/index.md | 24 +-
docs/startup/hello-samza/versioned/index.md | 2 +-
docs/startup/preview/index.md | 135 ++++++--
.../startup/releases/versioned/release-notes.md | 82 +++++
gradle.properties | 2 +-
gradle/dependency-versions.gradle | 1 +
.../java/org/apache/samza/operators/KV.java | 2 +-
.../apache/samza/operators/MessageStream.java | 69 ++++-
.../org/apache/samza/operators/StreamGraph.java | 48 ++-
.../apache/samza/operators/TableDescriptor.java | 73 +++++
.../functions/StreamTableJoinFunction.java | 59 ++++
.../org/apache/samza/serializers/KVSerde.java | 8 +-
.../table/LocalStoreBackedTableProvider.java | 37 +++
.../org/apache/samza/table/ReadWriteTable.java | 75 +++++
.../org/apache/samza/table/ReadableTable.java | 61 ++++
.../main/java/org/apache/samza/table/Table.java | 31 ++
.../org/apache/samza/table/TableProvider.java | 57 ++++
.../samza/table/TableProviderFactory.java | 35 +++
.../java/org/apache/samza/table/TableSpec.java | 125 ++++++++
.../java/org/apache/samza/task/TaskContext.java | 6 +-
.../kinesis/KinesisAWSCredentialsProvider.java | 69 +++++
.../samza/system/kinesis/KinesisConfig.java | 287 +++++++++++++++++
.../system/kinesis/KinesisSystemAdmin.java | 124 ++++++++
.../system/kinesis/KinesisSystemFactory.java | 87 ++++++
.../KinesisIncomingMessageEnvelope.java | 62 ++++
.../consumer/KinesisRecordProcessor.java | 208 +++++++++++++
.../KinesisRecordProcessorListener.java | 51 +++
.../kinesis/consumer/KinesisSystemConsumer.java | 256 +++++++++++++++
.../consumer/KinesisSystemConsumerOffset.java | 107 +++++++
.../consumer/NoAvailablePartitionException.java | 38 +++
.../system/kinesis/consumer/SSPAllocator.java | 73 +++++
.../metrics/KinesisSystemConsumerMetrics.java | 106 +++++++
.../system/kinesis/metrics/SamzaHistogram.java | 63 ++++
.../TestKinesisAWSCredentialsProvider.java | 60 ++++
.../samza/system/kinesis/TestKinesisConfig.java | 132 ++++++++
.../kinesis/TestKinesisSystemFactory.java | 115 +++++++
.../consumer/TestKinesisRecordProcessor.java | 301 ++++++++++++++++++
.../consumer/TestKinesisSystemConsumer.java | 270 ++++++++++++++++
.../TestKinesisSystemConsumerOffset.java | 48 +++
.../kinesis/consumer/TestSSPAllocator.java | 127 ++++++++
.../azure/AzureCheckpointManager.java | 31 +-
.../azure/AzureCheckpointManagerFactory.java | 4 +-
.../samza/system/eventhub/EventHubConfig.java | 71 ++++-
.../eventhub/SamzaEventHubClientManager.java | 4 +-
.../eventhub/admin/EventHubSystemAdmin.java | 17 +-
.../consumer/EventHubSystemConsumer.java | 77 +++--
.../producer/EventHubSystemProducer.java | 103 +++---
.../azure/ITestAzureCheckpointManager.java | 2 +-
.../MockEventHubClientManagerFactory.java | 18 ++
.../eventhub/admin/TestEventHubSystemAdmin.java | 15 +-
.../consumer/ITestEventHubSystemConsumer.java | 5 +-
.../consumer/TestEventHubSystemConsumer.java | 19 +-
.../producer/ITestEventHubSystemProducer.java | 2 +
.../producer/TestEventHubSystemProducer.java | 34 +-
.../AbstractContainerAllocator.java | 17 +-
.../clustermanager/ClusterResourceManager.java | 26 +-
.../clustermanager/ContainerProcessManager.java | 74 ++++-
.../clustermanager/SamzaApplicationState.java | 8 +-
.../samza/clustermanager/SamzaResource.java | 8 +
.../apache/samza/config/JavaStorageConfig.java | 5 +-
.../apache/samza/config/JavaTableConfig.java | 87 ++++++
.../container/ContainerHeartbeatClient.java | 20 +-
.../container/ContainerHeartbeatMonitor.java | 11 +-
.../apache/samza/container/TaskContextImpl.java | 24 +-
.../AllSspToSingleTaskGrouperFactory.java | 50 ++-
.../samza/execution/ExecutionPlanner.java | 5 +
.../org/apache/samza/execution/JobGraph.java | 16 +
.../samza/execution/JobGraphJsonGenerator.java | 60 +++-
.../org/apache/samza/execution/JobNode.java | 54 +++-
.../samza/operators/BaseTableDescriptor.java | 94 ++++++
.../samza/operators/MessageStreamImpl.java | 35 ++-
.../apache/samza/operators/StreamGraphImpl.java | 46 ++-
.../org/apache/samza/operators/TableImpl.java | 40 +++
.../samza/operators/impl/InputOperatorImpl.java | 2 +-
.../samza/operators/impl/OperatorImpl.java | 16 +-
.../samza/operators/impl/OperatorImplGraph.java | 44 ++-
.../operators/impl/OutputOperatorImpl.java | 2 +-
.../operators/impl/PartitionByOperatorImpl.java | 7 +-
.../operators/impl/SendToTableOperatorImpl.java | 71 +++++
.../impl/StreamTableJoinOperatorImpl.java | 82 +++++
.../samza/operators/spec/InputOperatorSpec.java | 10 +-
.../samza/operators/spec/OperatorSpec.java | 12 +-
.../samza/operators/spec/OperatorSpecs.java | 44 ++-
.../samza/operators/spec/OutputStreamImpl.java | 10 +-
.../operators/spec/SendToTableOperatorSpec.java | 65 ++++
.../spec/StreamTableJoinOperatorSpec.java | 67 ++++
.../stream/IntermediateMessageStreamImpl.java | 13 +
.../samza/runtime/LocalContainerRunner.java | 9 +-
.../samza/runtime/RemoteApplicationRunner.java | 7 +-
.../standalone/PassthroughJobCoordinator.java | 6 +-
.../org/apache/samza/table/TableManager.java | 153 +++++++++
.../org/apache/samza/zk/ZkJobCoordinator.java | 30 +-
.../main/java/org/apache/samza/zk/ZkUtils.java | 2 +-
.../org/apache/samza/config/JobConfig.scala | 1 +
.../apache/samza/container/SamzaContainer.scala | 75 ++---
.../apache/samza/container/TaskInstance.scala | 40 ++-
.../samza/coordinator/JobModelManager.scala | 23 +-
.../org/apache/samza/metrics/JvmMetrics.scala | 28 +-
.../MockClusterResourceManager.java | 19 +-
.../MockClusterResourceManagerCallback.java | 10 +
.../clustermanager/TestContainerAllocator.java | 55 ----
.../TestContainerProcessManager.java | 101 +++---
.../TestHostAwareContainerAllocator.java | 56 ----
.../samza/config/TestJavaStorageConfig.java | 13 +
.../samza/config/TestJavaTableConfig.java | 58 ++++
.../stream/TestAllSspToSingleTaskGrouper.java | 125 ++++++++
.../execution/TestJobGraphJsonGenerator.java | 75 +++++
.../samza/operators/TestMessageStreamImpl.java | 70 ++++-
.../samza/operators/TestStreamGraphImpl.java | 25 +-
.../samza/operators/impl/TestOperatorImpl.java | 1 +
.../impl/TestStreamTableJoinOperatorImpl.java | 101 ++++++
.../apache/samza/table/TestTableManager.java | 176 +++++++++++
.../org/apache/samza/task/TestAsyncRunLoop.java | 14 +-
.../kafka/KafkaCheckpointManager.scala | 2 +-
.../system/kafka/KafkaSystemProducer.scala | 135 ++++----
.../kafka/TestKafkaCheckpointManager.scala | 1 -
.../system/kafka/TestKafkaSystemProducer.scala | 39 ++-
.../kv/inmemory/InMemoryTableDescriptor.java | 59 ++++
.../kv/inmemory/InMemoryTableProvider.java | 65 ++++
.../inmemory/InMemoryTableProviderFactory.java | 33 ++
.../inmemory/TestInMemoryTableDescriptor.java | 48 +++
.../kv/inmemory/TestInMemoryTableProvider.java | 65 ++++
.../storage/kv/RocksDbTableDescriptor.java | 232 ++++++++++++++
.../samza/storage/kv/RocksDbTableProvider.java | 64 ++++
.../storage/kv/RocksDbTableProviderFactory.java | 31 ++
.../samza/storage/kv/RocksDbKeyValueStore.scala | 12 +-
.../storage/kv/TestRocksDbTableDescriptor.java | 87 ++++++
.../storage/kv/TestRocksDbTableProvider.java | 66 ++++
.../kv/BaseLocalStoreBackedTableDescriptor.java | 56 ++++
.../kv/BaseLocalStoreBackedTableProvider.java | 92 ++++++
.../kv/LocalStoreBackedReadWriteTable.java | 68 ++++
.../kv/LocalStoreBackedReadableTable.java | 61 ++++
.../samza/storage/kv/AccessLoggedStore.scala | 4 +-
.../storage/kv/KeyValueStorageEngine.scala | 14 +-
.../kv/KeyValueStorageEngineMetrics.scala | 4 +-
.../apache/samza/storage/kv/LoggedStore.scala | 4 +-
.../storage/kv/NullSafeKeyValueStore.scala | 4 +-
.../storage/kv/SerializedKeyValueStore.scala | 4 +-
.../TestLocalBaseStoreBackedTableProvider.java | 77 +++++
.../storage/kv/TestKeyValueStorageEngine.scala | 16 +
.../samza/logging/log4j/StreamAppender.java | 110 ++++++-
.../logging/log4j/StreamAppenderMetrics.java | 43 +++
.../samza/logging/log4j/MockSystemProducer.java | 12 +-
.../samza/logging/log4j/TestStreamAppender.java | 144 +++++++--
.../src/main/visualizer/js/planToDagre.js | 6 +-
.../apache/samza/sql/avro/AvroRelConverter.java | 6 +-
samza-test/src/main/config/join/README | 8 +-
samza-test/src/main/python/configs/tests.json | 2 +-
.../test/processor/TestStreamProcessor.java | 9 +-
.../processor/TestZkLocalApplicationRunner.java | 152 +++++++--
.../apache/samza/test/table/TestLocalTable.java | 304 ++++++++++++++++++
.../apache/samza/test/table/TestTableData.java | 200 ++++++++++++
.../samza/test/util/ArraySystemConsumer.java | 4 +-
.../samza/test/util/SimpleSystemAdmin.java | 26 +-
samza-tools/config/eh-consumer-log4j.xml | 35 +++
.../config/generate-kafka-events-log4j.xml | 35 +++
samza-tools/config/samza-sql-console-log4j.xml | 35 +++
samza-tools/scripts/eh-consumer.sh | 34 ++
samza-tools/scripts/generate-kafka-events.sh | 34 ++
samza-tools/scripts/samza-sql-console.sh | 34 ++
.../apache/samza/tools/CommandLineHelper.java | 42 +++
.../tools/ConsoleLoggingSystemFactory.java | 126 ++++++++
.../samza/tools/EventHubConsoleConsumer.java | 120 +++++++
.../apache/samza/tools/GenerateKafkaEvents.java | 205 ++++++++++++
.../samza/tools/RandomValueGenerator.java | 87 ++++++
.../org/apache/samza/tools/SamzaSqlConsole.java | 188 +++++++++++
.../tools/avro/AvroSchemaGenRelConverter.java | 94 ++++++
.../avro/AvroSchemaGenRelConverterFactory.java | 43 +++
.../samza/tools/avro/AvroSerDeFactory.java | 96 ++++++
.../tools/json/JsonRelConverterFactory.java | 93 ++++++
.../samza/tools/schemas/PageViewEvent.avsc | 51 +++
.../samza/tools/schemas/PageViewEvent.java | 60 ++++
.../samza/tools/schemas/ProfileChangeEvent.avsc | 51 +++
.../samza/tools/schemas/ProfileChangeEvent.java | 60 ++++
.../apache/samza/tools/udf/RegexMatchUdf.java | 40 +++
samza-tools/src/main/resources/log4j.xml | 43 +++
.../org/apache/samza/job/yarn/YarnAppState.java | 4 +-
.../job/yarn/YarnClusterResourceManager.java | 310 +++++++++++++++++--
.../samza/job/yarn/YarnContainerRunner.java | 272 ----------------
settings.gradle | 6 +-
214 files changed, 10944 insertions(+), 1302 deletions(-)
----------------------------------------------------------------------
[39/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/952dbbe2
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/952dbbe2
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/952dbbe2
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 952dbbe20a23fc318589b62a044ac7e2cc944fc0
Parents: 728dc18 6668351
Author: Boris S <bo...@apache.org>
Authored: Mon Sep 10 19:06:24 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Mon Sep 10 19:06:24 2018 -0700
----------------------------------------------------------------------
samza-shell/src/main/bash/run-class.sh | 12 ++-
.../job/yarn/YarnClusterResourceManager.java | 41 ++++++----
.../yarn/TestYarnClusterResourceManager.java | 81 ++++++++++++++++++++
3 files changed, 116 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
[28/47] samza git commit: added eventProcessed sync
Posted by bo...@apache.org.
added eventProcessed sync
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/b5ce9b38
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/b5ce9b38
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/b5ce9b38
Branch: refs/heads/NewKafkaSystemConsumer
Commit: b5ce9b38da88318a625f1dd7a6d35b9ed14ca04b
Parents: 19ba300
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 4 17:22:16 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 4 17:22:16 2018 -0700
----------------------------------------------------------------------
.../apache/samza/checkpoint/OffsetManager.scala | 4 ++--
.../apache/samza/container/SamzaContainer.scala | 2 +-
.../org/apache/samza/job/local/ThreadJob.scala | 5 +----
.../samza/job/local/ThreadJobFactory.scala | 6 +++++-
.../apache/samza/job/local/TestThreadJob.scala | 9 --------
.../samza/system/kafka/KafkaConsumerProxy.java | 22 ++++++++++++++------
.../system/kafka/NewKafkaSystemConsumer.java | 18 +++++++++-------
.../test/integration/StreamTaskTestUtil.scala | 17 +++++++++++++--
.../integration/TestShutdownStatefulTask.scala | 6 +-----
9 files changed, 52 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala b/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
index 53d5e98..d2b6667 100644
--- a/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
+++ b/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
@@ -304,7 +304,7 @@ class OffsetManager(
*/
private def loadOffsetsFromCheckpointManager {
if (checkpointManager != null) {
- debug("Loading offsets from checkpoint manager.")
+ info("Loading offsets from checkpoint manager.")
checkpointManager.start
val result = systemStreamPartitions
@@ -332,7 +332,7 @@ class OffsetManager(
* Loads last processed offsets for a single taskName.
*/
private def restoreOffsetsFromCheckpoint(taskName: TaskName): Map[TaskName, Map[SystemStreamPartition, String]] = {
- debug("Loading checkpoints for taskName: %s." format taskName)
+ info("Loading checkpoints for taskName: %s." format taskName)
val checkpoint = checkpointManager.readLastCheckpoint(taskName)
http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala b/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
index 0c889d2..d02660b 100644
--- a/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
+++ b/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
@@ -825,7 +825,7 @@ class SamzaContainer(
}
try {
- info("Shutting down.")
+ info("Shutting down Samza.")
removeShutdownHook
jmxServer.stop
http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
index 33dde52..a61a297 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
@@ -19,12 +19,11 @@
package org.apache.samza.job.local
-import org.apache.samza.coordinator.JobModelManager
import org.apache.samza.job.ApplicationStatus.{New, Running, SuccessfulFinish, UnsuccessfulFinish}
import org.apache.samza.job.{ApplicationStatus, StreamJob}
import org.apache.samza.util.Logging
-class ThreadJob(runnable: Runnable, val jobModelManager: JobModelManager) extends StreamJob with Logging {
+class ThreadJob(runnable: Runnable) extends StreamJob with Logging {
@volatile var jobStatus: Option[ApplicationStatus] = None
var thread: Thread = null
@@ -44,8 +43,6 @@ class ThreadJob(runnable: Runnable, val jobModelManager: JobModelManager) extend
jobStatus = Some(UnsuccessfulFinish)
throw e
}
- } finally {
- jobModelManager.stop
}
}
}
http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
index 4b08721..34cc2a0 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
@@ -19,6 +19,8 @@
package org.apache.samza.job.local
+import java.util.concurrent.{CountDownLatch, TimeUnit}
+
import org.apache.samza.config.{Config, TaskConfigJava}
import org.apache.samza.config.JobConfig._
import org.apache.samza.config.ShellCommandConfig._
@@ -65,6 +67,7 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
val checkpointManager = new TaskConfigJava(jobModel.getConfig).getCheckpointManager(metricsRegistry)
if (checkpointManager != null) {
checkpointManager.createResources()
+ checkpointManager.stop()
}
ChangelogStreamManager.createChangelogStreams(jobModel.getConfig, jobModel.maxChangeLogStreamPartitions)
@@ -110,10 +113,11 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
taskFactory)
container.setContainerListener(containerListener)
- val threadJob = new ThreadJob(container, coordinator)
+ val threadJob = new ThreadJob(container)
threadJob
} finally {
coordinator.stop
+ coordinatorStreamManager.stop
jmxServer.stop
}
}
http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala b/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
index b1de215..4f3f511 100644
--- a/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
+++ b/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
@@ -19,7 +19,6 @@
package org.apache.samza.job.local
-import org.apache.samza.coordinator.JobModelManager
import org.junit.Assert._
import org.junit.Test
import org.apache.samza.job.ApplicationStatus
@@ -30,10 +29,6 @@ class TestThreadJob {
val job = new ThreadJob(new Runnable {
override def run {
}
- }, new JobModelManager(null) {
- override def stop: Unit = {
-
- }
})
job.submit
job.waitForFinish(999999)
@@ -45,10 +40,6 @@ class TestThreadJob {
override def run {
Thread.sleep(999999)
}
- }, new JobModelManager(null) {
- override def stop: Unit = {
-
- }
})
job.submit
job.waitForFinish(500)
http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index e61e0ff..cddfdfd 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -72,7 +72,7 @@ public class KafkaConsumerProxy<K, V> {
private volatile boolean isRunning = false;
private volatile Throwable failureCause = null;
- private CountDownLatch consumerPollThreadStartLatch = new CountDownLatch(1);
+ private final CountDownLatch consumerPollThreadStartLatch = new CountDownLatch(1);
public KafkaConsumerProxy(Consumer<K, V> kafkaConsumer, String systemName, String clientId,
NewKafkaSystemConsumer.KafkaConsumerMessageSink messageSink, KafkaSystemConsumerMetrics samzaConsumerMetrics,
@@ -93,19 +93,26 @@ public class KafkaConsumerProxy<K, V> {
public void start() {
if (!consumerPollThread.isAlive()) {
- LOG.info("Starting LiKafkaConsumerProxy polling thread for system " + systemName + " " + this.toString());
+ LOG.info("Starting KafkaConsumerProxy polling thread for system " + systemName + " " + this.toString());
consumerPollThread.setDaemon(true);
consumerPollThread.setName(
- "Samza LiKafkaConsumerProxy Poll " + consumerPollThread.getName() + " - " + systemName);
+ "Samza KafkaConsumerProxy Poll " + consumerPollThread.getName() + " - " + systemName);
consumerPollThread.start();
+ System.out.println("THREAD: starting" + consumerPollThread.getName());
+
+
// we need to wait until the thread starts
while (!isRunning) {
try {
consumerPollThreadStartLatch.await(3000, TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
+ LOG.info("WTH");
}
}
+ new Exception().printStackTrace(System.out);
+ System.out.println("THREAD: started" + consumerPollThread.getName());
+
} else {
LOG.debug("Tried to start an already started LiKafkaConsumerProxy (%s). Ignoring.", this.toString());
}
@@ -135,12 +142,15 @@ public class KafkaConsumerProxy<K, V> {
return () -> {
isRunning = true;
+
try {
consumerPollThreadStartLatch.countDown();
+ System.out.println("THREAD: runing " + consumerPollThread.getName());
initializeLags();
while (isRunning) {
fetchMessages();
}
+ System.out.println("THREAD: finished " + consumerPollThread.getName());
} catch (Throwable throwable) {
LOG.error(String.format("Error in LiKafkaConsumerProxy poll thread for system: %s.", systemName), throwable);
// SamzaLiKafkaSystemConsumer uses the failureCause to propagate the throwable to the container
@@ -164,7 +174,7 @@ public class KafkaConsumerProxy<K, V> {
// If the message we are about to consume is < end offset, we are starting with a lag.
long initialLag = endOffsets.get(tp) - startingOffset;
- LOG.info("Initial lag is {} for SSP {}", initialLag, ssp);
+ LOG.info("Initial lag for SSP {} is {} (end={}, startOffset={})", ssp, initialLag, endOffsets.get(tp), startingOffset);
latestLags.put(ssp, initialLag);
sink.setIsAtHighWatermark(ssp, initialLag == 0);
});
@@ -446,13 +456,13 @@ public class KafkaConsumerProxy<K, V> {
}
public void stop(long timeout) {
- LOG.info("Shutting down LiKafkaConsumerProxy poll thread:" + toString());
+ System.out.println("THREAD: Shutting down LiKafkaConsumerProxy poll thread:" + consumerPollThread.getName());
isRunning = false;
try {
consumerPollThread.join(timeout);
} catch (InterruptedException e) {
- LOG.warn("Join in LiKafkaConsumerProxy has failed", e);
+ LOG.warn("Join in KafkaConsumerProxy has failed", e);
consumerPollThread.interrupt();
}
}
http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index aeeadce..b33db42 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -103,13 +103,16 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
public static <K, V> NewKafkaSystemConsumer getNewKafkaSystemConsumer(String systemName, Config config,
String clientId, KafkaSystemConsumerMetrics metrics, Clock clock) {
- System.out.println("GETTING FOR " + systemName);
- System.out.printf("RETURNING NEW ONE");
+
// extract consumer configs and create kafka consumer
KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
- return new NewKafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
+
+ NewKafkaSystemConsumer kc = new NewKafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
+ System.out.println("kc=" + kc + "!!!!!!!!!!!!!!!!!GETTING FOR NKC for " + systemName);
+
+ return kc;
}
/**
@@ -254,7 +257,8 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
@Override
public void stop() {
- System.out.println("##################### stopping " + this + "; kc=" + kafkaConsumer);
+ System.out.println("kc=" + this + "!!!!!!!!!!!!!!!!!!!!!! stopping "+ "; kc=" + kafkaConsumer);
+ System.out.println("kc=" + this + "!!!!!!!!!!!!!!!!!!!!!!TPs = " + topicPartitions2Offset);
if (!stopped.compareAndSet(false, true)) {
LOG.warn("attempting to stop stopped consumer.");
@@ -300,7 +304,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
topicPartitions2SSP.put(tp, systemStreamPartition);
- LOG.info("==============>registering ssp = " + systemStreamPartition + " with offset " + offset);
+ LOG.info("============>registering ssp = " + systemStreamPartition + " with offset " + offset + "; kc=" + this);
String existingOffset = topicPartitions2Offset.get(tp);
// register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
@@ -348,8 +352,8 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
}
Map<SystemStreamPartition, List<IncomingMessageEnvelope>> res = super.poll(systemStreamPartitions, timeout);
- LOG.info("=============================>. Res for " + systemStreamPartitions);
- LOG.info("=============================>. Res:" + res.toString());
+ //LOG.info("=============================>. Res for " + systemStreamPartitions);
+ //LOG.info("=============================>. Res:" + res.toString());
return res;
}
http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
----------------------------------------------------------------------
diff --git a/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala b/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
index 864d2e5..2ea9a5f 100644
--- a/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
+++ b/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
@@ -37,7 +37,7 @@ import org.apache.kafka.common.protocol.SecurityProtocol
import org.apache.kafka.common.security.JaasUtils
import org.apache.samza.config._
import org.apache.samza.container.TaskName
-import org.apache.samza.job.local.ThreadJobFactory
+import org.apache.samza.job.local.{ThreadJob, ThreadJobFactory}
import org.apache.samza.job.model.{ContainerModel, JobModel}
import org.apache.samza.job.{ApplicationStatus, JobRunner, StreamJob}
import org.apache.samza.metrics.MetricsRegistryMap
@@ -223,9 +223,16 @@ class StreamTaskTestUtil {
* interrupt, which is forwarded on to ThreadJob, and marked as a failure).
*/
def stopJob(job: StreamJob) {
+ // make sure we don't kill the job before it was started
+ val tasks = TestTask.tasks
+ val task = tasks.values.toList.head
+ task.eventProcessed.await(60, TimeUnit.SECONDS)
+ System.out.println("THREAD: JOB KILL BEFORE")
// Shutdown task.
job.kill
+ System.out.println("THREAD: JOB KILL")
val status = job.waitForFinish(60000)
+ System.out.println("THREAD: JOB KILL WAIT")
assertEquals(ApplicationStatus.UnsuccessfulFinish, status)
}
@@ -279,7 +286,10 @@ class StreamTaskTestUtil {
val taskConfig = new TaskConfig(jobModel.getConfig)
val checkpointManager = taskConfig.getCheckpointManager(new MetricsRegistryMap())
checkpointManager match {
- case Some(checkpointManager) => checkpointManager.createResources
+ case Some(checkpointManager) => {
+ checkpointManager.createResources
+ checkpointManager.stop
+ }
case _ => assert(checkpointManager != null, "No checkpoint manager factory configured")
}
@@ -323,6 +333,7 @@ object TestTask {
abstract class TestTask extends StreamTask with InitableTask {
var received = ArrayBuffer[String]()
val initFinished = new CountDownLatch(1)
+ val eventProcessed = new CountDownLatch(1)
@volatile var gotMessage = new CountDownLatch(1)
def init(config: Config, context: TaskContext) {
@@ -334,6 +345,8 @@ abstract class TestTask extends StreamTask with InitableTask {
def process(envelope: IncomingMessageEnvelope, collector: MessageCollector, coordinator: TaskCoordinator) {
val msg = envelope.getMessage.asInstanceOf[String]
+ eventProcessed.countDown()
+
System.err.println("TestTask.process(): %s" format msg)
received += msg
http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
----------------------------------------------------------------------
diff --git a/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala b/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
index a42433c..ccb7cd4 100644
--- a/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
+++ b/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
@@ -77,20 +77,16 @@ class TestShutdownStatefulTask extends StreamTaskTestUtil {
val (job, task) = startJob
// Validate that restored is empty.
- assertEquals(0, task.initFinished.getCount)
assertEquals(0, task.asInstanceOf[ShutdownStateStoreTask].restored.size)
assertEquals(0, task.received.size)
// Send some messages to input stream.
- System.out.println("************************BEFORE DONE sending")
send(task, "1")
- System.out.println("************************FIRST DONE sending")
send(task, "2")
send(task, "3")
send(task, "2")
send(task, "99")
send(task, "99")
- System.out.println("************************DONE sending")
stopJob(job)
}
@@ -122,7 +118,7 @@ class ShutdownStateStoreTask extends TestTask {
.asInstanceOf[KeyValueStore[String, String]]
val iter = store.all
iter.asScala.foreach( p => restored += (p.getKey -> p.getValue))
- System.err.println("ShutdownStateStoreTask.createStream(): %s" format restored)
+ System.out.println("ShutdownStateStoreTask.createStream(): %s" format restored)
iter.close
}
[26/47] samza git commit: added apache license
Posted by bo...@apache.org.
added apache license
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/59b3dc1c
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/59b3dc1c
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/59b3dc1c
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 59b3dc1c2c1820c899b5d5b217b0268d119348af
Parents: 2203494
Author: Boris S <bo...@apache.org>
Authored: Fri Aug 31 15:11:39 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Aug 31 15:11:39 2018 -0700
----------------------------------------------------------------------
.../kafka/TestNewKafkaSystemConsumer.java | 21 ++++++++++++++++++++
1 file changed, 21 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/59b3dc1c/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
index f7f63f3..fb7533b 100644
--- a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
+++ b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
@@ -1,3 +1,24 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
package org.apache.samza.system.kafka;
import java.util.Collections;
[30/47] samza git commit: Merge branch 'master' into NewConsumer
Posted by bo...@apache.org.
Merge branch 'master' into NewConsumer
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/bab5bdd5
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/bab5bdd5
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/bab5bdd5
Branch: refs/heads/NewKafkaSystemConsumer
Commit: bab5bdd5a8d12ae0efcd6d3b5c5601d476470373
Parents: b5ce9b3 add733b
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 4 17:23:51 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 4 17:23:51 2018 -0700
----------------------------------------------------------------------
.../org/apache/samza/execution/JobNode.java | 5 +++++
.../runtime/AbstractApplicationRunner.java | 17 ++++++++-------
.../sql/runner/SamzaSqlApplicationConfig.java | 6 ++++--
.../runner/TestSamzaSqlApplicationConfig.java | 22 +++++++++++++++++++-
.../sql/testutil/TestSamzaSqlFileParser.java | 1 -
.../table/TestLocalTableWithSideInputs.java | 3 ++-
6 files changed, 41 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
[15/47] samza git commit: Merge branch 'master' of
https://github.com/sborya/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/sborya/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/afb34d91
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/afb34d91
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/afb34d91
Branch: refs/heads/NewKafkaSystemConsumer
Commit: afb34d916b435c1c08fe80009cc7d47d8287e27f
Parents: 7887d88 78ad578
Author: Boris S <bo...@apache.org>
Authored: Sun Aug 12 23:48:54 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Sun Aug 12 23:48:54 2018 -0700
----------------------------------------------------------------------
----------------------------------------------------------------------
[37/47] samza git commit: addressed some review comments
Posted by bo...@apache.org.
addressed some review comments
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/26552213
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/26552213
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/26552213
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 2655221348304507e1a91e6fa93ef2dc79a4620d
Parents: 9217644
Author: Boris S <bo...@apache.org>
Authored: Mon Sep 10 11:17:18 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Mon Sep 10 11:17:18 2018 -0700
----------------------------------------------------------------------
.../apache/samza/container/SamzaContainer.scala | 2 +-
.../samza/coordinator/JobModelManager.scala | 3 +-
.../clients/consumer/KafkaConsumerConfig.java | 43 +-
.../samza/system/kafka/KafkaConsumerProxy.java | 50 +--
.../samza/system/kafka/KafkaSystemConsumer.java | 406 ++++++++++++++++++
.../samza/system/kafka/KafkaSystemFactory.scala | 4 +-
.../system/kafka/NewKafkaSystemConsumer.java | 412 -------------------
.../system/kafka/TestKafkaSystemConsumer.java | 224 ++++++++++
.../kafka/TestNewKafkaSystemConsumer.java | 224 ----------
9 files changed, 687 insertions(+), 681 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala b/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
index b17788f..5ee9206 100644
--- a/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
+++ b/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
@@ -829,7 +829,7 @@ class SamzaContainer(
}
try {
- info("Shutting down Samza.")
+ info("Shutting down SamzaContaier.")
removeShutdownHook
jmxServer.stop
http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala b/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
index f95a521..e626d9a 100644
--- a/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
+++ b/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
@@ -35,7 +35,6 @@ import org.apache.samza.container.LocalityManager
import org.apache.samza.container.TaskName
import org.apache.samza.coordinator.server.HttpServer
import org.apache.samza.coordinator.server.JobServlet
-import org.apache.samza.coordinator.stream.CoordinatorStreamManager
import org.apache.samza.job.model.JobModel
import org.apache.samza.job.model.TaskModel
import org.apache.samza.metrics.MetricsRegistryMap
@@ -64,7 +63,7 @@ object JobModelManager extends Logging {
* a) Reads the jobModel from coordinator stream using the job's configuration.
* b) Recomputes changelog partition mapping based on jobModel and job's configuration.
* c) Builds JobModelManager using the jobModel read from coordinator stream.
- * @param config Coordinator stream manager config.
+ * @param config Config from the coordinator stream.
* @param changelogPartitionMapping The changelog partition-to-task mapping.
* @return JobModelManager
*/
http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index 98792ab..8ca5b93 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -54,21 +54,28 @@ public class KafkaConsumerConfig extends ConsumerConfig {
* By default, KafkaConsumer will fetch ALL available messages for all the partitions.
* This may cause memory issues. That's why we will limit the number of messages per partition we get on EACH poll().
*/
- private static final String KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT = "100";
+ private static final String DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS = "100";
-
- public KafkaConsumerConfig(Properties props) {
+ private KafkaConsumerConfig(Properties props) {
super(props);
}
+ /**
+ * Create kafka consumer configs, based on the subset of global configs.
+ * @param config
+ * @param systemName
+ * @param clientId
+ * @param injectProps
+ * @return KafkaConsumerConfig
+ */
public static KafkaConsumerConfig getKafkaSystemConsumerConfig(Config config, String systemName, String clientId,
Map<String, String> injectProps) {
- Config subConf = config.subset(String.format("systems.%s.consumer.", systemName), true);
+ final Config subConf = config.subset(String.format("systems.%s.consumer.", systemName), true);
- String groupId = getConsumerGroupId(config);
+ final String groupId = getConsumerGroupId(config);
- Properties consumerProps = new Properties();
+ final Properties consumerProps = new Properties();
consumerProps.putAll(subConf);
consumerProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
@@ -109,8 +116,8 @@ public class KafkaConsumerConfig extends ConsumerConfig {
}
// NOT SURE THIS IS NEEDED TODO
- String maxPollRecords =
- subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT);
+ final String maxPollRecords =
+ subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS);
consumerProps.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
// put overrides
@@ -120,7 +127,7 @@ public class KafkaConsumerConfig extends ConsumerConfig {
}
// group id should be unique per job
- static String getConsumerGroupId(Config config) {
+ private static String getConsumerGroupId(Config config) {
JobConfig jobConfig = new JobConfig(config);
Option<String> jobIdOption = jobConfig.getJobId();
Option<String> jobNameOption = jobConfig.getName();
@@ -151,11 +158,12 @@ public class KafkaConsumerConfig extends ConsumerConfig {
}
/**
- * Settings for auto.reset in samza are different from settings in Kafka (auto.offset.reset) - need to convert
+ * If settings for auto.reset in samza are different from settings in Kafka (auto.offset.reset),
+ * then need to convert them (see kafka.apache.org/documentation):
* "largest" -> "latest"
* "smallest" -> "earliest"
- * "none" -> "none"
- * "none" - will fail the kafka consumer, if offset is out of range
+ *
+ * If no setting specified we return "latest" (same as Kafka).
* @param properties All consumer related {@link Properties} parsed from samza config
* @return String representing the config value for "auto.offset.reset" property
*/
@@ -168,13 +176,18 @@ public class KafkaConsumerConfig extends ConsumerConfig {
return autoOffsetReset;
}
+ String newAutoOffsetReset;
switch (autoOffsetReset) {
case SAMZA_OFFSET_LARGEST:
- return KAFKA_OFFSET_LATEST;
+ newAutoOffsetReset = KAFKA_OFFSET_LATEST;
+ break;
case SAMZA_OFFSET_SMALLEST:
- return KAFKA_OFFSET_EARLIEST;
+ newAutoOffsetReset = KAFKA_OFFSET_EARLIEST;
+ break;
default:
- return KAFKA_OFFSET_LATEST;
+ newAutoOffsetReset = KAFKA_OFFSET_LATEST;
}
+ LOG.info("AutoOffsetReset value converted from {} to {}", autoOffsetReset, newAutoOffsetReset);
+ return newAutoOffsetReset;
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index ae80d50..0825c90 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -22,7 +22,6 @@
package org.apache.samza.system.kafka;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -40,6 +39,7 @@ import org.apache.kafka.clients.consumer.InvalidOffsetException;
import org.apache.kafka.common.Metric;
import org.apache.kafka.common.MetricName;
import org.apache.kafka.common.TopicPartition;
+import org.apache.samza.Partition;
import org.apache.samza.SamzaException;
import org.apache.samza.system.IncomingMessageEnvelope;
import org.apache.samza.system.SystemStreamPartition;
@@ -58,13 +58,13 @@ public class KafkaConsumerProxy<K, V> {
/* package private */ final Thread consumerPollThread;
private final Consumer<K, V> kafkaConsumer;
- private final NewKafkaSystemConsumer.KafkaConsumerMessageSink sink;
+ private final KafkaSystemConsumer.KafkaConsumerMessageSink sink;
private final KafkaSystemConsumerMetrics kafkaConsumerMetrics;
private final String metricName;
private final String systemName;
private final String clientId;
private final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
- private final Map<SystemStreamPartition, MetricName> ssp2MetricName = new HashMap<>();
+ private final Map<SystemStreamPartition, MetricName> perPartitionMetrics = new HashMap<>();
// list of all the SSPs we poll from, with their next offsets correspondingly.
private final Map<SystemStreamPartition, Long> nextOffsets = new ConcurrentHashMap<>();
// lags behind the high water mark, as reported by the Kafka consumer.
@@ -75,7 +75,7 @@ public class KafkaConsumerProxy<K, V> {
private final CountDownLatch consumerPollThreadStartLatch = new CountDownLatch(1);
public KafkaConsumerProxy(Consumer<K, V> kafkaConsumer, String systemName, String clientId,
- NewKafkaSystemConsumer.KafkaConsumerMessageSink messageSink, KafkaSystemConsumerMetrics samzaConsumerMetrics,
+ KafkaSystemConsumer.KafkaConsumerMessageSink messageSink, KafkaSystemConsumerMetrics samzaConsumerMetrics,
String metricName) {
this.kafkaConsumer = kafkaConsumer;
@@ -88,14 +88,15 @@ public class KafkaConsumerProxy<K, V> {
this.kafkaConsumerMetrics.registerClientProxy(metricName);
consumerPollThread = new Thread(createProxyThreadRunnable());
+ consumerPollThread.setDaemon(true);
+ consumerPollThread.setName(
+ "Samza KafkaConsumerProxy Poll " + consumerPollThread.getName() + " - " + systemName);
}
public void start() {
if (!consumerPollThread.isAlive()) {
LOG.info("Starting KafkaConsumerProxy polling thread for system " + systemName + " " + this.toString());
- consumerPollThread.setDaemon(true);
- consumerPollThread.setName(
- "Samza KafkaConsumerProxy Poll " + consumerPollThread.getName() + " - " + systemName);
+
consumerPollThread.start();
// we need to wait until the thread starts
@@ -116,7 +117,7 @@ public class KafkaConsumerProxy<K, V> {
public void addTopicPartition(SystemStreamPartition ssp, long nextOffset) {
LOG.info(String.format("Adding new topic and partition %s, offset = %s to queue for consumer %s", ssp, nextOffset,
this));
- topicPartitions2SSP.put(NewKafkaSystemConsumer.toTopicPartition(ssp), ssp); //registered SSPs
+ topicPartitions2SSP.put(KafkaSystemConsumer.toTopicPartition(ssp), ssp); //registered SSPs
// this is already vetted offset so there is no need to validate it
LOG.info(String.format("Got offset %s for new topic and partition %s.", nextOffset, ssp));
@@ -135,7 +136,6 @@ public class KafkaConsumerProxy<K, V> {
Runnable runnable= () -> {
isRunning = true;
-
try {
consumerPollThreadStartLatch.countDown();
LOG.info("Starting runnable " + consumerPollThread.getName());
@@ -230,19 +230,19 @@ public class KafkaConsumerProxy<K, V> {
private Map<SystemStreamPartition, List<IncomingMessageEnvelope>> processResults(ConsumerRecords<K, V> records) {
if (records == null) {
- return Collections.emptyMap();
+ throw new SamzaException("processResults is called with null object for records");
}
int capacity = (int) (records.count() / 0.75 + 1); // to avoid rehash, allocate more than 75% of expected capacity.
Map<SystemStreamPartition, List<IncomingMessageEnvelope>> results = new HashMap<>(capacity);
// Parse the returned records and convert them into the IncomingMessageEnvelope.
// Note. They have been already de-serialized by the consumer.
- for (ConsumerRecord<K, V> r : records) {
- int partition = r.partition();
- String topic = r.topic();
+ for (ConsumerRecord<K, V> record : records) {
+ int partition = record.partition();
+ String topic = record.topic();
TopicPartition tp = new TopicPartition(topic, partition);
- updateMetrics(r, tp);
+ updateMetrics(record, tp);
SystemStreamPartition ssp = topicPartitions2SSP.get(tp);
List<IncomingMessageEnvelope> listMsgs = results.get(ssp);
@@ -251,10 +251,10 @@ public class KafkaConsumerProxy<K, V> {
results.put(ssp, listMsgs);
}
- final K key = r.key();
- final Object value = r.value();
- IncomingMessageEnvelope imEnvelope =
- new IncomingMessageEnvelope(ssp, String.valueOf(r.offset()), key, value, getRecordSize(r));
+ final K key = record.key();
+ final Object value = record.value();
+ final IncomingMessageEnvelope imEnvelope =
+ new IncomingMessageEnvelope(ssp, String.valueOf(record.offset()), key, value, getRecordSize(record));
listMsgs.add(imEnvelope);
}
if (LOG.isDebugEnabled()) {
@@ -274,8 +274,8 @@ public class KafkaConsumerProxy<K, V> {
}
private void updateMetrics(ConsumerRecord<K, V> r, TopicPartition tp) {
- TopicAndPartition tap = NewKafkaSystemConsumer.toTopicAndPartition(tp);
- SystemStreamPartition ssp = NewKafkaSystemConsumer.toSystemStreamPartition(systemName, tap);
+ TopicAndPartition tap = KafkaSystemConsumer.toTopicAndPartition(tp);
+ SystemStreamPartition ssp = new SystemStreamPartition(systemName, tp.topic(), new Partition(tp.partition()));
long currentSSPLag = getLatestLag(ssp); // lag between the current offset and the highwatermark
if (currentSSPLag < 0) {
return;
@@ -312,8 +312,8 @@ public class KafkaConsumerProxy<K, V> {
tags.put("client-id", clientId);// this is required by the KafkaConsumer to get the metrics
for (SystemStreamPartition ssp : ssps) {
- TopicPartition tp = NewKafkaSystemConsumer.toTopicPartition(ssp);
- ssp2MetricName.put(ssp, new MetricName(tp + ".records-lag", "consumer-fetch-manager-metrics", "", tags));
+ TopicPartition tp = KafkaSystemConsumer.toTopicPartition(ssp);
+ perPartitionMetrics.put(ssp, new MetricName(tp + ".records-lag", "consumer-fetch-manager-metrics", "", tags));
}
}
@@ -327,12 +327,12 @@ public class KafkaConsumerProxy<K, V> {
Map<MetricName, ? extends Metric> consumerMetrics = kafkaConsumer.metrics();
// populate the MetricNames first time
- if (ssp2MetricName.isEmpty()) {
+ if (perPartitionMetrics.isEmpty()) {
populateMetricNames(ssps);
}
for (SystemStreamPartition ssp : ssps) {
- MetricName mn = ssp2MetricName.get(ssp);
+ MetricName mn = perPartitionMetrics.get(ssp);
Metric currentLagM = consumerMetrics.get(mn);
// High watermark is fixed to be the offset of last available message,
@@ -412,7 +412,7 @@ public class KafkaConsumerProxy<K, V> {
for (Map.Entry<SystemStreamPartition, Long> e : nextOffsets.entrySet()) {
SystemStreamPartition ssp = e.getKey();
Long offset = e.getValue();
- TopicAndPartition tp = NewKafkaSystemConsumer.toTopicAndPartition(ssp);
+ TopicAndPartition tp = new TopicAndPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
Long lag = latestLags.get(ssp);
LOG.trace("Latest offset of {} is {}; lag = {}", ssp, offset, lag);
if (lag != null && offset != null && lag >= 0) {
http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
new file mode 100644
index 0000000..196fb85
--- /dev/null
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
@@ -0,0 +1,406 @@
+
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.samza.system.kafka;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import kafka.common.TopicAndPartition;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.samza.Partition;
+import org.apache.samza.SamzaException;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.system.IncomingMessageEnvelope;
+import org.apache.samza.system.SystemConsumer;
+import org.apache.samza.system.SystemStreamPartition;
+import org.apache.samza.util.BlockingEnvelopeMap;
+import org.apache.samza.util.Clock;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.Option;
+
+
+public class KafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements SystemConsumer {
+
+ private static final Logger LOG = LoggerFactory.getLogger(KafkaSystemConsumer.class);
+
+ private static final long FETCH_THRESHOLD = 50000;
+ private static final long FETCH_THRESHOLD_BYTES = -1L;
+
+ private final Consumer<K, V> kafkaConsumer;
+ private final String systemName;
+ private final KafkaSystemConsumerMetrics samzaConsumerMetrics;
+ private final String clientId;
+ private final String metricName;
+ private final AtomicBoolean stopped = new AtomicBoolean(false);
+ private final AtomicBoolean started = new AtomicBoolean(false);
+ private final Config config;
+ private final boolean fetchThresholdBytesEnabled;
+
+ // This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
+ /* package private */final KafkaConsumerMessageSink messageSink;
+
+ // proxy is doing the actual reading
+ final private KafkaConsumerProxy proxy;
+
+ /* package private */final Map<TopicPartition, String> topicPartitions2Offset = new HashMap<>();
+ /* package private */final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
+
+ /* package private */ long perPartitionFetchThreshold;
+ /* package private */ long perPartitionFetchThresholdBytes;
+
+ /**
+ * Constructor
+ * @param systemName system name for which we create the consumer
+ * @param config config
+ * @param metrics metrics
+ * @param clock - system clock
+ */
+ public KafkaSystemConsumer(Consumer<K, V> kafkaConsumer, String systemName, Config config, String clientId,
+ KafkaSystemConsumerMetrics metrics, Clock clock) {
+
+ super(metrics.registry(), clock, metrics.getClass().getName());
+
+ this.kafkaConsumer = kafkaConsumer;
+ this.samzaConsumerMetrics = metrics;
+ this.clientId = clientId;
+ this.systemName = systemName;
+ this.config = config;
+ this.metricName = String.format("%s %s", systemName, clientId);
+
+ this.fetchThresholdBytesEnabled = new KafkaConfig(config).isConsumerFetchThresholdBytesEnabled(systemName);
+
+ // create a sink for passing the messages between the proxy and the consumer
+ messageSink = new KafkaConsumerMessageSink();
+
+ // Create the proxy to do the actual message reading. It is a separate thread that reads the messages from the stream
+ // and puts them into the sink.
+ proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink, samzaConsumerMetrics, metricName);
+ LOG.info("Created consumer proxy: " + proxy);
+
+ LOG.info("Created SamzaKafkaSystemConsumer for system={}, clientId={}, metricName={}, KafkaConsumer={}", systemName,
+ clientId, metricName, this.kafkaConsumer.toString());
+ }
+
+ public static <K, V> KafkaSystemConsumer getNewKafkaSystemConsumer(String systemName, Config config,
+ String clientId, KafkaSystemConsumerMetrics metrics, Clock clock) {
+
+ // extract consumer configs and create kafka consumer
+ KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
+ LOG.info("Created kafka consumer for system {}, clientId {}: {}", systemName, clientId, kafkaConsumer);
+
+ KafkaSystemConsumer kc = new KafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
+ LOG.info("Created samza system consumer {}", kc.toString());
+
+ return kc;
+ }
+
+ /**
+ * create kafka consumer
+ * @param systemName system name for which we create the consumer
+ * @param clientId client id to use in the kafka client
+ * @param config config
+ * @return kafka consumer
+ */
+ public static <K, V> KafkaConsumer<K, V> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
+
+ Map<String, String> injectProps = new HashMap<>();
+
+ // extract kafka client configs
+ KafkaConsumerConfig consumerConfig =
+ KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
+
+ LOG.info("KafkaClient properties for systemName {}: {}", systemName, consumerConfig.originals());
+
+ return new KafkaConsumer<>(consumerConfig.originals());
+ }
+
+ @Override
+ public void start() {
+ if (!started.compareAndSet(false, true)) {
+ LOG.warn("attempting to start the consumer for the second (or more) time.");
+ return;
+ }
+ if (stopped.get()) {
+ LOG.warn("attempting to start a stopped consumer");
+ return;
+ }
+ // initialize the subscriptions for all the registered TopicPartitions
+ startSubscription();
+ // needs to be called after all the registrations are completed
+ setFetchThresholds();
+
+ startConsumer();
+ LOG.info("consumer {} started", this);
+ }
+
+ private void startSubscription() {
+ //subscribe to all the registered TopicPartitions
+ LOG.info("consumer {}, subscribes to {} ", this, topicPartitions2SSP.keySet());
+ try {
+ synchronized (kafkaConsumer) {
+ // we are using assign (and not subscribe), so we need to specify both topic and partition
+ kafkaConsumer.assign(topicPartitions2SSP.keySet());
+ }
+ } catch (Exception e) {
+ LOG.warn("startSubscription failed.", e);
+ throw new SamzaException(e);
+ }
+ }
+
+ /*
+ Set the offsets to start from.
+ Add the TopicPartitions to the proxy.
+ Start the proxy thread.
+ */
+ void startConsumer() {
+ //set the offset for each TopicPartition
+ if (topicPartitions2Offset.size() <= 0) {
+ LOG.warn("Consumer {} is not subscribed to any SSPs", this);
+ }
+
+ topicPartitions2Offset.forEach((tp, startingOffsetString) -> {
+ long startingOffset = Long.valueOf(startingOffsetString);
+
+ try {
+ synchronized (kafkaConsumer) {
+ // TODO in the future we may need to add special handling here for BEGIN/END_OFFSET
+ // this will call KafkaConsumer.seekToBegin/End()
+ kafkaConsumer.seek(tp, startingOffset); // this value should already be the 'upcoming' value
+ }
+ } catch (Exception e) {
+ // all other exceptions - non recoverable
+ LOG.error("Got Exception while seeking to " + startingOffsetString + " for " + tp, e);
+ throw new SamzaException(e);
+ }
+
+ LOG.info("Changing consumer's starting offset for tp = " + tp + " to " + startingOffsetString);
+
+ // add the partition to the proxy
+ proxy.addTopicPartition(topicPartitions2SSP.get(tp), startingOffset);
+ });
+
+ // start the proxy thread
+ if (proxy != null && !proxy.isRunning()) {
+ LOG.info("Starting proxy: " + proxy);
+ proxy.start();
+ }
+ }
+
+ private void setFetchThresholds() {
+ // get the thresholds, and set defaults if not defined.
+ KafkaConfig kafkaConfig = new KafkaConfig(config);
+
+ Option<String> fetchThresholdOption = kafkaConfig.getConsumerFetchThreshold(systemName);
+ long fetchThreshold = FETCH_THRESHOLD;
+ if (fetchThresholdOption.isDefined()) {
+ fetchThreshold = Long.valueOf(fetchThresholdOption.get());
+ LOG.info("fetchThresholdOption is configured. fetchThreshold=" + fetchThreshold);
+ }
+
+ Option<String> fetchThresholdBytesOption = kafkaConfig.getConsumerFetchThresholdBytes(systemName);
+ long fetchThresholdBytes = FETCH_THRESHOLD_BYTES;
+ if (fetchThresholdBytesOption.isDefined()) {
+ fetchThresholdBytes = Long.valueOf(fetchThresholdBytesOption.get());
+ LOG.info("fetchThresholdBytesOption is configured. fetchThresholdBytes=" + fetchThresholdBytes);
+ }
+
+ int numTPs = topicPartitions2SSP.size();
+ assert (numTPs == topicPartitions2Offset.size());
+
+ LOG.info("fetchThresholdBytes = " + fetchThresholdBytes + "; fetchThreshold=" + fetchThreshold);
+ LOG.info("number of topicPartitions " + numTPs);
+
+ if (numTPs > 0) {
+ perPartitionFetchThreshold = fetchThreshold / numTPs;
+ LOG.info("perPartitionFetchThreshold=" + perPartitionFetchThreshold);
+ if (fetchThresholdBytesEnabled) {
+ // currently this feature cannot be enabled, because we do not have the size of the messages available.
+ // messages get double buffered, hence divide by 2
+ perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / numTPs;
+ LOG.info("perPartitionFetchThresholdBytes is enabled. perPartitionFetchThresholdBytes="
+ + perPartitionFetchThresholdBytes);
+ }
+ }
+ }
+
+ @Override
+ public void stop() {
+ LOG.info("Stopping Samza kafkaConsumer " + this);
+
+ if (!stopped.compareAndSet(false, true)) {
+ LOG.warn("attempting to stop stopped consumer.");
+ return;
+ }
+
+ // stop the proxy (with 5 minutes timeout)
+ if (proxy != null) {
+ LOG.info("Stopping proxy " + proxy);
+ proxy.stop(TimeUnit.MINUTES.toMillis(5));
+ }
+
+ try {
+ synchronized (kafkaConsumer) {
+ LOG.info("Closing kafka consumer " + kafkaConsumer);
+ kafkaConsumer.close();
+ }
+ } catch (Exception e) {
+ LOG.warn("failed to stop SamzaRawKafkaConsumer + " + this, e);
+ }
+ }
+
+ /*
+ record the ssp and the offset. Do not submit it to the consumer yet.
+ */
+ @Override
+ public void register(SystemStreamPartition systemStreamPartition, String offset) {
+ if (started.get()) {
+ String msg =
+ String.format("Trying to register partition after consumer has been started. sn=%s, ssp=%s", systemName,
+ systemStreamPartition);
+ LOG.error(msg);
+ throw new SamzaException(msg);
+ }
+
+ if (!systemStreamPartition.getSystem().equals(systemName)) {
+ LOG.warn("ignoring SSP " + systemStreamPartition + ", because this consumer's system is " + systemName);
+ return;
+ }
+ super.register(systemStreamPartition, offset);
+
+ TopicPartition tp = toTopicPartition(systemStreamPartition);
+
+ topicPartitions2SSP.put(tp, systemStreamPartition);
+
+ LOG.info("Registering ssp = " + systemStreamPartition + " with offset " + offset);
+
+ String existingOffset = topicPartitions2Offset.get(tp);
+ // register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
+ if (existingOffset == null || compareOffsets(existingOffset, offset) > 0) {
+ topicPartitions2Offset.put(tp, offset);
+ }
+
+ samzaConsumerMetrics.registerTopicAndPartition(toTopicAndPartition(tp));
+ }
+
+ /**
+ * Compare two String offsets.
+ * Note. There is a method in KafkaAdmin that does that, but that would require instantiation of systemadmin for each consumer.
+ * @return see {@link Long#compareTo(Long)}
+ */
+ public static int compareOffsets(String offset1, String offset2) {
+ return Long.valueOf(offset1).compareTo(Long.valueOf(offset2));
+ }
+
+ @Override
+ public String toString() {
+ return systemName + "/" + clientId + "/" + super.toString();
+ }
+
+ @Override
+ public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
+ Set<SystemStreamPartition> systemStreamPartitions, long timeout) throws InterruptedException {
+
+ // check if the proxy is running
+ if (!proxy.isRunning()) {
+ stop();
+ if (proxy.getFailureCause() != null) {
+ String message = "KafkaConsumerProxy has stopped";
+ throw new SamzaException(message, proxy.getFailureCause());
+ } else {
+ LOG.warn("Failure cause is not populated for KafkaConsumerProxy");
+ throw new SamzaException("KafkaConsumerProxy has stopped");
+ }
+ }
+
+ Map<SystemStreamPartition, List<IncomingMessageEnvelope>> res = super.poll(systemStreamPartitions, timeout);
+ return res;
+ }
+
+ /**
+ * convert from TopicPartition to TopicAndPartition
+ */
+ public static TopicAndPartition toTopicAndPartition(TopicPartition tp) {
+ return new TopicAndPartition(tp.topic(), tp.partition());
+ }
+
+ /**
+ * convert to TopicPartition from SystemStreamPartition
+ */
+ public static TopicPartition toTopicPartition(SystemStreamPartition ssp) {
+ return new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
+ }
+
+ /**
+ * return system name for this consumer
+ * @return system name
+ */
+ public String getSystemName() {
+ return systemName;
+ }
+
+ ////////////////////////////////////
+ // inner class for the message sink
+ ////////////////////////////////////
+ public class KafkaConsumerMessageSink {
+
+ public void setIsAtHighWatermark(SystemStreamPartition ssp, boolean isAtHighWatermark) {
+ setIsAtHead(ssp, isAtHighWatermark);
+ }
+
+ boolean needsMoreMessages(SystemStreamPartition ssp) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("needsMoreMessages from following SSP: {}. fetchLimitByBytes enabled={}; messagesSizeInQueue={};"
+ + "(limit={}); messagesNumInQueue={}(limit={};", ssp, fetchThresholdBytesEnabled,
+ getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes, getNumMessagesInQueue(ssp),
+ perPartitionFetchThreshold);
+ }
+
+ if (fetchThresholdBytesEnabled) {
+ return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes;
+ } else {
+ return getNumMessagesInQueue(ssp) < perPartitionFetchThreshold;
+ }
+ }
+
+ void addMessage(SystemStreamPartition ssp, IncomingMessageEnvelope envelope) {
+ LOG.trace("Incoming message ssp = {}: envelope = {}.", ssp, envelope);
+
+ try {
+ put(ssp, envelope);
+ } catch (InterruptedException e) {
+ throw new SamzaException(
+ String.format("Interrupted while trying to add message with offset %s for ssp %s", envelope.getOffset(),
+ ssp));
+ }
+ }
+ } // end of KafkaMessageSink class
+ ///////////////////////////////////////////////////////////////////////////
+}
http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index 6f58bed..e0e85be 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -50,7 +50,7 @@ class KafkaSystemFactory extends SystemFactory with Logging {
val clientId = KafkaConsumerConfig.getConsumerClientId( config)
val metrics = new KafkaSystemConsumerMetrics(systemName, registry)
- NewKafkaSystemConsumer.getNewKafkaSystemConsumer(
+ KafkaSystemConsumer.getNewKafkaSystemConsumer(
systemName, config, clientId, metrics, new SystemClock)
}
@@ -76,7 +76,7 @@ class KafkaSystemFactory extends SystemFactory with Logging {
}
def getAdmin(systemName: String, config: Config): SystemAdmin = {
- val clientId = KafkaConsumerConfig.getConsumerClientId(config)
+ val clientId = KafkaConsumerConfig.getAdminClientId(config)
val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId)
val bootstrapServers = producerConfig.bootsrapServers
val consumerConfig = config.getKafkaSystemConsumerConfig(systemName, clientId)
http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
deleted file mode 100644
index afec8ad..0000000
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ /dev/null
@@ -1,412 +0,0 @@
-
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- */
-
-package org.apache.samza.system.kafka;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import kafka.common.TopicAndPartition;
-import org.apache.kafka.clients.consumer.Consumer;
-import org.apache.kafka.clients.consumer.KafkaConsumer;
-import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
-import org.apache.kafka.common.TopicPartition;
-import org.apache.samza.Partition;
-import org.apache.samza.SamzaException;
-import org.apache.samza.config.Config;
-import org.apache.samza.config.KafkaConfig;
-import org.apache.samza.system.IncomingMessageEnvelope;
-import org.apache.samza.system.SystemConsumer;
-import org.apache.samza.system.SystemStreamPartition;
-import org.apache.samza.util.BlockingEnvelopeMap;
-import org.apache.samza.util.Clock;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import scala.Option;
-
-
-public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements SystemConsumer {
-
- private static final Logger LOG = LoggerFactory.getLogger(NewKafkaSystemConsumer.class);
-
- private static final long FETCH_THRESHOLD = 50000;
- private static final long FETCH_THRESHOLD_BYTES = -1L;
-
- private final Consumer<K, V> kafkaConsumer;
- private final String systemName;
- private final KafkaSystemConsumerMetrics samzaConsumerMetrics;
- private final String clientId;
- private final String metricName;
- private final AtomicBoolean stopped = new AtomicBoolean(false);
- private final AtomicBoolean started = new AtomicBoolean(false);
- private final Config config;
- private final boolean fetchThresholdBytesEnabled;
-
- // This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
- /* package private */ KafkaConsumerMessageSink messageSink;
-
- // proxy is doing the actual reading
- private KafkaConsumerProxy proxy;
-
- /* package private */final Map<TopicPartition, String> topicPartitions2Offset = new HashMap<>();
- /* package private */final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
-
- /* package private */ long perPartitionFetchThreshold;
- /* package private */ long perPartitionFetchThresholdBytes;
-
- /**
- * @param systemName
- * @param config
- * @param metrics
- */
- protected NewKafkaSystemConsumer(Consumer<K, V> kafkaConsumer, String systemName, Config config, String clientId,
- KafkaSystemConsumerMetrics metrics, Clock clock) {
-
- super(metrics.registry(), clock, metrics.getClass().getName());
-
- this.kafkaConsumer = kafkaConsumer;
- this.samzaConsumerMetrics = metrics;
- this.clientId = clientId;
- this.systemName = systemName;
- this.config = config;
- this.metricName = systemName + " " + clientId;
-
- this.fetchThresholdBytesEnabled = new KafkaConfig(config).isConsumerFetchThresholdBytesEnabled(systemName);
-
- LOG.info("Created SamzaKafkaSystemConsumer for system={}, clientId={}, metricName={}, KafkaConsumer={}", systemName,
- clientId, metricName, this.kafkaConsumer.toString());
- }
-
- public static <K, V> NewKafkaSystemConsumer getNewKafkaSystemConsumer(String systemName, Config config,
- String clientId, KafkaSystemConsumerMetrics metrics, Clock clock) {
-
- // extract consumer configs and create kafka consumer
- KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
- LOG.info("Created kafka consumer for system {}, clientId {}: {}", systemName, clientId, kafkaConsumer);
-
- NewKafkaSystemConsumer kc = new NewKafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
- LOG.info("Created samza system consumer {}", kc.toString());
-
- return kc;
- }
-
- /**
- * create kafka consumer
- * @param systemName
- * @param clientId
- * @param config
- * @return kafka consumer
- */
- private static <K, V> KafkaConsumer<K, V> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
-
- Map<String, String> injectProps = new HashMap<>();
-
- // extract kafka client configs
- KafkaConsumerConfig consumerConfig =
- KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
-
- LOG.info("KafkaClient properties for systemName {}: {}", systemName, consumerConfig.originals());
-
- return new KafkaConsumer<>(consumerConfig.originals());
- }
-
- @Override
- public void start() {
- if (!started.compareAndSet(false, true)) {
- LOG.warn("attempting to start the consumer for the second (or more) time.");
- return;
- }
- if (stopped.get()) {
- LOG.warn("attempting to start a stopped consumer");
- return;
- }
- // initialize the subscriptions for all the registered TopicPartitions
- startSubscription();
- // needs to be called after all the registrations are completed
- setFetchThresholds();
- // Create the proxy to do the actual message reading. It is a separate thread that reads the messages from the stream
- // and puts them into the sink.
- createConsumerProxy();
- startConsumer();
- LOG.info("consumer {} started", this);
- }
-
- private void startSubscription() {
- //subscribe to all the registered TopicPartitions
- LOG.info("consumer {}, subscribes to {} ", this, topicPartitions2SSP.keySet());
- try {
- synchronized (kafkaConsumer) {
- // we are using assign (and not subscribe), so we need to specify both topic and partition
- kafkaConsumer.assign(topicPartitions2SSP.keySet());
- }
- } catch (Exception e) {
- LOG.warn("startSubscription failed.", e);
- throw new SamzaException(e);
- }
- }
-
- void createConsumerProxy() {
- // create a sink for passing the messages between the proxy and the consumer
- messageSink = new KafkaConsumerMessageSink();
-
- // create the thread with the consumer
- proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink, samzaConsumerMetrics, metricName);
-
- LOG.info("Created consumer proxy: " + proxy);
- }
-
- /*
- Set the offsets to start from.
- Add the TopicPartitions to the proxy.
- Start the proxy thread.
- */
- void startConsumer() {
- //set the offset for each TopicPartition
- if (topicPartitions2Offset.size() <= 0) {
- LOG.warn("Consumer {} is not subscribed to any SSPs", this);
- }
-
- topicPartitions2Offset.forEach((tp, startingOffsetString) -> {
- long startingOffset = Long.valueOf(startingOffsetString);
-
- try {
- synchronized (kafkaConsumer) {
- // TODO in the future we may need to add special handling here for BEGIN/END_OFFSET
- // this will call KafkaConsumer.seekToBegin/End()
- kafkaConsumer.seek(tp, startingOffset); // this value should already be the 'upcoming' value
- }
- } catch (Exception e) {
- // all other exceptions - non recoverable
- LOG.error("Got Exception while seeking to " + startingOffsetString + " for " + tp, e);
- throw new SamzaException(e);
- }
-
- LOG.info("Changing consumer's starting offset for tp = " + tp + " to " + startingOffsetString);
-
- // add the partition to the proxy
- proxy.addTopicPartition(topicPartitions2SSP.get(tp), startingOffset);
- });
-
- // start the proxy thread
- if (proxy != null && !proxy.isRunning()) {
- LOG.info("Starting proxy: " + proxy);
- proxy.start();
- }
- }
-
- private void setFetchThresholds() {
- // get the thresholds, and set defaults if not defined.
- KafkaConfig kafkaConfig = new KafkaConfig(config);
-
- Option<String> fetchThresholdOption = kafkaConfig.getConsumerFetchThreshold(systemName);
- long fetchThreshold = FETCH_THRESHOLD;
- if (fetchThresholdOption.isDefined()) {
- fetchThreshold = Long.valueOf(fetchThresholdOption.get());
- LOG.info("fetchThresholdOption is configured. fetchThreshold=" + fetchThreshold);
- }
-
- Option<String> fetchThresholdBytesOption = kafkaConfig.getConsumerFetchThresholdBytes(systemName);
- long fetchThresholdBytes = FETCH_THRESHOLD_BYTES;
- if (fetchThresholdBytesOption.isDefined()) {
- fetchThresholdBytes = Long.valueOf(fetchThresholdBytesOption.get());
- LOG.info("fetchThresholdBytesOption is configured. fetchThresholdBytes=" + fetchThresholdBytes);
- }
-
- int numTPs = topicPartitions2SSP.size();
- assert (numTPs == topicPartitions2Offset.size());
-
- LOG.info("fetchThresholdBytes = " + fetchThresholdBytes + "; fetchThreshold=" + fetchThreshold);
- LOG.info("number of topicPartitions " + numTPs);
-
- if (numTPs > 0) {
- perPartitionFetchThreshold = fetchThreshold / numTPs;
- LOG.info("perPartitionFetchThreshold=" + perPartitionFetchThreshold);
- if (fetchThresholdBytesEnabled) {
- // currently this feature cannot be enabled, because we do not have the size of the messages available.
- // messages get double buffered, hence divide by 2
- perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / numTPs;
- LOG.info("perPartitionFetchThresholdBytes is enabled. perPartitionFetchThresholdBytes="
- + perPartitionFetchThresholdBytes);
- }
- }
- }
-
- @Override
- public void stop() {
- LOG.info("Stopping Samza kafkaConsumer " + this);
-
- if (!stopped.compareAndSet(false, true)) {
- LOG.warn("attempting to stop stopped consumer.");
- return;
- }
-
- // stop the proxy (with 5 minutes timeout)
- if (proxy != null) {
- LOG.info("Stopping proxy " + proxy);
- proxy.stop(TimeUnit.MINUTES.toMillis(5));
- }
-
- try {
- synchronized (kafkaConsumer) {
- LOG.info("Closing kafka consumer " + kafkaConsumer);
- kafkaConsumer.close();
- }
- } catch (Exception e) {
- LOG.warn("failed to stop SamzaRawKafkaConsumer + " + this, e);
- }
- }
-
- /*
- record the ssp and the offset. Do not submit it to the consumer yet.
- */
- @Override
- public void register(SystemStreamPartition systemStreamPartition, String offset) {
- if (started.get()) {
- String msg =
- String.format("Trying to register partition after consumer has been started. sn=%s, ssp=%s", systemName,
- systemStreamPartition);
- LOG.error(msg);
- throw new SamzaException(msg);
- }
-
- if (!systemStreamPartition.getSystem().equals(systemName)) {
- LOG.warn("ignoring SSP " + systemStreamPartition + ", because this consumer's system is " + systemName);
- return;
- }
- super.register(systemStreamPartition, offset);
-
- TopicPartition tp = toTopicPartition(systemStreamPartition);
-
- topicPartitions2SSP.put(tp, systemStreamPartition);
-
- LOG.info("Registering ssp = " + systemStreamPartition + " with offset " + offset);
-
- String existingOffset = topicPartitions2Offset.get(tp);
- // register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
- if (existingOffset == null || compareOffsets(existingOffset, offset) > 0) {
- topicPartitions2Offset.put(tp, offset);
- }
-
- samzaConsumerMetrics.registerTopicAndPartition(toTopicAndPartition(tp));
- }
-
- /**
- * Compare two String offsets.
- * Note. There is a method in KafkaAdmin that does that, but that would require instantiation of systemadmin for each consumer.
- * @param off1
- * @param off2
- * @return see {@link Long#compareTo(Long)}
- */
- public static int compareOffsets(String off1, String off2) {
- return Long.valueOf(off1).compareTo(Long.valueOf(off2));
- }
-
- @Override
- public String toString() {
- return systemName + "/" + clientId + "/" + super.toString();
- }
-
- @Override
- public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
- Set<SystemStreamPartition> systemStreamPartitions, long timeout) throws InterruptedException {
-
- // check if the proxy is running
- if (!proxy.isRunning()) {
- stop();
- if (proxy.getFailureCause() != null) {
- String message = "KafkaConsumerProxy has stopped";
- throw new SamzaException(message, proxy.getFailureCause());
- } else {
- LOG.warn("Failure cause is not populated for KafkaConsumerProxy");
- throw new SamzaException("KafkaConsumerProxy has stopped");
- }
- }
-
- Map<SystemStreamPartition, List<IncomingMessageEnvelope>> res = super.poll(systemStreamPartitions, timeout);
- return res;
- }
-
- public static TopicAndPartition toTopicAndPartition(TopicPartition tp) {
- return new TopicAndPartition(tp.topic(), tp.partition());
- }
-
- public static TopicAndPartition toTopicAndPartition(SystemStreamPartition ssp) {
- return new TopicAndPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
- }
-
- public static TopicPartition toTopicPartition(SystemStreamPartition ssp) {
- return new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
- }
-
- public static SystemStreamPartition toSystemStreamPartition(String systemName, TopicAndPartition tp) {
- return new SystemStreamPartition(systemName, tp.topic(), new Partition(tp.partition()));
- }
-
- /**
- * return system name for this consumer
- * @return system name
- */
- public String getSystemName() {
- return systemName;
- }
-
- ////////////////////////////////////
- // inner class for the message sink
- ////////////////////////////////////
- public class KafkaConsumerMessageSink {
-
- public void setIsAtHighWatermark(SystemStreamPartition ssp, boolean isAtHighWatermark) {
- setIsAtHead(ssp, isAtHighWatermark);
- }
-
- boolean needsMoreMessages(SystemStreamPartition ssp) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("needsMoreMessages from following SSP: {}. fetchLimitByBytes enabled={}; messagesSizeInQueue={};"
- + "(limit={}); messagesNumInQueue={}(limit={};", ssp, fetchThresholdBytesEnabled,
- getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes, getNumMessagesInQueue(ssp),
- perPartitionFetchThreshold);
- }
-
- if (fetchThresholdBytesEnabled) {
- return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes;
- } else {
- return getNumMessagesInQueue(ssp) < perPartitionFetchThreshold;
- }
- }
-
- void addMessage(SystemStreamPartition ssp, IncomingMessageEnvelope envelope) {
- LOG.trace("Incoming message ssp = {}: envelope = {}.", ssp, envelope);
-
- try {
- put(ssp, envelope);
- } catch (InterruptedException e) {
- throw new SamzaException(
- String.format("Interrupted while trying to add message with offset %s for ssp %s", envelope.getOffset(),
- ssp));
- }
- }
- } // end of KafkaMessageSink class
- ///////////////////////////////////////////////////////////////////////////
-}
http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java
new file mode 100644
index 0000000..d90bc35
--- /dev/null
+++ b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java
@@ -0,0 +1,224 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.samza.system.kafka;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
+import org.apache.kafka.common.serialization.ByteArraySerializer;
+import org.apache.samza.Partition;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.config.MapConfig;
+import org.apache.samza.system.IncomingMessageEnvelope;
+import org.apache.samza.system.SystemStreamPartition;
+import org.apache.samza.util.Clock;
+import org.apache.samza.util.NoOpMetricsRegistry;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+
+public class TestKafkaSystemConsumer {
+ public final String TEST_SYSTEM = "test-system";
+ public final String TEST_STREAM = "test-stream";
+ public final String TEST_CLIENT_ID = "testClientId";
+ public final String BOOTSTRAP_SERVER = "127.0.0.1:8888";
+ public final String FETCH_THRESHOLD_MSGS = "50000";
+ public final String FETCH_THRESHOLD_BYTES = "100000";
+
+ @Before
+ public void setUp() {
+
+ }
+
+ private KafkaSystemConsumer setupConsumer(String fetchMsg, String fetchBytes) {
+ final Map<String, String> map = new HashMap<>();
+
+ map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD(), TEST_SYSTEM), fetchMsg);
+ map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD_BYTES(), TEST_SYSTEM), fetchBytes);
+ map.put(String.format("systems.%s.consumer.%s", TEST_SYSTEM, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG),
+ BOOTSTRAP_SERVER);
+
+ Config config = new MapConfig(map);
+ KafkaConsumerConfig consumerConfig =
+ KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, TEST_SYSTEM, TEST_CLIENT_ID, Collections.emptyMap());
+ final KafkaConsumer<byte[], byte[]> kafkaConsumer = new MockKafkaConsumer(consumerConfig.originals());
+
+ MockKafkaSystmeCosumer newKafkaSystemConsumer =
+ new MockKafkaSystmeCosumer(kafkaConsumer, TEST_SYSTEM, config, TEST_CLIENT_ID,
+ new KafkaSystemConsumerMetrics(TEST_SYSTEM, new NoOpMetricsRegistry()), System::currentTimeMillis);
+
+ return newKafkaSystemConsumer;
+ }
+
+ @Test
+ public void testConfigValidations() {
+
+ final KafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+ consumer.start();
+ // should be no failures
+ }
+
+ @Test
+ public void testFetchThresholdShouldDivideEvenlyAmongPartitions() {
+ final KafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+ final int partitionsNum = 50;
+ for (int i = 0; i < partitionsNum; i++) {
+ consumer.register(new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(i)), "0");
+ }
+
+ consumer.start();
+
+ Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum, consumer.perPartitionFetchThreshold);
+ Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_BYTES) / 2 / partitionsNum,
+ consumer.perPartitionFetchThresholdBytes);
+ }
+
+ @Test
+ public void testConsumerRegisterOlderOffsetOfTheSamzaSSP() {
+
+ KafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+ SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+ SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+ SystemStreamPartition ssp2 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(2));
+
+ consumer.register(ssp0, "0");
+ consumer.register(ssp0, "5");
+ consumer.register(ssp1, "2");
+ consumer.register(ssp1, "3");
+ consumer.register(ssp2, "0");
+
+ assertEquals("0", consumer.topicPartitions2Offset.get(KafkaSystemConsumer.toTopicPartition(ssp0)));
+ assertEquals("2", consumer.topicPartitions2Offset.get(KafkaSystemConsumer.toTopicPartition(ssp1)));
+ assertEquals("0", consumer.topicPartitions2Offset.get(KafkaSystemConsumer.toTopicPartition(ssp2)));
+ }
+
+ @Test
+ public void testFetchThresholdBytes() {
+
+ SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+ SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+ int partitionsNum = 2;
+ int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size
+ int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 1; // fake size
+ int ime11Size = 20;
+ ByteArraySerializer bytesSerde = new ByteArraySerializer();
+ IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
+ bytesSerde.serialize("", "value0".getBytes()), ime0Size);
+ IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
+ bytesSerde.serialize("", "value1".getBytes()), ime1Size);
+ IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
+ bytesSerde.serialize("", "value11".getBytes()), ime11Size);
+ KafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+ consumer.register(ssp0, "0");
+ consumer.register(ssp1, "0");
+ consumer.start();
+ consumer.messageSink.addMessage(ssp0, ime0);
+ // queue for ssp0 should be full now, because we added message of size FETCH_THRESHOLD_MSGS/partitionsNum
+ Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp0));
+ consumer.messageSink.addMessage(ssp1, ime1);
+ // queue for ssp1 should be less then full now, because we added message of size (FETCH_THRESHOLD_MSGS/partitionsNum - 1)
+ Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
+ consumer.messageSink.addMessage(ssp1, ime11);
+ // queue for ssp1 should full now, because we added message of size 20 on top
+ Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
+
+ Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
+ Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
+ Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
+ Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
+ }
+
+ @Test
+ public void testFetchThresholdBytesDiabled() {
+ // Pass 0 as fetchThresholdByBytes, which disables checking for limit by size
+
+ SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+ SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+ int partitionsNum = 2;
+ int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size, upto the limit
+ int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 100; // fake size, below the limit
+ int ime11Size = 20;// event with the second message still below the size limit
+ ByteArraySerializer bytesSerde = new ByteArraySerializer();
+ IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
+ bytesSerde.serialize("", "value0".getBytes()), ime0Size);
+ IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
+ bytesSerde.serialize("", "value1".getBytes()), ime1Size);
+ IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
+ bytesSerde.serialize("", "value11".getBytes()), ime11Size);
+
+ // limit by number of messages 4/2 = 2 per partition
+ // limit by number of bytes - disabled
+ KafkaSystemConsumer consumer = setupConsumer("4", "0"); // should disable
+
+ consumer.register(ssp0, "0");
+ consumer.register(ssp1, "0");
+ consumer.start();
+ consumer.messageSink.addMessage(ssp0, ime0);
+ // should be full by size, but not full by number of messages (1 of 2)
+ Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp0));
+ consumer.messageSink.addMessage(ssp1, ime1);
+ // not full neither by size nor by messages
+ Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
+ consumer.messageSink.addMessage(ssp1, ime11);
+ // not full by size, but should be full by messages
+ Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
+
+ Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
+ Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
+ Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
+ Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
+ }
+
+ // mock kafkaConsumer and SystemConsumer
+ static class MockKafkaConsumer extends KafkaConsumer {
+ public MockKafkaConsumer(Map<String, Object> configs) {
+ super(configs);
+ }
+ }
+
+ static class MockKafkaSystmeCosumer extends KafkaSystemConsumer {
+ public MockKafkaSystmeCosumer(Consumer kafkaConsumer, String systemName, Config config, String clientId,
+ KafkaSystemConsumerMetrics metrics, Clock clock) {
+ super(kafkaConsumer, systemName, config, clientId, metrics, clock);
+ }
+
+ //@Override
+ //void createConsumerProxy() {
+ // this.messageSink = new KafkaConsumerMessageSink();
+ //}
+
+ @Override
+ void startConsumer() {
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
deleted file mode 100644
index fb7533b..0000000
--- a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- */
-
-package org.apache.samza.system.kafka;
-
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.kafka.clients.consumer.Consumer;
-import org.apache.kafka.clients.consumer.ConsumerConfig;
-import org.apache.kafka.clients.consumer.KafkaConsumer;
-import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
-import org.apache.kafka.common.serialization.ByteArraySerializer;
-import org.apache.samza.Partition;
-import org.apache.samza.config.Config;
-import org.apache.samza.config.KafkaConfig;
-import org.apache.samza.config.MapConfig;
-import org.apache.samza.system.IncomingMessageEnvelope;
-import org.apache.samza.system.SystemStreamPartition;
-import org.apache.samza.util.Clock;
-import org.apache.samza.util.NoOpMetricsRegistry;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-import static org.junit.Assert.*;
-
-
-public class TestNewKafkaSystemConsumer {
- public final String TEST_SYSTEM = "test-system";
- public final String TEST_STREAM = "test-stream";
- public final String TEST_CLIENT_ID = "testClientId";
- public final String BOOTSTRAP_SERVER = "127.0.0.1:8888";
- public final String FETCH_THRESHOLD_MSGS = "50000";
- public final String FETCH_THRESHOLD_BYTES = "100000";
-
- @Before
- public void setUp() {
-
- }
-
- private NewKafkaSystemConsumer setupConsumer(String fetchMsg, String fetchBytes) {
- final Map<String, String> map = new HashMap<>();
-
- map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD(), TEST_SYSTEM), fetchMsg);
- map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD_BYTES(), TEST_SYSTEM), fetchBytes);
- map.put(String.format("systems.%s.consumer.%s", TEST_SYSTEM, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG),
- BOOTSTRAP_SERVER);
-
- Config config = new MapConfig(map);
- KafkaConsumerConfig consumerConfig =
- KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, TEST_SYSTEM, TEST_CLIENT_ID, Collections.emptyMap());
- final KafkaConsumer<byte[], byte[]> kafkaConsumer = new MockKafkaConsumer(consumerConfig.originals());
-
- MockNewKafkaSystmeCosumer newKafkaSystemConsumer =
- new MockNewKafkaSystmeCosumer(kafkaConsumer, TEST_SYSTEM, config, TEST_CLIENT_ID,
- new KafkaSystemConsumerMetrics(TEST_SYSTEM, new NoOpMetricsRegistry()), System::currentTimeMillis);
-
- return newKafkaSystemConsumer;
- }
-
- @Test
- public void testConfigValidations() {
-
- final NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
-
- consumer.start();
- // should be no failures
- }
-
- @Test
- public void testFetchThresholdShouldDivideEvenlyAmongPartitions() {
- final NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
- final int partitionsNum = 50;
- for (int i = 0; i < partitionsNum; i++) {
- consumer.register(new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(i)), "0");
- }
-
- consumer.start();
-
- Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum, consumer.perPartitionFetchThreshold);
- Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_BYTES) / 2 / partitionsNum,
- consumer.perPartitionFetchThresholdBytes);
- }
-
- @Test
- public void testConsumerRegisterOlderOffsetOfTheSamzaSSP() {
-
- NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
-
- SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
- SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
- SystemStreamPartition ssp2 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(2));
-
- consumer.register(ssp0, "0");
- consumer.register(ssp0, "5");
- consumer.register(ssp1, "2");
- consumer.register(ssp1, "3");
- consumer.register(ssp2, "0");
-
- assertEquals("0", consumer.topicPartitions2Offset.get(NewKafkaSystemConsumer.toTopicPartition(ssp0)));
- assertEquals("2", consumer.topicPartitions2Offset.get(NewKafkaSystemConsumer.toTopicPartition(ssp1)));
- assertEquals("0", consumer.topicPartitions2Offset.get(NewKafkaSystemConsumer.toTopicPartition(ssp2)));
- }
-
- @Test
- public void testFetchThresholdBytes() {
-
- SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
- SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
- int partitionsNum = 2;
- int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size
- int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 1; // fake size
- int ime11Size = 20;
- ByteArraySerializer bytesSerde = new ByteArraySerializer();
- IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
- bytesSerde.serialize("", "value0".getBytes()), ime0Size);
- IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
- bytesSerde.serialize("", "value1".getBytes()), ime1Size);
- IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
- bytesSerde.serialize("", "value11".getBytes()), ime11Size);
- NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
-
- consumer.register(ssp0, "0");
- consumer.register(ssp1, "0");
- consumer.start();
- consumer.messageSink.addMessage(ssp0, ime0);
- // queue for ssp0 should be full now, because we added message of size FETCH_THRESHOLD_MSGS/partitionsNum
- Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp0));
- consumer.messageSink.addMessage(ssp1, ime1);
- // queue for ssp1 should be less than full now, because we added message of size (FETCH_THRESHOLD_MSGS/partitionsNum - 1)
- Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
- consumer.messageSink.addMessage(ssp1, ime11);
- // queue for ssp1 should be full now, because we added message of size 20 on top
- Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
-
- Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
- Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
- Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
- Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
- }
-
- @Test
- public void testFetchThresholdBytesDiabled() {
- // Pass 0 as fetchThresholdByBytes, which disables checking for limit by size
-
- SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
- SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
- int partitionsNum = 2;
- int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size, up to the limit
- int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 100; // fake size, below the limit
- int ime11Size = 20; // even with the second message still below the size limit
- ByteArraySerializer bytesSerde = new ByteArraySerializer();
- IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
- bytesSerde.serialize("", "value0".getBytes()), ime0Size);
- IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
- bytesSerde.serialize("", "value1".getBytes()), ime1Size);
- IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
- bytesSerde.serialize("", "value11".getBytes()), ime11Size);
-
- // limit by number of messages 4/2 = 2 per partition
- // limit by number of bytes - disabled
- NewKafkaSystemConsumer consumer = setupConsumer("4", "0"); // should disable
-
- consumer.register(ssp0, "0");
- consumer.register(ssp1, "0");
- consumer.start();
- consumer.messageSink.addMessage(ssp0, ime0);
- // should be full by size, but not full by number of messages (1 of 2)
- Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp0));
- consumer.messageSink.addMessage(ssp1, ime1);
- // not full neither by size nor by messages
- Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
- consumer.messageSink.addMessage(ssp1, ime11);
- // not full by size, but should be full by messages
- Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
-
- Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
- Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
- Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
- Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
- }
-
- // mock kafkaConsumer and SystemConsumer
- static class MockKafkaConsumer extends KafkaConsumer {
- public MockKafkaConsumer(Map<String, Object> configs) {
- super(configs);
- }
- }
-
- static class MockNewKafkaSystmeCosumer extends NewKafkaSystemConsumer {
- public MockNewKafkaSystmeCosumer(Consumer kafkaConsumer, String systemName, Config config, String clientId,
- KafkaSystemConsumerMetrics metrics, Clock clock) {
- super(kafkaConsumer, systemName, config, clientId, metrics, clock);
- }
-
- @Override
- void createConsumerProxy() {
- this.messageSink = new KafkaConsumerMessageSink();
- }
-
- @Override
- void startConsumer() {
- }
- }
-}
[40/47] samza git commit: Merge branch 'master' into NewConsumer2
Posted by bo...@apache.org.
Merge branch 'master' into NewConsumer2
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/f81cf148
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/f81cf148
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/f81cf148
Branch: refs/heads/NewKafkaSystemConsumer
Commit: f81cf1489c35ba52ef215d414ecf860689bd86a8
Parents: ddada94 952dbbe
Author: Boris S <bo...@apache.org>
Authored: Mon Sep 10 19:06:52 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Mon Sep 10 19:06:52 2018 -0700
----------------------------------------------------------------------
samza-shell/src/main/bash/run-class.sh | 12 ++-
.../job/yarn/YarnClusterResourceManager.java | 41 ++++++----
.../yarn/TestYarnClusterResourceManager.java | 81 ++++++++++++++++++++
3 files changed, 116 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
[47/47] samza git commit: Merge branch 'NewConsumer2' of
https://github.com/sborya/samza into NewKafkaSystemConsumer
Posted by bo...@apache.org.
Merge branch 'NewConsumer2' of https://github.com/sborya/samza into NewKafkaSystemConsumer
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/36159631
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/36159631
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/36159631
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 361596317a990a047282669ab93c1a1eb7810ae5
Parents: 40f7430 74b6cfa
Author: Boris S <bs...@linkedin.com>
Authored: Tue Sep 18 15:56:52 2018 -0700
Committer: Boris S <bs...@linkedin.com>
Committed: Tue Sep 18 15:56:52 2018 -0700
----------------------------------------------------------------------
.../samza/system/IncomingMessageEnvelope.java | 3 +-
.../ClusterBasedJobCoordinator.java | 2 +-
.../apache/samza/storage/StorageRecovery.java | 2 +-
.../samza/checkpoint/CheckpointTool.scala | 2 +-
.../apache/samza/container/SamzaContainer.scala | 2 +-
.../samza/coordinator/JobModelManager.scala | 6 +-
.../samza/job/local/ProcessJobFactory.scala | 3 +-
.../samza/job/local/ThreadJobFactory.scala | 20 +-
.../samza/coordinator/TestJobCoordinator.scala | 4 +-
.../clients/consumer/KafkaConsumerConfig.java | 194 ++++++++
.../org/apache/samza/config/KafkaConfig.scala | 5 +-
.../apache/samza/system/kafka/BrokerProxy.scala | 332 --------------
.../samza/system/kafka/KafkaConsumerProxy.java | 456 +++++++++++++++++++
.../samza/system/kafka/KafkaSystemConsumer.java | 391 ++++++++++++++++
.../system/kafka/KafkaSystemConsumer.scala | 309 -------------
.../kafka/KafkaSystemConsumerMetrics.scala | 68 ++-
.../samza/system/kafka/KafkaSystemFactory.scala | 81 ++--
.../consumer/TestKafkaConsumerConfig.java | 137 ++++++
.../samza/system/kafka/TestBrokerProxy.scala | 434 ------------------
.../system/kafka/TestKafkaSystemConsumer.java | 224 +++++++++
.../system/kafka/TestKafkaSystemConsumer.scala | 191 --------
.../test/integration/StreamTaskTestUtil.scala | 17 +-
.../integration/TestShutdownStatefulTask.scala | 4 +-
.../samza/validation/YarnJobValidationTool.java | 2 +-
.../yarn/TestSamzaYarnAppMasterService.scala | 4 +-
25 files changed, 1511 insertions(+), 1382 deletions(-)
----------------------------------------------------------------------
[05/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/010fa168
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/010fa168
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/010fa168
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 010fa168ee2a290b93f5a3b0908709b2c19044ec
Parents: bbffb79 e6049b7
Author: Boris S <bo...@apache.org>
Authored: Tue Oct 24 18:33:03 2017 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Oct 24 18:33:03 2017 -0700
----------------------------------------------------------------------
.../samza/system/kafka/KafkaStreamSpec.java | 9 +++
.../kafka/KafkaCheckpointManagerFactory.scala | 21 ++-----
.../org/apache/samza/config/KafkaConfig.scala | 37 +++++++++++-
.../samza/system/kafka/KafkaSystemAdmin.scala | 9 ++-
.../samza/system/kafka/KafkaSystemFactory.scala | 22 ++++++-
.../TestKafkaCheckpointManagerFactory.java | 51 +++++++++++++++++
.../kafka/TestKafkaSystemFactoryJava.java | 60 ++++++++++++++++++++
.../kafka/TestKafkaCheckpointManager.scala | 6 +-
.../apache/samza/config/TestKafkaConfig.scala | 13 +++++
9 files changed, 204 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
[34/47] samza git commit: cleanup
Posted by bo...@apache.org.
cleanup
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/0b6768f8
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/0b6768f8
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/0b6768f8
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 0b6768f803db12bf433d96b832c95fa228f6e7ca
Parents: f14d608
Author: Boris S <bo...@apache.org>
Authored: Wed Sep 5 14:39:08 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Sep 5 14:39:08 2018 -0700
----------------------------------------------------------------------
.../org/apache/samza/coordinator/JobModelManager.scala | 2 +-
.../kafka/clients/consumer/KafkaConsumerConfig.java | 10 +++++-----
.../org/apache/samza/system/kafka/KafkaConsumerProxy.java | 10 +++++-----
.../apache/samza/system/kafka/KafkaSystemFactory.scala | 4 ++--
4 files changed, 13 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/0b6768f8/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala b/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
index f7ffd4e..f95a521 100644
--- a/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
+++ b/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
@@ -64,7 +64,7 @@ object JobModelManager extends Logging {
* a) Reads the jobModel from coordinator stream using the job's configuration.
* b) Recomputes changelog partition mapping based on jobModel and job's configuration.
* c) Builds JobModelManager using the jobModel read from coordinator stream.
- * @param config Coordinator stream manager config
+ * @param config Coordinator stream manager config.
* @param changelogPartitionMapping The changelog partition-to-task mapping.
* @return JobModelManager
*/
http://git-wip-us.apache.org/repos/asf/samza/blob/0b6768f8/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index 843e03d..98792ab 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -129,17 +129,17 @@ public class KafkaConsumerConfig extends ConsumerConfig {
}
// client id should be unique per job
- public static String getClientId(Config config) {
- return getClientId(CONSUMER_CLIENT_ID_PREFIX, config);
+ public static String getConsumerClientId(Config config) {
+ return getConsumerClientId(CONSUMER_CLIENT_ID_PREFIX, config);
}
public static String getProducerClientId(Config config) {
- return getClientId(PRODUCER_CLIENT_ID_PREFIX, config);
+ return getConsumerClientId(PRODUCER_CLIENT_ID_PREFIX, config);
}
public static String getAdminClientId(Config config) {
- return getClientId(ADMIN_CLIENT_ID_PREFIX, config);
+ return getConsumerClientId(ADMIN_CLIENT_ID_PREFIX, config);
}
- private static String getClientId(String id, Config config) {
+ private static String getConsumerClientId(String id, Config config) {
if (config.get(JobConfig.JOB_NAME()) == null) {
throw new ConfigException("Missing job name");
}
http://git-wip-us.apache.org/repos/asf/samza/blob/0b6768f8/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 5c79017..ae80d50 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -85,7 +85,6 @@ public class KafkaConsumerProxy<K, V> {
this.metricName = metricName;
this.clientId = clientId;
- // TODO - see if we need new metrics (not host:port based)
this.kafkaConsumerMetrics.registerClientProxy(metricName);
consumerPollThread = new Thread(createProxyThreadRunnable());
@@ -133,18 +132,17 @@ public class KafkaConsumerProxy<K, V> {
* creates a separate thread for pulling messages
*/
private Runnable createProxyThreadRunnable() {
- return () -> {
+ Runnable runnable= () -> {
isRunning = true;
try {
consumerPollThreadStartLatch.countDown();
- System.out.println("THREAD: runing " + consumerPollThread.getName());
+ LOG.info("Starting runnable " + consumerPollThread.getName());
initializeLags();
while (isRunning) {
fetchMessages();
}
- System.out.println("THREAD: finished " + consumerPollThread.getName());
} catch (Throwable throwable) {
LOG.error(String.format("Error in KafkaConsumerProxy poll thread for system: %s.", systemName), throwable);
// SamzaKafkaSystemConsumer uses the failureCause to propagate the throwable to the container
@@ -156,6 +154,8 @@ public class KafkaConsumerProxy<K, V> {
LOG.info("Stopping the KafkaConsumerProxy poll thread for system: {}.", systemName);
}
};
+
+ return runnable;
}
private void initializeLags() {
@@ -433,7 +433,7 @@ public class KafkaConsumerProxy<K, V> {
}
public void stop(long timeout) {
- System.out.println("THREAD: Shutting down KafkaConsumerProxy poll thread:" + consumerPollThread.getName());
+ LOG.info("Shutting down KafkaConsumerProxy poll thread:" + consumerPollThread.getName());
isRunning = false;
try {
http://git-wip-us.apache.org/repos/asf/samza/blob/0b6768f8/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index 892d400..6f58bed 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -47,7 +47,7 @@ object KafkaSystemFactory extends Logging {
class KafkaSystemFactory extends SystemFactory with Logging {
def getConsumer(systemName: String, config: Config, registry: MetricsRegistry): SystemConsumer = {
- val clientId = KafkaConsumerConfig.getClientId( config)
+ val clientId = KafkaConsumerConfig.getConsumerClientId( config)
val metrics = new KafkaSystemConsumerMetrics(systemName, registry)
NewKafkaSystemConsumer.getNewKafkaSystemConsumer(
@@ -76,7 +76,7 @@ class KafkaSystemFactory extends SystemFactory with Logging {
}
def getAdmin(systemName: String, config: Config): SystemAdmin = {
- val clientId = KafkaConsumerConfig.getClientId(config)
+ val clientId = KafkaConsumerConfig.getConsumerClientId(config)
val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId)
val bootstrapServers = producerConfig.bootsrapServers
val consumerConfig = config.getKafkaSystemConsumerConfig(systemName, clientId)
[12/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/88f85595
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/88f85595
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/88f85595
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 88f855954e4d98aa3bdd3a3a778eb699a13ff659
Parents: 0edf343 8ce1bd5
Author: Boris S <bo...@apache.org>
Authored: Thu Aug 2 13:29:58 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Thu Aug 2 13:29:58 2018 -0700
----------------------------------------------------------------------
.travis.yml | 6 +-
README.md | 8 +-
RELEASE.md | 20 +
bin/integration-tests.sh | 17 +-
build.gradle | 2 +-
docs/community/committers.md | 5 +
.../versioned/jobs/configuration-table.html | 20 +
.../org/apache/samza/metrics/ListGauge.java | 143 +++++++
.../apache/samza/metrics/MetricsRegistry.java | 9 +
.../apache/samza/metrics/MetricsVisitor.java | 7 +-
.../ReadableMetricsRegistryListener.java | 2 +
.../samza/operators/functions/MapFunction.java | 2 +-
.../apache/samza/runtime/ApplicationRunner.java | 22 --
.../org/apache/samza/sql/SamzaSqlRelRecord.java | 113 ++++++
.../samza/storage/SideInputsProcessor.java | 46 +++
.../storage/SideInputsProcessorFactory.java | 45 +++
.../samza/system/ExtendedSystemAdmin.java | 6 +-
.../samza/system/IncomingMessageEnvelope.java | 9 +
.../org/apache/samza/system/StreamSpec.java | 56 +--
.../org/apache/samza/system/SystemAdmin.java | 31 ++
.../samza/table/TableDescriptorsProvider.java | 100 +++++
.../org/apache/samza/table/TableProvider.java | 3 +-
.../java/org/apache/samza/table/TableSpec.java | 44 ++-
.../apache/samza/util/NoOpMetricsRegistry.java | 7 +
.../org/apache/samza/util/TimestampedValue.java | 61 +++
.../org/apache/samza/metrics/TestListGauge.java | 99 +++++
.../org/apache/samza/metrics/TestTimer.java | 5 +-
.../apache/samza/sql/TestSamzaSqlRelRecord.java | 43 ++
.../apache/samza/system/TestSystemAdmin.java | 116 ++++++
.../samza/system/eventhub/EventHubConfig.java | 40 ++
.../consumer/EventHubSystemConsumer.java | 129 ++++--
.../MockEventHubClientManagerFactory.java | 4 +
.../system/eventhub/TestMetricsRegistry.java | 16 +-
.../consumer/TestEventHubSystemConsumer.java | 94 +++++
.../AbstractContainerAllocator.java | 5 +
.../HostAwareContainerAllocator.java | 10 +
.../clustermanager/SamzaApplicationState.java | 8 +
.../samza/config/InMemorySystemConfig.java | 52 +++
.../apache/samza/config/JavaStorageConfig.java | 47 +++
.../org/apache/samza/config/TaskConfigJava.java | 3 +-
.../samza/container/SamzaContainerListener.java | 9 +-
.../apache/samza/container/TaskContextImpl.java | 15 +-
.../samza/execution/ExecutionPlanner.java | 22 +-
.../org/apache/samza/execution/JobGraph.java | 18 +-
.../samza/execution/JobGraphJsonGenerator.java | 12 +-
.../org/apache/samza/execution/JobNode.java | 67 ++--
.../org/apache/samza/execution/StreamEdge.java | 34 +-
.../apache/samza/execution/StreamManager.java | 19 +-
.../executors/KeyBasedExecutorService.java | 174 +++++++++
.../org/apache/samza/metrics/MetricGroup.java | 4 +
.../samza/operators/OperatorSpecGraph.java | 15 +-
.../apache/samza/operators/StreamGraphSpec.java | 79 ++--
.../functions/PartialJoinFunction.java | 2 +-
.../operators/impl/BroadcastOperatorImpl.java | 4 +-
.../samza/operators/impl/OperatorImplGraph.java | 64 +--
.../operators/impl/OutputOperatorImpl.java | 4 +-
.../operators/impl/PartialJoinOperatorImpl.java | 2 +-
.../operators/impl/PartitionByOperatorImpl.java | 11 +-
.../operators/impl/WindowOperatorImpl.java | 2 +-
.../operators/impl/store/TimeSeriesStore.java | 2 +
.../impl/store/TimeSeriesStoreImpl.java | 1 +
.../operators/impl/store/TimestampedValue.java | 61 ---
.../impl/store/TimestampedValueSerde.java | 1 +
.../samza/operators/spec/InputOperatorSpec.java | 12 +-
.../samza/operators/spec/JoinOperatorSpec.java | 2 +-
.../samza/operators/spec/OperatorSpecs.java | 7 +-
.../samza/operators/spec/OutputStreamImpl.java | 18 +-
.../stream/IntermediateMessageStreamImpl.java | 7 +-
.../apache/samza/processor/StreamProcessor.java | 281 ++++++++-----
.../runtime/AbstractApplicationRunner.java | 111 ++----
.../samza/runtime/ApplicationRunnerMain.java | 10 -
.../samza/runtime/LocalApplicationRunner.java | 36 +-
.../samza/runtime/LocalContainerRunner.java | 150 +++----
.../samza/runtime/RemoteApplicationRunner.java | 34 +-
.../standalone/PassthroughJobCoordinator.java | 4 +
.../samza/storage/ChangelogStreamManager.java | 4 +-
.../samza/storage/StorageManagerUtil.java | 142 +++++++
.../apache/samza/storage/StorageRecovery.java | 14 +-
.../storage/TaskSideInputStorageManager.java | 375 ++++++++++++++++++
.../system/inmemory/InMemorySystemFactory.java | 15 +-
.../samza/table/TableConfigGenerator.java | 143 +++++++
.../samza/table/caching/CachingTable.java | 39 +-
.../table/caching/CachingTableProvider.java | 4 +-
.../table/caching/guava/GuavaCacheTable.java | 12 +-
.../caching/guava/GuavaCacheTableProvider.java | 1 +
.../table/remote/RemoteReadWriteTable.java | 39 +-
.../samza/table/remote/RemoteReadableTable.java | 20 +-
.../table/utils/DefaultTableReadMetrics.java | 55 +++
.../table/utils/DefaultTableWriteMetrics.java | 63 +++
.../samza/table/utils/TableMetricsUtil.java | 101 +++++
.../apache/samza/task/SystemTimerScheduler.java | 1 +
.../org/apache/samza/testUtils/TestClock.java | 45 +++
.../java/org/apache/samza/util/StreamUtil.java | 90 +++++
.../samza/zk/ZkBarrierForVersionUpgrade.java | 19 +-
.../java/org/apache/samza/zk/ZkController.java | 39 --
.../org/apache/samza/zk/ZkControllerImpl.java | 163 --------
.../apache/samza/zk/ZkControllerListener.java | 37 --
.../org/apache/samza/zk/ZkJobCoordinator.java | 172 +++++---
.../org/apache/samza/zk/ZkLeaderElector.java | 17 +-
.../main/java/org/apache/samza/zk/ZkUtils.java | 72 ++--
.../apache/samza/checkpoint/OffsetManager.scala | 6 +-
.../org/apache/samza/config/JobConfig.scala | 13 +
.../org/apache/samza/config/MetricsConfig.scala | 3 +
.../org/apache/samza/config/StorageConfig.scala | 17 +-
.../org/apache/samza/config/StreamConfig.scala | 2 +-
.../org/apache/samza/config/TaskConfig.scala | 8 +-
.../org/apache/samza/container/RunLoop.scala | 5 +-
.../apache/samza/container/SamzaContainer.scala | 142 +++++--
.../samza/container/SamzaContainerMetrics.scala | 3 +
.../apache/samza/container/TaskInstance.scala | 140 +++++--
.../diagnostics/DiagnosticsExceptionEvent.java | 90 +++++
.../samza/job/local/ThreadJobFactory.scala | 6 +-
.../ContainerProcessManagerMetrics.scala | 22 +-
.../apache/samza/metrics/MetricsHelper.scala | 6 +-
.../samza/metrics/MetricsRegistryMap.scala | 15 +
.../samza/metrics/reporter/JmxReporter.scala | 29 +-
.../apache/samza/metrics/reporter/Metrics.scala | 9 +-
.../samza/metrics/reporter/MetricsHeader.scala | 3 +
.../reporter/MetricsSnapshotReporter.scala | 67 +++-
.../MetricsSnapshotReporterFactory.scala | 11 +-
.../serializers/MetricsSnapshotSerdeV2.java | 75 ++++
.../MetricsSnapshotSerdeV2Factory.java | 31 ++
.../samza/storage/TaskStorageManager.scala | 117 +-----
.../apache/samza/system/SSPMetadataCache.java | 126 ++++++
.../system/chooser/BootstrappingChooser.scala | 35 +-
.../scala/org/apache/samza/util/FileUtil.scala | 21 +-
.../org/apache/samza/util/ScalaJavaUtil.scala | 12 +
.../main/scala/org/apache/samza/util/Util.scala | 22 --
.../TestHostAwareContainerAllocator.java | 10 +-
.../MockCoordinatorStreamSystemFactory.java | 8 +-
.../samza/execution/TestExecutionPlanner.java | 42 +-
.../apache/samza/execution/TestJobGraph.java | 28 +-
.../execution/TestJobGraphJsonGenerator.java | 45 +--
.../org/apache/samza/execution/TestJobNode.java | 17 +-
.../apache/samza/execution/TestStreamEdge.java | 16 +-
.../executors/TestKeyBasedExecutorService.java | 84 ++++
.../metrics/TestMetricsSnapshotReporter.java | 115 ++++++
.../samza/operators/TestJoinOperator.java | 11 +-
.../samza/operators/TestOperatorSpecGraph.java | 23 +-
.../samza/operators/TestStreamGraphSpec.java | 336 ++++++----------
.../operators/impl/TestOperatorImplGraph.java | 274 +++++++------
.../operators/impl/TestWindowOperator.java | 16 +-
.../impl/store/TestTimeSeriesStoreImpl.java | 1 +
.../impl/store/TestTimestampedValueSerde.java | 1 +
.../operators/spec/OperatorSpecTestUtils.java | 14 +-
.../samza/operators/spec/TestOperatorSpec.java | 18 +-
.../spec/TestPartitionByOperatorSpec.java | 14 +-
.../samza/processor/TestStreamProcessor.java | 162 +++++++-
.../runtime/TestAbstractApplicationRunner.java | 391 -------------------
.../runtime/TestApplicationRunnerMain.java | 11 +-
.../runtime/TestLocalApplicationRunner.java | 22 +-
.../serializers/TestMetricsSnapshotSerdeV2.java | 69 ++++
.../samza/system/TestSSPMetadataCache.java | 319 +++++++++++++++
.../samza/table/caching/TestCachingTable.java | 18 +-
.../org/apache/samza/task/TestAsyncRunLoop.java | 2 +-
.../apache/samza/task/TestTaskFactoryUtil.java | 5 +-
.../apache/samza/testUtils/StreamTestUtils.java | 39 ++
.../org/apache/samza/testUtils/TestClock.java | 45 ---
.../org/apache/samza/util/TestStreamUtil.java | 337 ++++++++++++++++
.../apache/samza/zk/TestZkJobCoordinator.java | 19 +-
.../java/org/apache/samza/zk/TestZkUtils.java | 21 +-
.../factories/TestPropertiesConfigFactory.scala | 3 +-
.../samza/container/TestSamzaContainer.scala | 44 ++-
.../samza/container/TestTaskInstance.scala | 5 +-
.../org/apache/samza/job/TestJobRunner.scala | 11 +-
.../serializers/TestMetricsSnapshotSerde.scala | 3 +-
.../samza/storage/TestTaskStorageManager.scala | 154 ++++----
.../org/apache/samza/util/TestFileUtil.scala | 22 ++
.../hdfs/TestHdfsSystemProducerTestSuite.scala | 5 +-
.../samza/system/kafka/KafkaStreamSpec.java | 15 +-
.../org/apache/samza/config/KafkaConfig.scala | 4 +-
.../samza/config/RegExTopicGenerator.scala | 8 +-
.../apache/samza/system/kafka/BrokerProxy.scala | 8 +-
.../samza/system/kafka/KafkaSystemAdmin.scala | 4 +-
.../system/kafka/KafkaSystemConsumer.scala | 25 +-
.../kafka/KafkaSystemConsumerMetrics.scala | 6 +-
.../util/ClientUtilTopicMetadataStore.scala | 3 +
.../scala/org/apache/samza/util/KafkaUtil.scala | 6 +-
.../samza/system/kafka/TestKafkaStreamSpec.java | 6 +-
.../kafka/TestKafkaCheckpointManager.scala | 4 +-
.../system/kafka/TestKafkaSystemAdmin.scala | 2 +-
.../system/kafka/TestKafkaSystemConsumer.scala | 2 +-
.../kv/inmemory/InMemoryTableDescriptor.java | 3 +-
.../storage/kv/RocksDbTableDescriptor.java | 3 +-
.../samza/storage/kv/RocksDbKeyValueStore.scala | 9 +-
.../kv/BaseLocalStoreBackedTableDescriptor.java | 21 +
.../kv/BaseLocalStoreBackedTableProvider.java | 12 +
.../kv/LocalStoreBackedReadWriteTable.java | 33 +-
.../kv/LocalStoreBackedReadableTable.java | 33 +-
.../TestLocalBaseStoreBackedTableProvider.java | 6 +-
.../log4j/SimpleDiagnosticsAppender.java | 101 +++++
.../apache/samza/rest/SamzaRestApplication.java | 1 -
.../org/apache/samza/rest/SamzaRestService.java | 15 +-
.../apache/samza/sql/avro/AvroRelConverter.java | 17 +-
.../samza/sql/data/SamzaSqlRelMessage.java | 85 +---
.../sql/runner/SamzaSqlApplicationRunner.java | 2 -
.../SamzaSqlRelMessageSerdeFactory.java | 2 +-
.../SamzaSqlRelRecordSerdeFactory.java | 18 +-
.../samza/sql/TestSamzaSqlRelMessageSerde.java | 102 -----
.../samza/sql/TestSamzaSqlRelRecordSerde.java | 86 ----
.../samza/sql/avro/TestAvroRelConversion.java | 17 +-
.../samza/sql/avro/schemas/ComplexRecord.java | 2 +-
.../apache/samza/sql/avro/schemas/MyFixed.java | 5 +-
.../samza/sql/data/TestSamzaSqlRelMessage.java | 18 +
.../TestSamzaSqlRelMessageSerde.java | 102 +++++
.../serializers/TestSamzaSqlRelRecordSerde.java | 85 ++++
.../samza/sql/system/TestAvroSystemFactory.java | 9 +
.../sql/translator/TestQueryTranslator.java | 235 ++++++-----
.../config/standalone.failure.test.properties | 45 +++
.../test/framework/MessageStreamAssert.java | 192 +++++++++
.../samza/test/framework/StreamAssert.java | 220 ++++-------
.../apache/samza/test/framework/TestRunner.java | 367 +++++++++++++++++
.../test/framework/stream/CollectionStream.java | 204 ++++++++++
.../system/CollectionStreamSystemSpec.java | 79 ++++
.../integration/LocalApplicationRunnerMain.java | 63 +++
.../test/integration/NegateNumberTask.java | 4 +-
.../TestStandaloneIntegrationApplication.java | 42 ++
samza-test/src/main/python/configs/kafka.json | 2 +-
samza-test/src/main/python/deployment.py | 4 +-
samza-test/src/main/python/requirements.txt | 1 +
.../src/main/python/standalone_deployment.py | 123 ++++++
.../main/python/standalone_integration_tests.py | 29 ++
samza-test/src/main/python/stream_processor.py | 121 ++++++
.../python/tests/standalone_failure_tests.py | 311 +++++++++++++++
samza-test/src/main/python/tests/zk_client.py | 129 ++++++
.../test/performance/TestPerformanceTask.scala | 4 +-
.../samza/processor/TestZkStreamProcessor.java | 4 +-
.../TestZkStreamProcessorFailures.java | 4 +-
.../processor/TestZkStreamProcessorSession.java | 4 +-
.../AsyncStreamTaskIntegrationTest.java | 144 +++++++
.../test/framework/BroadcastAssertApp.java | 58 +++
.../samza/test/framework/MyAsyncStreamTask.java | 67 ++++
.../samza/test/framework/MyStreamTestTask.java | 38 ++
.../StreamApplicationIntegrationTest.java | 132 +++++++
...StreamApplicationIntegrationTestHarness.java | 302 ++++++++++++++
.../framework/StreamTaskIntegrationTest.java | 138 +++++++
.../samza/test/framework/TestTimerApp.java | 86 ++++
.../apache/samza/test/framework/TimerTest.java | 50 +++
.../samza/test/operator/BroadcastAssertApp.java | 59 ---
.../test/operator/RepartitionJoinWindowApp.java | 13 +-
...StreamApplicationIntegrationTestHarness.java | 277 -------------
.../operator/TestRepartitionJoinWindowApp.java | 13 +-
.../test/operator/TestRepartitionWindowApp.java | 1 +
.../processor/TestZkLocalApplicationRunner.java | 145 +++++--
.../test/samzasql/TestSamzaSqlEndToEnd.java | 5 +-
.../table/TestTableDescriptorsProvider.java | 164 ++++++++
.../apache/samza/test/timer/TestTimerApp.java | 87 -----
.../org/apache/samza/test/timer/TimerTest.java | 51 ---
.../tools/json/JsonRelConverterFactory.java | 14 +-
.../org/apache/samza/config/YarnConfig.java | 56 ++-
.../job/yarn/YarnClusterResourceManager.java | 7 +
.../apache/samza/job/yarn/ClientHelper.scala | 7 +-
.../webapp/ApplicationMasterRestServlet.scala | 3 +
253 files changed, 9637 insertions(+), 3594 deletions(-)
----------------------------------------------------------------------
[25/47] samza git commit: added JobModelManager to ThreadJob
Posted by bo...@apache.org.
added JobModelManager to ThreadJob
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/22034947
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/22034947
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/22034947
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 22034947b998d3604bc3911a417b9c1e761bb90f
Parents: c14557f
Author: Boris S <bo...@apache.org>
Authored: Fri Aug 31 14:36:51 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Aug 31 14:36:51 2018 -0700
----------------------------------------------------------------------
.../stream/CoordinatorStreamSystemConsumer.java | 4 +-
.../org/apache/samza/job/local/ThreadJob.scala | 5 +-
.../samza/job/local/ThreadJobFactory.scala | 2 +-
.../apache/samza/job/local/TestThreadJob.scala | 9 ++
.../system/kafka/NewKafkaSystemConsumer.java | 121 +++++++++----------
.../integration/TestShutdownStatefulTask.scala | 4 +-
6 files changed, 75 insertions(+), 70 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/22034947/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java b/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
index 38255a2..0bdb874 100644
--- a/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
+++ b/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
@@ -176,7 +176,7 @@ public class CoordinatorStreamSystemConsumer {
valueMap = messageSerde.fromBytes((byte[]) envelope.getMessage());
}
CoordinatorStreamMessage coordinatorStreamMessage = new CoordinatorStreamMessage(keyArray, valueMap);
- log.debug("Received coordinator stream message: {}", coordinatorStreamMessage);
+ log.info("Received coordinator stream message: {}", coordinatorStreamMessage);
// Remove any existing entry. Set.add() does not add if the element already exists.
if (bootstrappedMessages.remove(coordinatorStreamMessage)) {
log.debug("Removed duplicate message: {}", coordinatorStreamMessage);
@@ -194,7 +194,7 @@ public class CoordinatorStreamSystemConsumer {
}
bootstrappedStreamSet = Collections.unmodifiableSet(bootstrappedMessages);
- log.debug("Bootstrapped configuration: {}", configMap);
+ log.info("Bootstrapped configuration: {}", configMap);
isBootstrapped = true;
} catch (Exception e) {
throw new SamzaException(e);
http://git-wip-us.apache.org/repos/asf/samza/blob/22034947/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
index a61a297..33dde52 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
@@ -19,11 +19,12 @@
package org.apache.samza.job.local
+import org.apache.samza.coordinator.JobModelManager
import org.apache.samza.job.ApplicationStatus.{New, Running, SuccessfulFinish, UnsuccessfulFinish}
import org.apache.samza.job.{ApplicationStatus, StreamJob}
import org.apache.samza.util.Logging
-class ThreadJob(runnable: Runnable) extends StreamJob with Logging {
+class ThreadJob(runnable: Runnable, val jobModelManager: JobModelManager) extends StreamJob with Logging {
@volatile var jobStatus: Option[ApplicationStatus] = None
var thread: Thread = null
@@ -43,6 +44,8 @@ class ThreadJob(runnable: Runnable) extends StreamJob with Logging {
jobStatus = Some(UnsuccessfulFinish)
throw e
}
+ } finally {
+ jobModelManager.stop
}
}
}
http://git-wip-us.apache.org/repos/asf/samza/blob/22034947/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
index 0b472aa..4b08721 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
@@ -110,7 +110,7 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
taskFactory)
container.setContainerListener(containerListener)
- val threadJob = new ThreadJob(container)
+ val threadJob = new ThreadJob(container, coordinator)
threadJob
} finally {
coordinator.stop
http://git-wip-us.apache.org/repos/asf/samza/blob/22034947/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala b/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
index 4f3f511..b1de215 100644
--- a/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
+++ b/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
@@ -19,6 +19,7 @@
package org.apache.samza.job.local
+import org.apache.samza.coordinator.JobModelManager
import org.junit.Assert._
import org.junit.Test
import org.apache.samza.job.ApplicationStatus
@@ -29,6 +30,10 @@ class TestThreadJob {
val job = new ThreadJob(new Runnable {
override def run {
}
+ }, new JobModelManager(null) {
+ override def stop: Unit = {
+
+ }
})
job.submit
job.waitForFinish(999999)
@@ -40,6 +45,10 @@ class TestThreadJob {
override def run {
Thread.sleep(999999)
}
+ }, new JobModelManager(null) {
+ override def stop: Unit = {
+
+ }
})
job.submit
job.waitForFinish(500)
http://git-wip-us.apache.org/repos/asf/samza/blob/22034947/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index b745628..e34812f 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -1,3 +1,4 @@
+
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
@@ -21,47 +22,38 @@
package org.apache.samza.system.kafka;
-import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.stream.Collectors;
import kafka.common.TopicAndPartition;
import org.apache.kafka.clients.consumer.Consumer;
-import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
import org.apache.kafka.common.TopicPartition;
-import org.apache.kafka.common.serialization.ByteArrayDeserializer;
-import org.apache.kafka.common.serialization.Deserializer;
import org.apache.samza.Partition;
import org.apache.samza.SamzaException;
import org.apache.samza.config.Config;
import org.apache.samza.config.KafkaConfig;
-import org.apache.samza.config.StreamConfig;
import org.apache.samza.system.IncomingMessageEnvelope;
import org.apache.samza.system.SystemConsumer;
-import org.apache.samza.system.SystemStream;
import org.apache.samza.system.SystemStreamPartition;
import org.apache.samza.util.BlockingEnvelopeMap;
import org.apache.samza.util.Clock;
-import org.apache.samza.util.KafkaUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Option;
-import scala.collection.JavaConversions;
-public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements SystemConsumer{
+public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements SystemConsumer {
private static final Logger LOG = LoggerFactory.getLogger(NewKafkaSystemConsumer.class);
private static final long FETCH_THRESHOLD = 50000;
private static final long FETCH_THRESHOLD_BYTES = -1L;
- private final Consumer<K,V> kafkaConsumer;
+ private final Consumer<K, V> kafkaConsumer;
private final String systemName;
private final KafkaSystemConsumerMetrics samzaConsumerMetrics;
private final String clientId;
@@ -78,8 +70,8 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
private KafkaConsumerProxy proxy;
/* package private */final Map<TopicPartition, String> topicPartitions2Offset = new HashMap<>();
- /* package private */long perPartitionFetchThreshold;
- /* package private */long perPartitionFetchThresholdBytes;
+ /* package private */ long perPartitionFetchThreshold;
+ /* package private */ long perPartitionFetchThresholdBytes;
// TODO - consider new class for KafkaSystemConsumerMetrics
@@ -88,15 +80,10 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
* @param config
* @param metrics
*/
- public NewKafkaSystemConsumer(
- Consumer<K,V> kafkaConsumer,
- String systemName,
- Config config,
- String clientId,
- KafkaSystemConsumerMetrics metrics,
- Clock clock) {
+ protected NewKafkaSystemConsumer(Consumer<K, V> kafkaConsumer, String systemName, Config config, String clientId,
+ KafkaSystemConsumerMetrics metrics, Clock clock) {
- super(metrics.registry(),clock, metrics.getClass().getName());
+ super(metrics.registry(), clock, metrics.getClass().getName());
this.samzaConsumerMetrics = metrics;
this.clientId = clientId;
@@ -109,26 +96,20 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
this.fetchThresholdBytesEnabled = new KafkaConfig(config).isConsumerFetchThresholdBytesEnabled(systemName);
LOG.info(String.format(
- "Created SamzaLiKafkaSystemConsumer for system=%s, clientId=%s, metricName=%s with liKafkaConsumer=%s",
- systemName, clientId, metricName, this.kafkaConsumer.toString()));
+ "Created SamzaKafkaSystemConsumer for system=%s, clientId=%s, metricName=%s with KafkaConsumer=%s", systemName,
+ clientId, metricName, this.kafkaConsumer.toString()));
}
- public static <K, V> NewKafkaSystemConsumer getNewKafkaSystemConsumer(
- String systemName,
- Config config,
- String clientId,
- KafkaSystemConsumerMetrics metrics,
- Clock clock) {
+ public static <K, V> NewKafkaSystemConsumer getNewKafkaSystemConsumer(String systemName, Config config,
+ String clientId, KafkaSystemConsumerMetrics metrics, Clock clock) {
+
+ System.out.println("GETTING FOR " + systemName);
+ System.out.printf("RETURNING NEW ONE");
// extract consumer configs and create kafka consumer
KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
- return new NewKafkaSystemConsumer(kafkaConsumer,
- systemName,
- config,
- clientId,
- metrics,
- clock);
+ return new NewKafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
}
/**
@@ -146,7 +127,8 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
KafkaConsumerConfig consumerConfig =
KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
- LOG.info("==============>Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}", systemName, consumerConfig.originals());
+ LOG.info("==============>Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}",
+ systemName, consumerConfig.originals());
return new KafkaConsumer<>(consumerConfig.originals());
}
@@ -157,7 +139,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
LOG.warn("attempting to start the consumer for the second (or more) time.");
return;
}
- if(stopped.get()) {
+ if (stopped.get()) {
LOG.warn("attempting to start a stopped consumer");
return;
}
@@ -197,8 +179,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
messageSink = new KafkaConsumerMessageSink();
// create the thread with the consumer
- proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink,
- samzaConsumerMetrics, metricName);
+ proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink, samzaConsumerMetrics, metricName);
LOG.info("==============>Created consumer proxy: " + proxy);
}
@@ -231,8 +212,10 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
proxy.addTopicPartition(topicPartitions2SSP.get(tp), startingOffset);
});
+ System.out.println("#####################started " + this + "; kc=" + kafkaConsumer);
// start the proxy thread
if (proxy != null && !proxy.isRunning()) {
+ System.out.println("#####################starting proxy " + proxy);
proxy.start();
}
}
@@ -242,33 +225,37 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
KafkaConfig kafkaConfig = new KafkaConfig(config);
Option<String> fetchThresholdOption = kafkaConfig.getConsumerFetchThreshold(systemName);
long fetchThreshold = FETCH_THRESHOLD;
- if(fetchThresholdOption.isDefined()) {
+ if (fetchThresholdOption.isDefined()) {
fetchThreshold = Long.valueOf(fetchThresholdOption.get());
LOG.info("fetchThresholdOption is defined. fetchThreshold=" + fetchThreshold);
}
Option<String> fetchThresholdBytesOption = kafkaConfig.getConsumerFetchThresholdBytes(systemName);
long fetchThresholdBytes = FETCH_THRESHOLD_BYTES;
- if(fetchThresholdBytesOption.isDefined()) {
+ if (fetchThresholdBytesOption.isDefined()) {
fetchThresholdBytes = Long.valueOf(fetchThresholdBytesOption.get());
LOG.info("fetchThresholdBytesOption is defined. fetchThresholdBytes=" + fetchThresholdBytes);
}
LOG.info("fetchThresholdBytes = " + fetchThresholdBytes + "; fetchThreshold=" + fetchThreshold);
- LOG.info("topicPartitions2Offset #=" + topicPartitions2Offset.size() + "; topicPartition2SSP #=" + topicPartitions2SSP.size());
+ LOG.info("topicPartitions2Offset #=" + topicPartitions2Offset.size() + "; topicPartition2SSP #="
+ + topicPartitions2SSP.size());
if (topicPartitions2SSP.size() > 0) {
perPartitionFetchThreshold = fetchThreshold / topicPartitions2SSP.size();
LOG.info("perPartitionFetchThreshold=" + perPartitionFetchThreshold);
- if(fetchThresholdBytesEnabled) {
+ if (fetchThresholdBytesEnabled) {
// currently this feature cannot be enabled, because we do not have the size of the messages available.
// messages get double buffered, hence divide by 2
perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / topicPartitions2SSP.size();
- LOG.info("perPartitionFetchThresholdBytes is enabled. perPartitionFetchThresholdBytes=" + perPartitionFetchThresholdBytes);
+ LOG.info("perPartitionFetchThresholdBytes is enabled. perPartitionFetchThresholdBytes="
+ + perPartitionFetchThresholdBytes);
}
}
}
@Override
public void stop() {
+ System.out.println("##################### stopping " + this + "; kc=" + kafkaConsumer);
+
if (!stopped.compareAndSet(false, true)) {
LOG.warn("attempting to stop stopped consumer.");
return;
@@ -276,8 +263,10 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
LOG.warn("Stopping SamzaRawLiKafkaConsumer + " + this);
// stop the proxy (with 5 minutes timeout)
- if(proxy != null)
+ if (proxy != null) {
+ System.out.println("##################### stopping proxy " + proxy);
proxy.stop(TimeUnit.MINUTES.toMillis(5));
+ }
try {
synchronized (kafkaConsumer) {
@@ -293,6 +282,14 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
*/
@Override
public void register(SystemStreamPartition systemStreamPartition, String offset) {
+ if (started.get()) {
+ String msg =
+ String.format("Trying to register partition after consumer has been started. sn=%s, ssp=%s", systemName,
+ systemStreamPartition);
+ LOG.error(msg);
+ throw new SamzaException(msg);
+ }
+
if (!systemStreamPartition.getSystem().equals(systemName)) {
LOG.warn("ignoring SSP " + systemStreamPartition + ", because this consumer's system is " + systemName);
return;
@@ -332,16 +329,17 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
@Override
public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
- Set<SystemStreamPartition> systemStreamPartitions, long timeout)
- throws InterruptedException {
+ Set<SystemStreamPartition> systemStreamPartitions, long timeout) throws InterruptedException {
// check if the proxy is running
- if(!proxy.isRunning()) {
+ if (!proxy.isRunning()) {
stop();
if (proxy.getFailureCause() != null) {
String message = "LiKafkaConsumerProxy has stopped";
- if(proxy.getFailureCause() instanceof org.apache.kafka.common.errors.TopicAuthorizationException)
- message += " due to TopicAuthorizationException Please refer to go/samzaacluserguide to correctly set up acls for your topic";
+ if (proxy.getFailureCause() instanceof org.apache.kafka.common.errors.TopicAuthorizationException) {
+ message +=
+ " due to TopicAuthorizationException Please refer to go/samzaacluserguide to correctly set up acls for your topic";
+ }
throw new SamzaException(message, proxy.getFailureCause());
} else {
LOG.warn("Failure cause not populated for LiKafkaConsumerProxy");
@@ -349,7 +347,9 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
}
}
- return super.poll(systemStreamPartitions, timeout);
+ Map<SystemStreamPartition, List<IncomingMessageEnvelope>> res = super.poll(systemStreamPartitions, timeout);
+ LOG.info("=============================>. Res in POLL:" + res.toString());
+ return res;
}
public static TopicAndPartition toTopicAndPartition(TopicPartition tp) {
@@ -376,15 +376,6 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
return systemName;
}
- private static Set<SystemStream> getIntermediateStreams(Config config) {
- StreamConfig streamConfig = new StreamConfig(config);
- Collection<String> streamIds = JavaConversions.asJavaCollection(streamConfig.getStreamIds());
- return streamIds.stream()
- .filter(streamConfig::getIsIntermediateStream)
- .map(id -> streamConfig.streamIdToSystemStream(id))
- .collect(Collectors.toSet());
- }
-
////////////////////////////////////
// inner class for the message sink
////////////////////////////////////
@@ -395,10 +386,11 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
}
boolean needsMoreMessages(SystemStreamPartition ssp) {
- if(LOG.isDebugEnabled()) {
+ if (LOG.isDebugEnabled()) {
LOG.debug("needsMoreMessages from following SSP: {}. fetchLimitByBytes enabled={}; messagesSizeInQueue={};"
- + "(limit={}); messagesNumInQueue={}(limit={};", ssp, fetchThresholdBytesEnabled, getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes,
- getNumMessagesInQueue(ssp), perPartitionFetchThreshold);
+ + "(limit={}); messagesNumInQueue={}(limit={};", ssp, fetchThresholdBytesEnabled,
+ getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes, getNumMessagesInQueue(ssp),
+ perPartitionFetchThreshold);
}
if (fetchThresholdBytesEnabled) {
@@ -415,8 +407,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
put(ssp, envelope);
} catch (InterruptedException e) {
throw new SamzaException(
- String.format("Interrupted while trying to add message with offset %s for ssp %s",
- envelope.getOffset(),
+ String.format("Interrupted while trying to add message with offset %s for ssp %s", envelope.getOffset(),
ssp));
}
}
http://git-wip-us.apache.org/repos/asf/samza/blob/22034947/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
----------------------------------------------------------------------
diff --git a/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala b/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
index e4d47d1..a42433c 100644
--- a/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
+++ b/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
@@ -82,13 +82,15 @@ class TestShutdownStatefulTask extends StreamTaskTestUtil {
assertEquals(0, task.received.size)
// Send some messages to input stream.
+ System.out.println("************************BEFORE DONE sending")
send(task, "1")
+ System.out.println("************************FIRST DONE sending")
send(task, "2")
send(task, "3")
send(task, "2")
send(task, "99")
send(task, "99")
-
+ System.out.println("************************DONE sending")
stopJob(job)
}
[07/47] samza git commit: Merge branch 'master' of
https://github.com/sborya/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/sborya/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/06b1ac36
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/06b1ac36
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/06b1ac36
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 06b1ac36e9c67a3bd558a0fa592639b16fcbfda9
Parents: 5e6f5fb 010fa16
Author: Boris Shkolnik <bs...@linkedin.com>
Authored: Wed Oct 25 09:50:55 2017 -0700
Committer: Boris Shkolnik <bs...@linkedin.com>
Committed: Wed Oct 25 09:50:55 2017 -0700
----------------------------------------------------------------------
----------------------------------------------------------------------
[16/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/57fca52c
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/57fca52c
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/57fca52c
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 57fca52cc48a37622aa6b8b1f71a37733c378524
Parents: afb34d9 ce57657
Author: Boris S <bo...@apache.org>
Authored: Wed Aug 15 12:13:37 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Aug 15 12:13:37 2018 -0700
----------------------------------------------------------------------
.../org/apache/samza/config/MetricsConfig.scala | 1 +
.../scala/org/apache/samza/job/JobRunner.scala | 43 ++++++++++++++++----
2 files changed, 35 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
[32/47] samza git commit: formatting
Posted by bo...@apache.org.
formatting
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/ceb0f6ae
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/ceb0f6ae
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/ceb0f6ae
Branch: refs/heads/NewKafkaSystemConsumer
Commit: ceb0f6aef45822191e29b6f43b9df76168c161e8
Parents: 332a048
Author: Boris S <bo...@apache.org>
Authored: Wed Sep 5 14:13:47 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Sep 5 14:13:47 2018 -0700
----------------------------------------------------------------------
.../scala/org/apache/samza/job/local/ThreadJobFactory.scala | 3 ++-
.../org/apache/samza/system/kafka/KafkaConsumerProxy.java | 9 +--------
.../samza/system/kafka/KafkaSystemConsumerMetrics.scala | 8 ++++----
.../org/apache/samza/validation/YarnJobValidationTool.java | 2 +-
.../samza/job/yarn/TestSamzaYarnAppMasterService.scala | 4 +++-
5 files changed, 11 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/ceb0f6ae/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
index 15aa5a6..0d71303 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
@@ -50,7 +50,7 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
val changelogStreamManager = new ChangelogStreamManager(coordinatorStreamManager)
val coordinator = JobModelManager(coordinatorStreamManager.getConfig, changelogStreamManager.readPartitionMapping())
- coordinatorStreamManager.stop()
+
val jobModel = coordinator.jobModel
val taskPartitionMappings: mutable.Map[TaskName, Integer] = mutable.Map[TaskName, Integer]()
@@ -116,6 +116,7 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
threadJob
} finally {
coordinator.stop
+ coordinatorStreamManager.stop()
jmxServer.stop
}
}
http://git-wip-us.apache.org/repos/asf/samza/blob/ceb0f6ae/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index a6272cd..7232a0a 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -99,20 +99,14 @@ public class KafkaConsumerProxy<K, V> {
"Samza KafkaConsumerProxy Poll " + consumerPollThread.getName() + " - " + systemName);
consumerPollThread.start();
- System.out.println("THREAD: starting" + consumerPollThread.getName());
-
-
// we need to wait until the thread starts
while (!isRunning) {
try {
consumerPollThreadStartLatch.await(3000, TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
- LOG.info("WTH");
+ LOG.info("Got InterruptedException", e);
}
}
- new Exception().printStackTrace(System.out);
- System.out.println("THREAD: started" + consumerPollThread.getName());
-
} else {
LOG.debug("Tried to start an already started LiKafkaConsumerProxy (%s). Ignoring.", this.toString());
}
@@ -220,7 +214,6 @@ public class KafkaConsumerProxy<K, V> {
kafkaConsumer.resume(topicPartitionsToPause);
}
} catch (InvalidOffsetException e) {
- LOG.error("LiKafkaConsumer with invalidOffsetException", e);
// If the consumer has thrown this exception it means that auto reset is not set for this consumer.
// So we just rethrow.
LOG.error("Caught InvalidOffsetException in pollConsumer", e);
http://git-wip-us.apache.org/repos/asf/samza/blob/ceb0f6ae/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
index 415bd38..7dce261 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
@@ -47,10 +47,10 @@ class KafkaSystemConsumerMetrics(val systemName: String = "unknown", val registr
}
def registerClientProxy(clientName: String) {
- clientBytesRead.put(clientName, newCounter("%s-%s-bytes-read" format clientName))
- clientReads.put((clientName), newCounter("%s-%s-messages-read" format clientName))
- clientSkippedFetchRequests.put((clientName), newCounter("%s-%s-skipped-fetch-requests" format clientName))
- topicPartitions.put(clientName, newGauge("%s-%s-topic-partitions" format clientName, 0))
+ clientBytesRead.put(clientName, newCounter("%s-bytes-read" format clientName))
+ clientReads.put((clientName), newCounter("%s-messages-read" format clientName))
+ clientSkippedFetchRequests.put((clientName), newCounter("%s-skipped-fetch-requests" format clientName))
+ topicPartitions.put(clientName, newGauge("%s-topic-partitions" format clientName, 0))
}
// java friendlier interfaces
http://git-wip-us.apache.org/repos/asf/samza/blob/ceb0f6ae/samza-yarn/src/main/java/org/apache/samza/validation/YarnJobValidationTool.java
----------------------------------------------------------------------
diff --git a/samza-yarn/src/main/java/org/apache/samza/validation/YarnJobValidationTool.java b/samza-yarn/src/main/java/org/apache/samza/validation/YarnJobValidationTool.java
index 0b405f0..b30b896 100644
--- a/samza-yarn/src/main/java/org/apache/samza/validation/YarnJobValidationTool.java
+++ b/samza-yarn/src/main/java/org/apache/samza/validation/YarnJobValidationTool.java
@@ -157,7 +157,7 @@ public class YarnJobValidationTool {
coordinatorStreamManager.start();
coordinatorStreamManager.bootstrap();
ChangelogStreamManager changelogStreamManager = new ChangelogStreamManager(coordinatorStreamManager);
- JobModelManager jobModelManager = JobModelManager.apply(coordinatorStreamManager, changelogStreamManager.readPartitionMapping());
+ JobModelManager jobModelManager = JobModelManager.apply(coordinatorStreamManager.getConfig(), changelogStreamManager.readPartitionMapping());
validator.init(config);
Map<String, String> jmxUrls = jobModelManager.jobModel().getAllContainerToHostValues(SetContainerHostMapping.JMX_TUNNELING_URL_KEY);
for (Map.Entry<String, String> entry : jmxUrls.entrySet()) {
http://git-wip-us.apache.org/repos/asf/samza/blob/ceb0f6ae/samza-yarn/src/test/scala/org/apache/samza/job/yarn/TestSamzaYarnAppMasterService.scala
----------------------------------------------------------------------
diff --git a/samza-yarn/src/test/scala/org/apache/samza/job/yarn/TestSamzaYarnAppMasterService.scala b/samza-yarn/src/test/scala/org/apache/samza/job/yarn/TestSamzaYarnAppMasterService.scala
index da23b91..1ad4522 100644
--- a/samza-yarn/src/test/scala/org/apache/samza/job/yarn/TestSamzaYarnAppMasterService.scala
+++ b/samza-yarn/src/test/scala/org/apache/samza/job/yarn/TestSamzaYarnAppMasterService.scala
@@ -106,7 +106,9 @@ class TestSamzaYarnAppMasterService {
coordinatorStreamManager.start
coordinatorStreamManager.bootstrap
val changelogPartitionManager = new ChangelogStreamManager(coordinatorStreamManager)
- JobModelManager(coordinatorStreamManager, changelogPartitionManager.readPartitionMapping())
+ val jobModelManager = JobModelManager(coordinatorStreamManager.getConfig, changelogPartitionManager.readPartitionMapping())
+ coordinatorStreamManager.stop()
+ jobModelManager
}
private def getDummyConfig: Config = new MapConfig(Map[String, String](
[21/47] samza git commit: Merge branch 'master' into NewConsumer
Posted by bo...@apache.org.
Merge branch 'master' into NewConsumer
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/34ae8ba2
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/34ae8ba2
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/34ae8ba2
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 34ae8ba2dd18c7d7af46464033b330a35966db3f
Parents: c0ea25c 7f7b559
Author: Boris S <bo...@apache.org>
Authored: Wed Aug 29 10:52:37 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Aug 29 10:52:37 2018 -0700
----------------------------------------------------------------------
.../samza/execution/ExecutionPlanner.java | 20 +++++---
.../runtime/AbstractApplicationRunner.java | 20 +++++---
.../samza/runtime/LocalApplicationRunner.java | 41 ++++++++--------
.../samza/runtime/RemoteApplicationRunner.java | 36 +++++++-------
.../org/apache/samza/config/MetricsConfig.scala | 11 +++--
.../diagnostics/DiagnosticsExceptionEvent.java | 6 +--
.../scala/org/apache/samza/job/JobRunner.scala | 2 +-
.../reporter/MetricsSnapshotReporter.scala | 35 +++++++-------
.../MetricsSnapshotReporterFactory.scala | 6 +--
.../runtime/TestLocalApplicationRunner.java | 50 ++++++++++++--------
10 files changed, 129 insertions(+), 98 deletions(-)
----------------------------------------------------------------------
[23/47] samza git commit: Merge branch 'master' of
https://github.com/apache/samza
Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/8ab04b20
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/8ab04b20
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/8ab04b20
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 8ab04b209eec4d035f126259305ae84318737d2e
Parents: 7f7b559 9eadfa0
Author: Boris S <bo...@apache.org>
Authored: Thu Aug 30 11:43:21 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Thu Aug 30 11:43:21 2018 -0700
----------------------------------------------------------------------
build.gradle | 5 +-
.../samza/metadatastore/MetadataStore.java | 80 ++++
.../metadatastore/MetadataStoreFactory.java | 30 ++
.../apache/samza/operators/MessageStream.java | 12 +-
.../org/apache/samza/operators/StreamGraph.java | 109 ++----
.../descriptors/GenericInputDescriptor.java | 43 +++
.../descriptors/GenericOutputDescriptor.java | 43 +++
.../descriptors/GenericSystemDescriptor.java | 67 ++++
.../base/stream/InputDescriptor.java | 187 +++++++++
.../base/stream/OutputDescriptor.java | 44 +++
.../base/stream/StreamDescriptor.java | 136 +++++++
.../ExpandingInputDescriptorProvider.java | 44 +++
.../base/system/OutputDescriptorProvider.java | 48 +++
.../system/SimpleInputDescriptorProvider.java | 43 +++
.../base/system/SystemDescriptor.java | 177 +++++++++
.../TransformingInputDescriptorProvider.java | 44 +++
.../operators/functions/InputTransformer.java | 45 +++
.../operators/functions/StreamExpander.java | 58 +++
.../org/apache/samza/table/TableProvider.java | 12 +-
.../TestExpandingInputDescriptor.java | 61 +++
.../descriptors/TestGenericInputDescriptor.java | 123 ++++++
.../TestGenericSystemDescriptor.java | 63 +++
.../descriptors/TestSimpleInputDescriptor.java | 65 ++++
.../TestTransformingInputDescriptor.java | 66 ++++
.../ExampleExpandingInputDescriptor.java | 30 ++
.../ExampleExpandingOutputDescriptor.java | 29 ++
.../ExampleExpandingSystemDescriptor.java | 49 +++
.../serde/ExampleSimpleInputDescriptor.java | 30 ++
.../serde/ExampleSimpleOutputDescriptor.java | 29 ++
.../serde/ExampleSimpleSystemDescriptor.java | 43 +++
.../ExampleTransformingInputDescriptor.java | 30 ++
.../ExampleTransformingOutputDescriptor.java | 29 ++
.../ExampleTransformingSystemDescriptor.java | 43 +++
.../apache/samza/container/LocalityManager.java | 126 +++---
.../grouper/task/TaskAssignmentManager.java | 109 ++++--
.../CoordinatorStreamMetadataStoreFactory.java | 36 ++
.../metadatastore/CoordinatorStreamStore.java | 188 +++++++++
.../stream/CoordinatorStreamKeySerde.java | 52 +++
.../stream/CoordinatorStreamValueSerde.java | 80 ++++
.../samza/execution/JobGraphJsonGenerator.java | 4 +-
.../org/apache/samza/execution/JobNode.java | 23 +-
.../samza/operators/BaseTableDescriptor.java | 2 +-
.../samza/operators/MessageStreamImpl.java | 2 +-
.../apache/samza/operators/StreamGraphSpec.java | 152 +++++---
.../descriptors/DelegatingSystemDescriptor.java | 70 ++++
.../samza/operators/impl/InputOperatorImpl.java | 23 +-
.../samza/operators/spec/InputOperatorSpec.java | 57 ++-
.../samza/operators/spec/OperatorSpecs.java | 11 +-
.../samza/operators/spec/OutputStreamImpl.java | 12 +
.../stream/IntermediateMessageStreamImpl.java | 2 +-
.../apache/samza/processor/StreamProcessor.java | 2 +-
.../runtime/AbstractApplicationRunner.java | 12 +
.../samza/table/TableConfigGenerator.java | 11 +-
.../table/caching/CachingTableDescriptor.java | 3 +-
.../table/caching/CachingTableProvider.java | 56 +--
.../guava/GuavaCacheTableDescriptor.java | 3 +-
.../caching/guava/GuavaCacheTableProvider.java | 44 +--
.../samza/table/remote/RemoteReadableTable.java | 5 +
.../table/remote/RemoteTableDescriptor.java | 3 +-
.../samza/table/remote/RemoteTableProvider.java | 43 +--
.../samza/table/utils/BaseTableProvider.java | 76 ++++
.../apache/samza/task/StreamOperatorTask.java | 3 +-
.../org/apache/samza/task/TaskCallbackImpl.java | 2 +-
.../java/org/apache/samza/util/StreamUtil.java | 1 +
.../org/apache/samza/config/JobConfig.scala | 4 +
.../org/apache/samza/config/StorageConfig.scala | 1 +
.../apache/samza/container/SamzaContainer.scala | 28 +-
.../samza/coordinator/JobModelManager.scala | 20 +-
.../TestClusterBasedJobCoordinator.java | 26 +-
.../samza/container/TestLocalityManager.java | 106 ++---
.../grouper/task/TestTaskAssignmentManager.java | 132 ++-----
.../coordinator/JobModelManagerTestUtil.java | 4 +-
.../TestCoordinatorStreamStore.java | 129 +++++++
.../samza/execution/TestExecutionPlanner.java | 92 +++--
.../execution/TestJobGraphJsonGenerator.java | 38 +-
.../org/apache/samza/execution/TestJobNode.java | 81 +++-
.../samza/operators/TestJoinOperator.java | 50 ++-
.../samza/operators/TestMessageStreamImpl.java | 4 +-
.../samza/operators/TestOperatorSpecGraph.java | 4 +-
.../samza/operators/TestStreamGraphSpec.java | 387 +++++++++----------
.../operators/impl/TestInputOperatorImpl.java | 80 ++++
.../operators/impl/TestOperatorImplGraph.java | 96 +++--
.../operators/impl/TestWindowOperator.java | 24 +-
.../samza/operators/spec/TestOperatorSpec.java | 16 +-
.../spec/TestPartitionByOperatorSpec.java | 63 ++-
.../scala/org/apache/samza/util/TestUtil.scala | 2 -
.../system/kafka/KafkaInputDescriptor.java | 108 ++++++
.../system/kafka/KafkaOutputDescriptor.java | 39 ++
.../system/kafka/KafkaSystemDescriptor.java | 251 ++++++++++++
.../apache/samza/system/kafka/GetOffset.scala | 3 +-
.../system/kafka/TestKafkaInputDescriptor.java | 68 ++++
.../system/kafka/TestKafkaSystemDescriptor.java | 69 ++++
.../samza/system/kafka/TestGetOffset.scala | 31 +-
.../kv/inmemory/InMemoryTableDescriptor.java | 3 +
.../kv/inmemory/InMemoryTableProvider.java | 21 +-
.../kv/inmemory/TestInMemoryTableProvider.java | 9 +-
.../storage/kv/RocksDbTableDescriptor.java | 7 +-
.../samza/storage/kv/RocksDbTableProvider.java | 21 +-
.../storage/kv/TestRocksDbTableDescriptor.java | 15 +
.../storage/kv/TestRocksDbTableProvider.java | 9 +-
.../kv/BaseLocalStoreBackedTableDescriptor.java | 68 ++++
.../kv/BaseLocalStoreBackedTableProvider.java | 60 ++-
.../kv/LocalStoreBackedReadableTable.java | 1 +
.../TestBaseLocalStoreBackedTableProvider.java | 149 +++++++
.../TestLocalBaseStoreBackedTableProvider.java | 85 ----
.../samza/rest/proxy/task/SamzaTaskProxy.java | 4 +-
.../apache/samza/sql/planner/QueryPlanner.java | 8 -
.../sql/planner/SamzaSqlOperatorTable.java | 1 +
.../samza/sql/translator/QueryTranslator.java | 10 +-
.../samza/sql/translator/ScanTranslator.java | 13 +-
.../samza/sql/translator/TranslatorContext.java | 34 +-
.../sql/testutil/TestIOResolverFactory.java | 15 +-
.../sql/translator/TestJoinTranslator.java | 2 +-
.../sql/translator/TestQueryTranslator.java | 5 +-
.../example/AppWithGlobalConfigExample.java | 17 +-
.../apache/samza/example/BroadcastExample.java | 24 +-
.../samza/example/KeyValueStoreExample.java | 27 +-
.../org/apache/samza/example/MergeExample.java | 25 +-
.../samza/example/OrderShipmentJoinExample.java | 29 +-
.../samza/example/PageViewCounterExample.java | 17 +-
.../samza/example/RepartitionExample.java | 24 +-
.../org/apache/samza/example/WindowExample.java | 19 +-
.../apache/samza/test/framework/TestRunner.java | 2 +-
.../system/CollectionStreamSystemSpec.java | 25 +-
.../TestStandaloneIntegrationApplication.java | 21 +-
.../EndOfStreamIntegrationTest.java | 11 +-
.../WatermarkIntegrationTest.java | 12 +-
.../test/framework/BroadcastAssertApp.java | 11 +-
.../StreamApplicationIntegrationTest.java | 29 +-
.../samza/test/framework/TestTimerApp.java | 17 +-
.../test/operator/RepartitionJoinWindowApp.java | 32 +-
.../test/operator/RepartitionWindowApp.java | 19 +-
.../samza/test/operator/SessionWindowApp.java | 19 +-
.../operator/TestRepartitionJoinWindowApp.java | 12 +-
.../test/operator/TestRepartitionWindowApp.java | 6 +-
.../samza/test/operator/TumblingWindowApp.java | 19 +-
.../test/processor/TestStreamApplication.java | 20 +-
.../processor/TestZkLocalApplicationRunner.java | 60 ++-
.../test/samzasql/TestSamzaSqlEndToEnd.java | 2 +-
.../table/PageViewToProfileJoinFunction.java | 48 +++
.../apache/samza/test/table/TestLocalTable.java | 75 +---
.../table/TestLocalTableWithSideInputs.java | 47 ++-
.../samza/test/table/TestRemoteTable.java | 50 +--
.../table/TestTableDescriptorsProvider.java | 10 +-
.../benchmark/SystemConsumerWithSamzaBench.java | 11 +-
145 files changed, 4990 insertions(+), 1376 deletions(-)
----------------------------------------------------------------------
[24/47] samza git commit: Merge branch 'master' into NewConsumer
Posted by bo...@apache.org.
Merge branch 'master' into NewConsumer
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/c14557fb
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/c14557fb
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/c14557fb
Branch: refs/heads/NewKafkaSystemConsumer
Commit: c14557fb401f0d718e78163a159695f50bc82845
Parents: 89f7982 8ab04b2
Author: Boris S <bo...@apache.org>
Authored: Thu Aug 30 12:45:23 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Thu Aug 30 12:45:23 2018 -0700
----------------------------------------------------------------------
build.gradle | 5 +-
.../samza/metadatastore/MetadataStore.java | 80 ++++
.../metadatastore/MetadataStoreFactory.java | 30 ++
.../apache/samza/operators/MessageStream.java | 12 +-
.../org/apache/samza/operators/StreamGraph.java | 109 ++----
.../descriptors/GenericInputDescriptor.java | 43 +++
.../descriptors/GenericOutputDescriptor.java | 43 +++
.../descriptors/GenericSystemDescriptor.java | 67 ++++
.../base/stream/InputDescriptor.java | 187 +++++++++
.../base/stream/OutputDescriptor.java | 44 +++
.../base/stream/StreamDescriptor.java | 136 +++++++
.../ExpandingInputDescriptorProvider.java | 44 +++
.../base/system/OutputDescriptorProvider.java | 48 +++
.../system/SimpleInputDescriptorProvider.java | 43 +++
.../base/system/SystemDescriptor.java | 177 +++++++++
.../TransformingInputDescriptorProvider.java | 44 +++
.../operators/functions/InputTransformer.java | 45 +++
.../operators/functions/StreamExpander.java | 58 +++
.../org/apache/samza/table/TableProvider.java | 12 +-
.../TestExpandingInputDescriptor.java | 61 +++
.../descriptors/TestGenericInputDescriptor.java | 123 ++++++
.../TestGenericSystemDescriptor.java | 63 +++
.../descriptors/TestSimpleInputDescriptor.java | 65 ++++
.../TestTransformingInputDescriptor.java | 66 ++++
.../ExampleExpandingInputDescriptor.java | 30 ++
.../ExampleExpandingOutputDescriptor.java | 29 ++
.../ExampleExpandingSystemDescriptor.java | 49 +++
.../serde/ExampleSimpleInputDescriptor.java | 30 ++
.../serde/ExampleSimpleOutputDescriptor.java | 29 ++
.../serde/ExampleSimpleSystemDescriptor.java | 43 +++
.../ExampleTransformingInputDescriptor.java | 30 ++
.../ExampleTransformingOutputDescriptor.java | 29 ++
.../ExampleTransformingSystemDescriptor.java | 43 +++
.../apache/samza/container/LocalityManager.java | 126 +++---
.../grouper/task/TaskAssignmentManager.java | 109 ++++--
.../CoordinatorStreamMetadataStoreFactory.java | 36 ++
.../metadatastore/CoordinatorStreamStore.java | 188 +++++++++
.../stream/CoordinatorStreamKeySerde.java | 52 +++
.../stream/CoordinatorStreamValueSerde.java | 80 ++++
.../samza/execution/JobGraphJsonGenerator.java | 4 +-
.../org/apache/samza/execution/JobNode.java | 23 +-
.../samza/operators/BaseTableDescriptor.java | 2 +-
.../samza/operators/MessageStreamImpl.java | 2 +-
.../apache/samza/operators/StreamGraphSpec.java | 152 +++++---
.../descriptors/DelegatingSystemDescriptor.java | 70 ++++
.../samza/operators/impl/InputOperatorImpl.java | 23 +-
.../samza/operators/spec/InputOperatorSpec.java | 57 ++-
.../samza/operators/spec/OperatorSpecs.java | 11 +-
.../samza/operators/spec/OutputStreamImpl.java | 12 +
.../stream/IntermediateMessageStreamImpl.java | 2 +-
.../apache/samza/processor/StreamProcessor.java | 2 +-
.../runtime/AbstractApplicationRunner.java | 12 +
.../samza/table/TableConfigGenerator.java | 11 +-
.../table/caching/CachingTableDescriptor.java | 3 +-
.../table/caching/CachingTableProvider.java | 56 +--
.../guava/GuavaCacheTableDescriptor.java | 3 +-
.../caching/guava/GuavaCacheTableProvider.java | 44 +--
.../samza/table/remote/RemoteReadableTable.java | 5 +
.../table/remote/RemoteTableDescriptor.java | 3 +-
.../samza/table/remote/RemoteTableProvider.java | 43 +--
.../samza/table/utils/BaseTableProvider.java | 76 ++++
.../apache/samza/task/StreamOperatorTask.java | 3 +-
.../org/apache/samza/task/TaskCallbackImpl.java | 2 +-
.../java/org/apache/samza/util/StreamUtil.java | 1 +
.../org/apache/samza/config/JobConfig.scala | 4 +
.../org/apache/samza/config/StorageConfig.scala | 1 +
.../apache/samza/container/SamzaContainer.scala | 28 +-
.../samza/coordinator/JobModelManager.scala | 20 +-
.../TestClusterBasedJobCoordinator.java | 26 +-
.../samza/container/TestLocalityManager.java | 106 ++---
.../grouper/task/TestTaskAssignmentManager.java | 132 ++-----
.../coordinator/JobModelManagerTestUtil.java | 4 +-
.../TestCoordinatorStreamStore.java | 129 +++++++
.../samza/execution/TestExecutionPlanner.java | 92 +++--
.../execution/TestJobGraphJsonGenerator.java | 38 +-
.../org/apache/samza/execution/TestJobNode.java | 81 +++-
.../samza/operators/TestJoinOperator.java | 50 ++-
.../samza/operators/TestMessageStreamImpl.java | 4 +-
.../samza/operators/TestOperatorSpecGraph.java | 4 +-
.../samza/operators/TestStreamGraphSpec.java | 387 +++++++++----------
.../operators/impl/TestInputOperatorImpl.java | 80 ++++
.../operators/impl/TestOperatorImplGraph.java | 96 +++--
.../operators/impl/TestWindowOperator.java | 24 +-
.../samza/operators/spec/TestOperatorSpec.java | 16 +-
.../spec/TestPartitionByOperatorSpec.java | 63 ++-
.../scala/org/apache/samza/util/TestUtil.scala | 2 -
.../system/kafka/KafkaInputDescriptor.java | 108 ++++++
.../system/kafka/KafkaOutputDescriptor.java | 39 ++
.../system/kafka/KafkaSystemDescriptor.java | 251 ++++++++++++
.../apache/samza/system/kafka/GetOffset.scala | 3 +-
.../system/kafka/TestKafkaInputDescriptor.java | 68 ++++
.../system/kafka/TestKafkaSystemDescriptor.java | 69 ++++
.../samza/system/kafka/TestGetOffset.scala | 31 +-
.../kv/inmemory/InMemoryTableDescriptor.java | 3 +
.../kv/inmemory/InMemoryTableProvider.java | 21 +-
.../kv/inmemory/TestInMemoryTableProvider.java | 9 +-
.../storage/kv/RocksDbTableDescriptor.java | 7 +-
.../samza/storage/kv/RocksDbTableProvider.java | 21 +-
.../storage/kv/TestRocksDbTableDescriptor.java | 15 +
.../storage/kv/TestRocksDbTableProvider.java | 9 +-
.../kv/BaseLocalStoreBackedTableDescriptor.java | 68 ++++
.../kv/BaseLocalStoreBackedTableProvider.java | 60 ++-
.../kv/LocalStoreBackedReadableTable.java | 1 +
.../TestBaseLocalStoreBackedTableProvider.java | 149 +++++++
.../TestLocalBaseStoreBackedTableProvider.java | 85 ----
.../samza/rest/proxy/task/SamzaTaskProxy.java | 4 +-
.../apache/samza/sql/planner/QueryPlanner.java | 8 -
.../sql/planner/SamzaSqlOperatorTable.java | 1 +
.../samza/sql/translator/QueryTranslator.java | 10 +-
.../samza/sql/translator/ScanTranslator.java | 13 +-
.../samza/sql/translator/TranslatorContext.java | 34 +-
.../sql/testutil/TestIOResolverFactory.java | 15 +-
.../sql/translator/TestJoinTranslator.java | 2 +-
.../sql/translator/TestQueryTranslator.java | 5 +-
.../example/AppWithGlobalConfigExample.java | 17 +-
.../apache/samza/example/BroadcastExample.java | 24 +-
.../samza/example/KeyValueStoreExample.java | 27 +-
.../org/apache/samza/example/MergeExample.java | 25 +-
.../samza/example/OrderShipmentJoinExample.java | 29 +-
.../samza/example/PageViewCounterExample.java | 17 +-
.../samza/example/RepartitionExample.java | 24 +-
.../org/apache/samza/example/WindowExample.java | 19 +-
.../apache/samza/test/framework/TestRunner.java | 2 +-
.../system/CollectionStreamSystemSpec.java | 25 +-
.../TestStandaloneIntegrationApplication.java | 21 +-
.../EndOfStreamIntegrationTest.java | 11 +-
.../WatermarkIntegrationTest.java | 12 +-
.../test/framework/BroadcastAssertApp.java | 11 +-
.../StreamApplicationIntegrationTest.java | 29 +-
.../samza/test/framework/TestTimerApp.java | 17 +-
.../test/operator/RepartitionJoinWindowApp.java | 32 +-
.../test/operator/RepartitionWindowApp.java | 19 +-
.../samza/test/operator/SessionWindowApp.java | 19 +-
.../operator/TestRepartitionJoinWindowApp.java | 12 +-
.../test/operator/TestRepartitionWindowApp.java | 6 +-
.../samza/test/operator/TumblingWindowApp.java | 19 +-
.../test/processor/TestStreamApplication.java | 20 +-
.../processor/TestZkLocalApplicationRunner.java | 60 ++-
.../test/samzasql/TestSamzaSqlEndToEnd.java | 2 +-
.../table/PageViewToProfileJoinFunction.java | 48 +++
.../apache/samza/test/table/TestLocalTable.java | 75 +---
.../table/TestLocalTableWithSideInputs.java | 47 ++-
.../samza/test/table/TestRemoteTable.java | 50 +--
.../table/TestTableDescriptorsProvider.java | 10 +-
.../benchmark/SystemConsumerWithSamzaBench.java | 11 +-
145 files changed, 4990 insertions(+), 1376 deletions(-)
----------------------------------------------------------------------
[42/47] samza git commit: comments
Posted by bo...@apache.org.
comments
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/2480aa36
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/2480aa36
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/2480aa36
Branch: refs/heads/NewKafkaSystemConsumer
Commit: 2480aa36ac7afe10b931d3148ab6e41f70c778cb
Parents: 053fe3b
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 11 14:08:02 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 11 14:08:02 2018 -0700
----------------------------------------------------------------------
.../clients/consumer/KafkaConsumerConfig.java | 6 +++---
.../samza/system/kafka/KafkaSystemConsumer.java | 17 ++---------------
.../samza/system/kafka/KafkaSystemFactory.scala | 11 ++++++++---
3 files changed, 13 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/2480aa36/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index 8ca5b93..1a97ec7 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -93,12 +93,12 @@ public class KafkaConsumerConfig extends ConsumerConfig {
// make sure bootstrap configs are in ?? SHOULD WE FAIL IF THEY ARE NOT?
if (!subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
// get it from the producer config
- String bootstrapServer =
+ String bootstrapServers =
config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
- if (StringUtils.isEmpty(bootstrapServer)) {
+ if (StringUtils.isEmpty(bootstrapServers)) {
throw new SamzaException("Missing " + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG + " config for " + systemName);
}
- consumerProps.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer);
+ consumerProps.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
}
// Always use default partition assignment strategy. Do not allow override.
http://git-wip-us.apache.org/repos/asf/samza/blob/2480aa36/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
index 196fb85..9cdfce1 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
@@ -109,19 +109,6 @@ public class KafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements Sy
clientId, metricName, this.kafkaConsumer.toString());
}
- public static <K, V> KafkaSystemConsumer getNewKafkaSystemConsumer(String systemName, Config config,
- String clientId, KafkaSystemConsumerMetrics metrics, Clock clock) {
-
- // extract consumer configs and create kafka consumer
- KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
- LOG.info("Created kafka consumer for system {}, clientId {}: {}", systemName, clientId, kafkaConsumer);
-
- KafkaSystemConsumer kc = new KafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
- LOG.info("Created samza system consumer {}", kc.toString());
-
- return kc;
- }
-
/**
* create kafka consumer
* @param systemName system name for which we create the consumer
@@ -129,7 +116,7 @@ public class KafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements Sy
* @param config config
* @return kafka consumer
*/
- public static <K, V> KafkaConsumer<K, V> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
+ public static KafkaConsumer<byte[], byte[]> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
Map<String, String> injectProps = new HashMap<>();
@@ -263,7 +250,7 @@ public class KafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements Sy
// stop the proxy (with 5 minutes timeout)
if (proxy != null) {
LOG.info("Stopping proxy " + proxy);
- proxy.stop(TimeUnit.MINUTES.toMillis(5));
+ proxy.stop(TimeUnit.SECONDS.toMillis(60));
}
try {
http://git-wip-us.apache.org/repos/asf/samza/blob/2480aa36/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index e0e85be..9f92583 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -22,7 +22,7 @@ package org.apache.samza.system.kafka
import java.util.Properties
import kafka.utils.ZkUtils
-import org.apache.kafka.clients.consumer.KafkaConsumerConfig
+import org.apache.kafka.clients.consumer.{KafkaConsumer, KafkaConsumerConfig}
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.samza.SamzaException
import org.apache.samza.config.ApplicationConfig.ApplicationMode
@@ -50,8 +50,13 @@ class KafkaSystemFactory extends SystemFactory with Logging {
val clientId = KafkaConsumerConfig.getConsumerClientId( config)
val metrics = new KafkaSystemConsumerMetrics(systemName, registry)
- KafkaSystemConsumer.getNewKafkaSystemConsumer(
- systemName, config, clientId, metrics, new SystemClock)
+ val kafkaConsumer = KafkaSystemConsumer.getKafkaConsumerImpl(systemName, clientId, config)
+ info("Created kafka consumer for system %s, clientId %s: %s" format (systemName, clientId, kafkaConsumer))
+
+ val kc = new KafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, new SystemClock)
+ info("Created samza system consumer %s" format (kc.toString))
+
+ kc
}
def getProducer(systemName: String, config: Config, registry: MetricsRegistry): SystemProducer = {