Posted to commits@samza.apache.org by bo...@apache.org on 2018/09/18 23:00:58 UTC

[01/47] samza git commit: reduce logging from info to debug in KafkaCheckpointManager.scala

Repository: samza
Updated Branches:
  refs/heads/NewKafkaSystemConsumer [created] 361596317


reduce logging from info to debug in KafkaCheckpointManager.scala


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/a31a7aa2
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/a31a7aa2
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/a31a7aa2

Branch: refs/heads/NewKafkaSystemConsumer
Commit: a31a7aa29b7be4bb46f8e651b6b8fa46a65b48e2
Parents: c93dd8f
Author: Boris Shkolnik <bs...@linkedin.com>
Authored: Mon Oct 16 15:25:49 2017 -0700
Committer: Boris Shkolnik <bs...@linkedin.com>
Committed: Mon Oct 16 15:25:49 2017 -0700

----------------------------------------------------------------------
 .../org/apache/samza/checkpoint/kafka/KafkaCheckpointManager.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/a31a7aa2/samza-kafka/src/main/scala/org/apache/samza/checkpoint/kafka/KafkaCheckpointManager.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/checkpoint/kafka/KafkaCheckpointManager.scala b/samza-kafka/src/main/scala/org/apache/samza/checkpoint/kafka/KafkaCheckpointManager.scala
index 4eb6666..b016b4a 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/checkpoint/kafka/KafkaCheckpointManager.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/checkpoint/kafka/KafkaCheckpointManager.scala
@@ -103,7 +103,7 @@ class KafkaCheckpointManager(
 
         systemProducer.send(taskName.getTaskName, envelope)
         systemProducer.flush(taskName.getTaskName) // make sure it is written
-        info("Completed writing checkpoint=%s into %s topic for system %s." format(checkpoint, checkpointTopic, systemName) )
+        debug("Completed writing checkpoint=%s into %s topic for system %s." format(checkpoint, checkpointTopic, systemName) )
         loop.done
       },
 


[27/47] samza git commit: add LOG lines for poll results in NewKafkaSystemConsumer

Posted by bo...@apache.org.
add LOG lines for poll results in NewKafkaSystemConsumer


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/19ba3003
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/19ba3003
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/19ba3003

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 19ba3003078f989400cd9fbefee7c00421bebc6c
Parents: 59b3dc1
Author: Boris S <bo...@apache.org>
Authored: Fri Aug 31 15:15:04 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Aug 31 15:20:12 2018 -0700

----------------------------------------------------------------------
 .../org/apache/samza/system/kafka/NewKafkaSystemConsumer.java     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/19ba3003/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index e34812f..aeeadce 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -348,7 +348,8 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
     }
 
     Map<SystemStreamPartition, List<IncomingMessageEnvelope>> res = super.poll(systemStreamPartitions, timeout);
-    LOG.info("=============================>. Res in POLL:" + res.toString());
+    LOG.info("=============================>. Res for " + systemStreamPartitions);
+    LOG.info("=============================>. Res:" + res.toString());
     return res;
   }
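
These banner-style info lines print the full poll result on every call, which eagerly stringifies every envelope. A hedged alternative sketch, using illustrative names rather than Samza's actual API, that guards the expensive logging behind a debug check:

import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class PollDiagnostics {
  private static final Logger LOG = LoggerFactory.getLogger(PollDiagnostics.class);

  /** Logs poll inputs and result sizes only when debug logging is enabled. */
  public static <P, E> void logPoll(Set<P> requestedPartitions, Map<P, List<E>> results) {
    if (LOG.isDebugEnabled()) { // skip the work entirely at info level and above
      LOG.debug("poll requested for partitions: {}", requestedPartitions);
      results.forEach((partition, envelopes) ->
          LOG.debug("poll returned {} envelopes for {}", envelopes.size(), partition));
    }
  }
}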
 


[13/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/78ad578c
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/78ad578c
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/78ad578c

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 78ad578cc022af3f9c5f9a15826a9dc010502a89
Parents: 88f8559 d28f0c8
Author: Boris S <bo...@apache.org>
Authored: Tue Aug 7 19:04:17 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Aug 7 19:04:17 2018 -0700

----------------------------------------------------------------------
 .../org/apache/samza/metrics/ListGauge.java     |   3 +-
 .../samza/runtime/RemoteApplicationRunner.java  |  69 +----
 .../storage/TaskSideInputStorageManager.java    |  38 ++-
 .../MetricsSnapshotReporterFactory.scala        |   4 +-
 .../runtime/TestRemoteApplicationRunner.java    |  70 +++++
 .../TestTaskSideInputStorageManager.java        | 295 +++++++++++++++++++
 .../samza/storage/kv/RocksDbTableProvider.java  |   4 +
 .../table/TestLocalTableWithSideInputs.java     | 161 ++++++++++
 .../apache/samza/test/table/TestTableData.java  |  22 +-
 .../table/TestTableDescriptorsProvider.java     |   2 +-
 10 files changed, 597 insertions(+), 71 deletions(-)
----------------------------------------------------------------------



[03/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/d4620d66
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/d4620d66
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/d4620d66

Branch: refs/heads/NewKafkaSystemConsumer
Commit: d4620d6690f74cad9472d0e27a1b31aeb4156c54
Parents: 410ce78 958edc4
Author: Boris S <bo...@apache.org>
Authored: Tue Oct 24 17:11:48 2017 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Oct 24 17:11:48 2017 -0700

----------------------------------------------------------------------
 .../apache/samza/storage/kv/KeyValueStore.java  |  69 ++++---------
 .../kafka/KafkaCheckpointManager.scala          | 103 +++++++------------
 .../kv/inmemory/InMemoryKeyValueStore.scala     |   8 --
 .../samza/storage/kv/RocksDbKeyValueStore.scala |  86 ++++++----------
 .../storage/kv/TestRocksDbKeyValueStore.scala   |   4 +-
 .../apache/samza/storage/kv/CachedStore.scala   |   2 +-
 .../samza/storage/kv/MockKeyValueStore.scala    |   8 --
 7 files changed, 93 insertions(+), 187 deletions(-)
----------------------------------------------------------------------



[04/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/bbffb79b
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/bbffb79b
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/bbffb79b

Branch: refs/heads/NewKafkaSystemConsumer
Commit: bbffb79b8b9799a41e8e82ded60f83550736886b
Parents: d4620d6 cc1ca2c
Author: Boris S <bo...@apache.org>
Authored: Tue Oct 24 17:54:20 2017 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Oct 24 17:54:20 2017 -0700

----------------------------------------------------------------------
 .../src/main/java/org/apache/samza/task/StreamOperatorTask.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------



[08/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/1ad58d43
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/1ad58d43
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/1ad58d43

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 1ad58d43fbe00a57054cb85b0be2eef6ee6470a6
Parents: 06b1ac3 2d10732
Author: Boris S <bo...@apache.org>
Authored: Tue Oct 31 12:46:53 2017 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Oct 31 12:46:53 2017 -0700

----------------------------------------------------------------------
 build.gradle                                    |  11 +
 .../apache/samza/operators/MessageStream.java   |  41 +-
 .../system/eventhub/EventHubClientManager.java  |  69 ++++
 .../eventhub/EventHubClientManagerFactory.java  |  32 ++
 .../samza/system/eventhub/EventHubConfig.java   | 181 +++++++++
 .../system/eventhub/EventHubSystemFactory.java  |  65 +++
 .../samza/system/eventhub/Interceptor.java      |  34 ++
 .../eventhub/SamzaEventHubClientManager.java    | 104 +++++
 .../eventhub/admin/EventHubSystemAdmin.java     | 199 +++++++++
 .../eventhub/admin/PassThroughInterceptor.java  |  33 ++
 .../EventHubIncomingMessageEnvelope.java        |  42 ++
 .../consumer/EventHubSystemConsumer.java        | 401 +++++++++++++++++++
 .../system/eventhub/metrics/SamzaHistogram.java |  62 +++
 .../producer/EventHubSystemProducer.java        | 345 ++++++++++++++++
 .../samza/system/eventhub/MockEventData.java    |  57 +++
 .../MockEventHubClientManagerFactory.java       | 196 +++++++++
 .../eventhub/MockEventHubConfigFactory.java     |  61 +++
 .../system/eventhub/TestMetricsRegistry.java    |  85 ++++
 .../eventhub/admin/TestEventHubSystemAdmin.java | 113 ++++++
 .../consumer/ITestEventHubSystemConsumer.java   |  76 ++++
 .../consumer/TestEventHubSystemConsumer.java    | 272 +++++++++++++
 .../producer/ITestEventHubSystemProducer.java   | 163 ++++++++
 .../producer/TestEventHubSystemProducer.java    | 153 +++++++
 .../samza/execution/JobGraphJsonGenerator.java  |   4 +-
 .../samza/operators/MessageStreamImpl.java      |  44 +-
 .../apache/samza/operators/StreamGraphImpl.java |  74 +++-
 .../samza/operators/impl/OperatorImpl.java      |  32 +-
 .../samza/operators/impl/OperatorImplGraph.java |  20 +-
 .../operators/impl/PartialJoinOperatorImpl.java |  32 +-
 .../operators/impl/WindowOperatorImpl.java      |  21 +-
 .../impl/store/TimeSeriesStoreImpl.java         |   9 +-
 .../samza/operators/spec/InputOperatorSpec.java |   2 +-
 .../samza/operators/spec/JoinOperatorSpec.java  |  14 +-
 .../samza/operators/spec/OperatorSpec.java      |  14 +-
 .../samza/operators/spec/OperatorSpecs.java     |  36 +-
 .../operators/spec/OutputOperatorSpec.java      |   2 +-
 .../operators/spec/PartitionByOperatorSpec.java |   2 +-
 .../samza/operators/spec/SinkOperatorSpec.java  |   2 +-
 .../operators/spec/StreamOperatorSpec.java      |   2 +-
 .../operators/spec/WindowOperatorSpec.java      |   4 +-
 .../apache/samza/system/SystemConsumers.scala   |  14 +-
 .../samza/example/KeyValueStoreExample.java     |   2 +-
 .../samza/example/OrderShipmentJoinExample.java |   2 +-
 .../samza/example/PageViewCounterExample.java   |   2 +-
 .../samza/example/RepartitionExample.java       |   5 +-
 .../org/apache/samza/example/WindowExample.java |   5 +-
 .../samza/execution/TestExecutionPlanner.java   |  44 +-
 .../execution/TestJobGraphJsonGenerator.java    |  16 +-
 .../org/apache/samza/execution/TestJobNode.java |  34 +-
 .../samza/operators/TestJoinOperator.java       |  33 +-
 .../samza/operators/TestMessageStreamImpl.java  |  26 +-
 .../samza/operators/TestStreamGraphImpl.java    |  61 +--
 .../samza/operators/TestWindowOperator.java     | 102 ++++-
 .../samza/operators/impl/TestOperatorImpl.java  |   2 +-
 .../operators/impl/TestOperatorImplGraph.java   |  47 ++-
 .../operators/spec/TestWindowOperatorSpec.java  |   4 +-
 .../samza/system/TestSystemConsumers.scala      |  60 ++-
 .../samza/storage/kv/RocksDbKeyValueStore.scala |  90 +++--
 .../storage/kv/TestRocksDbKeyValueStore.scala   |  36 +-
 .../rest/model/yarn/YarnApplicationInfo.java    |  12 +-
 .../proxy/job/YarnRestJobStatusProvider.java    |  41 +-
 .../job/TestYarnRestJobStatusProvider.java      |  77 ++++
 .../src/main/config/perf/kv-perf.properties     |  34 +-
 .../performance/TestKeyValuePerformance.scala   |  37 +-
 .../EndOfStreamIntegrationTest.java             |   2 +-
 .../WatermarkIntegrationTest.java               |   2 +-
 .../test/operator/RepartitionJoinWindowApp.java |  13 +-
 .../samza/test/operator/SessionWindowApp.java   |   4 +-
 .../samza/test/operator/TumblingWindowApp.java  |   3 +-
 .../test/integration/TestStatefulTask.scala     |   2 +-
 70 files changed, 3595 insertions(+), 326 deletions(-)
----------------------------------------------------------------------



[43/47] samza git commit: Added Test for KafkaConsumerConfig

Posted by bo...@apache.org.
Added Test for KafkaConsumerConfig


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/32c92828
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/32c92828
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/32c92828

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 32c92828eaff98f4c2e6691533ece9f502ef1f98
Parents: 2480aa3
Author: Boris S <bo...@apache.org>
Authored: Wed Sep 12 14:06:41 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Sep 12 14:06:41 2018 -0700

----------------------------------------------------------------------
 .../clients/consumer/KafkaConsumerConfig.java   |  23 ++--
 .../org/apache/samza/config/KafkaConfig.scala   |   5 +-
 .../samza/system/kafka/KafkaConsumerProxy.java  |  14 ++-
 .../consumer/TestKafkaConsumerConfig.java       | 121 +++++++++++++++++++
 4 files changed, 149 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/32c92828/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index 1a97ec7..8ada1b4 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -54,7 +54,7 @@ public class KafkaConsumerConfig extends ConsumerConfig {
    * By default, KafkaConsumer will fetch ALL available messages for all the partitions.
    * This may cause memory issues. That's why we will limit the number of messages per partition we get on EACH poll().
    */
-  private static final String DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS = "100";
+  static final String DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS = "100";
 
   private KafkaConsumerConfig(Properties props) {
     super(props);
@@ -83,6 +83,11 @@ public class KafkaConsumerConfig extends ConsumerConfig {
 
     //Kafka client configuration
 
+    // put overrides
+    consumerProps.putAll(injectProps);
+
+    // These are values we enforce in Samza, and they cannot be overwritten.
+
     // Disable consumer auto-commit because Samza controls commits
     consumerProps.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
 
@@ -106,28 +111,24 @@ public class KafkaConsumerConfig extends ConsumerConfig {
 
     // the consumer is fully typed, and deserialization can be too. But in case it is not provided we should
     // default to byte[]
-    if (!config.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
+    if (!consumerProps.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
       LOG.info("setting default key serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
       consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
     }
-    if (!config.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
+    if (!consumerProps.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
       LOG.info("setting default value serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
       consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
     }
 
-    // NOT SURE THIS IS NEEDED TODO
-    final String maxPollRecords =
-        subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS);
-    consumerProps.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
-
-    // put overrides
-    consumerProps.putAll(injectProps);
+    // Apply the default max.poll.records value only if none was provided
+    consumerProps.computeIfAbsent(ConsumerConfig.MAX_POLL_RECORDS_CONFIG,
+        (k) -> DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS);
 
     return new KafkaConsumerConfig(consumerProps);
   }
 
   // group id should be unique per job
-  private static String getConsumerGroupId(Config config) {
+  static String getConsumerGroupId(Config config) {
     JobConfig jobConfig = new JobConfig(config);
     Option<String> jobIdOption = jobConfig.getJobId();
     Option<String> jobNameOption = jobConfig.getName();

http://git-wip-us.apache.org/repos/asf/samza/blob/32c92828/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala b/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala
index 26664ea..ef43e72 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/config/KafkaConfig.scala
@@ -289,7 +289,10 @@ class KafkaConfig(config: Config) extends ScalaMapConfig(config) {
     properties
   }
 
-  // kafka config
+  /**
+    * @deprecated Use KafkaConsumerConfig
+    */
+  @Deprecated
   def getKafkaSystemConsumerConfig( systemName: String,
                                     clientId: String,
                                     groupId: String = "undefined-samza-consumer-group-%s" format UUID.randomUUID.toString,

http://git-wip-us.apache.org/repos/asf/samza/blob/32c92828/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 4b99fcc..83e7a58 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -431,12 +431,22 @@ public class KafkaConsumerProxy<K, V> {
     return failureCause;
   }
 
-  public void stop(long timeout) {
+  /**
+   * Stop the poll thread and wait for it to finish.
+   * @param timeoutMs how long to wait for each join, in milliseconds
+   */
+  public void stop(long timeoutMs) {
     LOG.info("Shutting down KafkaConsumerProxy poll thread:" + consumerPollThread.getName());
 
     isRunning = false;
     try {
-      consumerPollThread.join(timeout);
+      consumerPollThread.join(timeoutMs);
+      // join returns even if the thread didn't finish;
+      // in that case, interrupt it and wait again
+      if (consumerPollThread.isAlive()) {
+        consumerPollThread.interrupt();
+        consumerPollThread.join(timeoutMs);
+      }
     } catch (InterruptedException e) {
       LOG.warn("Join in KafkaConsumerProxy has failed", e);
       consumerPollThread.interrupt();
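
The new stop logic joins with a timeout and, if the poll thread is still alive afterwards, escalates to an interrupt followed by a second join. A minimal standalone sketch of this shutdown pattern, with assumed names:

public class PollThreadShutdown {
  private final Thread consumerPollThread;
  private volatile boolean isRunning = true; // the poll loop is assumed to check this flag

  public PollThreadShutdown(Thread consumerPollThread) {
    this.consumerPollThread = consumerPollThread;
  }

  public void stop(long timeoutMs) {
    isRunning = false; // ask the loop to exit cooperatively
    try {
      consumerPollThread.join(timeoutMs);
      if (consumerPollThread.isAlive()) { // join timed out; escalate
        consumerPollThread.interrupt();
        consumerPollThread.join(timeoutMs);
      }
    } catch (InterruptedException e) {
      consumerPollThread.interrupt();
      Thread.currentThread().interrupt(); // preserve this thread's interrupt status
    }
  }
}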

http://git-wip-us.apache.org/repos/asf/samza/blob/32c92828/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java b/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
new file mode 100644
index 0000000..ee300d0
--- /dev/null
+++ b/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
@@ -0,0 +1,121 @@
+package org.apache.kafka.clients.consumer;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.apache.samza.SamzaException;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.MapConfig;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+
+public class TestKafkaConsumerConfig {
+  private final Map<String, String> props = new HashMap<>();
+  public final static String SYSTEM_NAME = "testSystem";
+  public final static String KAFKA_PRODUCER_PROPERTY_PREFIX = "systems." + SYSTEM_NAME + ".producer.";
+  public final static String KAFKA_CONSUMER_PROPERTY_PREFIX = "systems." + SYSTEM_NAME + ".consumer.";
+  private final static String CLIENT_ID = "clientId";
+
+  @Before
+  public void setProps() {
+
+  }
+
+  @Test
+  public void testDefaultsAndOverrides() {
+
+    Map<String, String> overrides = new HashMap<>();
+    overrides.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true"); // should be ignored
+    overrides.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG, "Ignore"); // should be ignored
+    overrides.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "100"); // should NOT be ignored
+
+    // if KAFKA_CONSUMER_PROPERTY_PREFIX is set, then PRODUCER should be ignored
+    props.put(KAFKA_PRODUCER_PROPERTY_PREFIX + "bootstrap.servers", "ignoreThis:9092");
+    props.put(KAFKA_CONSUMER_PROPERTY_PREFIX + "bootstrap.servers", "useThis:9092");
+
+    // should be overridden
+    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "true"); //ignore
+    props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "1000"); // ignore
+
+
+    // should be overridden
+    props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "200");
+
+    Config config = new MapConfig(props);
+    KafkaConsumerConfig kafkaConsumerConfig = KafkaConsumerConfig.getKafkaSystemConsumerConfig(
+        config, SYSTEM_NAME, CLIENT_ID, overrides);
+
+    Assert.assertEquals(kafkaConsumerConfig.getBoolean(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG), false);
+
+    Assert.assertEquals(
+        kafkaConsumerConfig.getInt(ConsumerConfig.MAX_POLL_RECORDS_CONFIG),
+        Integer.valueOf(KafkaConsumerConfig.DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS));
+
+    Assert.assertEquals(
+        kafkaConsumerConfig.getList(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG).get(0),
+        RangeAssignor.class.getName());
+
+    Assert.assertEquals(
+        kafkaConsumerConfig.getList(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG).get(0),
+        "useThis:9092");
+    Assert.assertEquals(
+        kafkaConsumerConfig.getInt(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG).longValue(),
+        100);
+
+    Assert.assertEquals(
+        kafkaConsumerConfig.getClass(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG),
+        ByteArrayDeserializer.class);
+
+    Assert.assertEquals(
+        kafkaConsumerConfig.getClass(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG),
+        ByteArrayDeserializer.class);
+
+    Assert.assertEquals(
+        kafkaConsumerConfig.getString(ConsumerConfig.CLIENT_ID_CONFIG),
+        CLIENT_ID);
+
+    Assert.assertEquals(
+        kafkaConsumerConfig.getString(ConsumerConfig.GROUP_ID_CONFIG),
+        KafkaConsumerConfig.getConsumerGroupId(config));
+  }
+
+  @Test
+  // test stuff that should not be overridden
+  public void testNotOverride() {
+
+    // if KAFKA_CONSUMER_PROPERTY_PREFIX is not set, then PRODUCER should be used
+    props.put(KAFKA_PRODUCER_PROPERTY_PREFIX + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "useThis:9092");
+    props.put(KAFKA_CONSUMER_PROPERTY_PREFIX + ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, TestKafkaConsumerConfig.class.getName());
+    props.put(KAFKA_CONSUMER_PROPERTY_PREFIX + ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, TestKafkaConsumerConfig.class.getName());
+
+
+    Config config = new MapConfig(props);
+    KafkaConsumerConfig kafkaConsumerConfig = KafkaConsumerConfig.getKafkaSystemConsumerConfig(
+        config, SYSTEM_NAME, CLIENT_ID, Collections.emptyMap());
+
+    Assert.assertEquals(
+        kafkaConsumerConfig.getList(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG).get(0),
+        "useThis:9092");
+
+    Assert.assertEquals(
+        kafkaConsumerConfig.getClass(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG),
+        TestKafkaConsumerConfig.class);
+
+    Assert.assertEquals(
+        kafkaConsumerConfig.getClass(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG),
+        TestKafkaConsumerConfig.class);
+  }
+
+
+
+  @Test(expected = SamzaException.class)
+  public void testNoBootstrapServers() {
+    KafkaConsumerConfig kafkaConsumerConfig = KafkaConsumerConfig.getKafkaSystemConsumerConfig(
+        new MapConfig(Collections.emptyMap()), SYSTEM_NAME, "clientId", Collections.emptyMap());
+
+    Assert.fail("didn't get exception for the missing config:" + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG);
+  }
+}


[11/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/0edf343b
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/0edf343b
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/0edf343b

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 0edf343b8c5360437fa10cc5a543696a4edcc319
Parents: 67e611e f249e71
Author: Boris S <bo...@apache.org>
Authored: Fri Jun 8 10:15:36 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Jun 8 10:15:36 2018 -0700

----------------------------------------------------------------------
 .travis.yml                                     |   3 +-
 README.md                                       |   4 +-
 bin/check-all.sh                                |   2 +-
 bin/integration-tests.sh                        |   3 +-
 build.gradle                                    |  24 +-
 docs/Gemfile                                    |   7 +-
 docs/Gemfile.lock                               |  95 ++-
 docs/README.md                                  |   4 +-
 docs/community/committers.md                    |   8 +-
 .../documentation/versioned/azure/eventhubs.md  |   6 +-
 .../versioned/container/metrics-table.html      |   8 +
 .../versioned/jobs/configuration-table.html     |  91 ++-
 .../documentation/versioned/jobs/logging.md     |   2 +-
 .../documentation/versioned/rest/monitors.md    |   8 +
 .../versioned/deploy-samza-job-from-hdfs.md     |   2 +-
 .../tutorials/versioned/deploy-samza-to-CDH.md  |   4 +-
 .../versioned/hello-samza-high-level-yarn.md    |   2 +-
 .../versioned/hello-samza-high-level-zk.md      |   2 +-
 .../versioned/remote-debugging-samza.md         |   2 +-
 .../versioned/run-in-multi-node-yarn.md         |   4 +-
 .../versioned/samza-rest-getting-started.md     |   2 +-
 docs/startup/download/index.md                  |  19 +-
 docs/startup/hello-samza/versioned/index.md     |   2 +-
 gradle.properties                               |   2 +-
 gradle/buildscript.gradle                       |   2 +-
 gradle/dependency-versions-scala-2.10.gradle    |   2 +-
 gradle/dependency-versions-scala-2.11.gradle    |   2 +-
 gradle/dependency-versions-scala-2.12.gradle    |   2 +-
 gradle/dependency-versions.gradle               |   7 +-
 gradle/release.gradle                           |  11 +-
 .../samza/application/StreamApplication.java    |   7 +-
 .../samza/checkpoint/CheckpointManager.java     |  12 +-
 .../java/org/apache/samza/config/MapConfig.java |   9 +-
 .../apache/samza/operators/MessageStream.java   |  34 +-
 .../apache/samza/operators/TimerRegistry.java   |  41 ++
 .../operators/functions/ClosableFunction.java   |   3 +
 .../operators/functions/FilterFunction.java     |   3 +-
 .../operators/functions/FlatMapFunction.java    |   3 +-
 .../operators/functions/FoldLeftFunction.java   |  16 +-
 .../operators/functions/InitableFunction.java   |   3 +-
 .../samza/operators/functions/JoinFunction.java |   3 +-
 .../samza/operators/functions/MapFunction.java  |   3 +-
 .../samza/operators/functions/SinkFunction.java |   3 +-
 .../functions/StreamTableJoinFunction.java      |   3 +-
 .../operators/functions/SupplierFunction.java   |  38 ++
 .../operators/functions/TimerFunction.java      |  65 ++
 .../samza/operators/triggers/AnyTrigger.java    |  10 +-
 .../samza/operators/triggers/Trigger.java       |   3 +-
 .../apache/samza/operators/windows/Window.java  |   3 +-
 .../apache/samza/operators/windows/Windows.java |  53 +-
 .../windows/internal/WindowInternal.java        |  32 +-
 .../apache/samza/runtime/ApplicationRunner.java |  19 +
 .../samza/serializers/SerializableSerde.java    |   2 +-
 .../org/apache/samza/sql/udfs/ScalarUdf.java    |   4 +-
 .../samza/storage/kv/KeyValueSnapshot.java      |  42 ++
 .../apache/samza/storage/kv/KeyValueStore.java  |  14 +
 .../org/apache/samza/system/StreamSpec.java     |  39 +-
 .../org/apache/samza/system/SystemAdmin.java    |  23 +-
 .../samza/system/SystemStreamPartition.java     |   4 +-
 .../table/LocalStoreBackedTableProvider.java    |  37 --
 .../org/apache/samza/table/ReadWriteTable.java  |   9 +-
 .../org/apache/samza/table/ReadableTable.java   |  11 +-
 .../org/apache/samza/table/TableProvider.java   |  18 +-
 .../java/org/apache/samza/table/TableSpec.java  |  12 +-
 .../java/org/apache/samza/task/TaskContext.java |  19 +
 .../org/apache/samza/task/TimerCallback.java    |  34 ++
 .../java/org/apache/samza/util/RateLimiter.java | 108 ++++
 .../samza/operators/windows/TestWindowPane.java |   2 +-
 .../autoscaling/deployer/ConfigManager.java     |   4 +-
 .../samza/system/kinesis/KinesisConfig.java     |  28 +-
 .../samza/coordinator/AzureJobCoordinator.java  |  93 +--
 .../samza/coordinator/data/ProcessorEntity.java |   8 +
 .../scheduler/JMVersionUpgradeScheduler.java    |   9 +-
 .../eventhub/EventHubClientManagerFactory.java  |   3 +-
 .../samza/system/eventhub/EventHubConfig.java   | 117 +++-
 .../eventhub/SamzaEventHubClientManager.java    |  39 +-
 .../eventhub/admin/EventHubSystemAdmin.java     | 138 +++--
 .../consumer/EventHubSystemConsumer.java        | 304 ++++++----
 .../system/eventhub/metrics/SamzaHistogram.java |  69 ++-
 .../eventhub/producer/AsyncSystemProducer.java  | 218 +++++++
 .../producer/EventHubSystemProducer.java        | 378 +++++++-----
 .../java/org/apache/samza/util/TableUtils.java  |  49 +-
 .../samza/system/eventhub/MockEventData.java    |  21 +-
 .../MockEventHubClientManagerFactory.java       |  25 +-
 .../eventhub/MockEventHubConfigFactory.java     |  18 +-
 .../system/eventhub/TestMetricsRegistry.java    |   7 +-
 .../consumer/TestEventHubSystemConsumer.java    |  75 ++-
 .../producer/ITestEventHubSystemProducer.java   |  10 +-
 .../producer/TestEventHubSystemProducer.java    | 232 +++++--
 samza-azure/src/test/resources/log4j.xml        |  43 ++
 .../AbstractContainerAllocator.java             |   2 +-
 .../ClusterBasedJobCoordinator.java             |  78 ++-
 .../clustermanager/ContainerProcessManager.java |  14 +-
 .../HostAwareContainerAllocator.java            |  17 +-
 .../clustermanager/ResourceRequestState.java    |  53 +-
 .../apache/samza/config/JavaStorageConfig.java  |  18 +-
 .../apache/samza/config/JavaSystemConfig.java   |  12 +-
 .../apache/samza/config/JavaTableConfig.java    |   2 +-
 .../samza/config/JobCoordinatorConfig.java      |   4 +-
 .../org/apache/samza/config/TaskConfigJava.java |  35 +-
 .../container/ContainerHeartbeatClient.java     |   4 +-
 .../container/ContainerHeartbeatMonitor.java    |   3 +
 .../apache/samza/container/LocalityManager.java |  94 ++-
 .../apache/samza/container/RunLoopFactory.java  |  25 +-
 .../SamzaContainerExceptionHandler.java         |  57 --
 .../apache/samza/container/TaskContextImpl.java |  31 +-
 .../disk/PollingScanDiskSpaceMonitor.java       |  20 +-
 .../grouper/task/TaskAssignmentManager.java     |  37 +-
 .../container/host/StatisticsMonitorImpl.java   |  20 +-
 .../StreamPartitionCountMonitor.java            |  10 +-
 .../AbstractCoordinatorStreamManager.java       | 132 ----
 .../stream/CoordinatorStreamManager.java        | 170 ++++++
 .../stream/CoordinatorStreamSystemConsumer.java |  32 +-
 .../stream/CoordinatorStreamSystemProducer.java |  37 +-
 .../stream/CoordinatorStreamWriter.java         |   2 +-
 .../samza/execution/ExecutionPlanner.java       |  30 +-
 .../org/apache/samza/execution/JobGraph.java    |  17 +-
 .../samza/execution/JobGraphJsonGenerator.java  |   4 +-
 .../org/apache/samza/execution/JobNode.java     |  64 +-
 .../org/apache/samza/execution/StreamEdge.java  |   1 +
 .../apache/samza/execution/StreamManager.java   |  32 +-
 .../org/apache/samza/job/model/JobModel.java    |   4 +-
 .../samza/operators/MessageStreamImpl.java      |  89 +--
 .../samza/operators/OperatorSpecGraph.java      | 132 ++++
 .../apache/samza/operators/StreamGraphImpl.java | 315 ----------
 .../apache/samza/operators/StreamGraphSpec.java | 299 +++++++++
 .../org/apache/samza/operators/TableImpl.java   |   3 +-
 .../operators/impl/BroadcastOperatorImpl.java   |  84 +++
 .../operators/impl/ControlMessageSender.java    |  43 +-
 .../samza/operators/impl/EndOfStreamStates.java |   6 +-
 .../samza/operators/impl/OperatorImpl.java      |  70 ++-
 .../samza/operators/impl/OperatorImplGraph.java | 110 ++--
 .../operators/impl/OutputOperatorImpl.java      |   5 +-
 .../operators/impl/PartitionByOperatorImpl.java |  16 +-
 .../operators/impl/StreamOperatorImpl.java      |   3 +-
 .../samza/operators/impl/WatermarkStates.java   |  12 +-
 .../operators/impl/WindowOperatorImpl.java      |  23 +-
 .../operators/spec/BroadcastOperatorSpec.java   |  49 ++
 .../operators/spec/FilterOperatorSpec.java      |  74 +++
 .../operators/spec/FlatMapOperatorSpec.java     |  47 ++
 .../samza/operators/spec/InputOperatorSpec.java |  21 +-
 .../samza/operators/spec/JoinOperatorSpec.java  |  23 +-
 .../samza/operators/spec/MapOperatorSpec.java   |  77 +++
 .../samza/operators/spec/MergeOperatorSpec.java |  51 ++
 .../samza/operators/spec/OperatorSpec.java      |  29 +-
 .../samza/operators/spec/OperatorSpecs.java     |  85 +--
 .../operators/spec/OutputOperatorSpec.java      |   6 +
 .../samza/operators/spec/OutputStreamImpl.java  |  17 +-
 .../operators/spec/PartitionByOperatorSpec.java |  29 +-
 .../operators/spec/SendToTableOperatorSpec.java |  15 +-
 .../samza/operators/spec/SinkOperatorSpec.java  |   6 +
 .../operators/spec/StreamOperatorSpec.java      |  14 +-
 .../spec/StreamTableJoinOperatorSpec.java       |   6 +
 .../operators/spec/WindowOperatorSpec.java      |  22 +-
 .../stream/IntermediateMessageStreamImpl.java   |   4 +-
 .../samza/operators/triggers/Cancellable.java   |   2 +-
 .../samza/operators/triggers/TriggerImpl.java   |   6 +-
 .../operators/util/InternalInMemoryStore.java   |   6 +
 .../apache/samza/operators/util/MathUtils.java  |  50 --
 .../apache/samza/processor/StreamProcessor.java | 238 ++++----
 .../runtime/AbstractApplicationRunner.java      |  41 +-
 .../samza/runtime/ApplicationRunnerMain.java    |   9 +
 .../samza/runtime/LocalApplicationRunner.java   |  77 ++-
 .../samza/runtime/LocalContainerRunner.java     |  30 +-
 .../samza/runtime/RemoteApplicationRunner.java  |  78 ++-
 .../serializers/model/SamzaObjectMapper.java    |  17 +-
 .../standalone/PassthroughJobCoordinator.java   |  37 +-
 .../storage/ChangelogPartitionManager.java      |  89 ---
 .../samza/storage/ChangelogStreamManager.java   | 154 +++++
 .../apache/samza/storage/StorageRecovery.java   |  63 +-
 .../org/apache/samza/system/SystemAdmins.java   |  71 +++
 .../samza/system/inmemory/InMemoryManager.java  | 179 ++++++
 .../system/inmemory/InMemorySystemAdmin.java    | 137 +++++
 .../system/inmemory/InMemorySystemConsumer.java | 148 +++++
 .../system/inmemory/InMemorySystemFactory.java  |  50 ++
 .../system/inmemory/InMemorySystemProducer.java | 103 ++++
 .../org/apache/samza/table/TableManager.java    |  53 +-
 .../samza/table/caching/CachingTable.java       | 199 ++++++
 .../table/caching/CachingTableDescriptor.java   | 173 ++++++
 .../table/caching/CachingTableProvider.java     | 138 +++++
 .../caching/CachingTableProviderFactory.java    |  34 ++
 .../samza/table/caching/SupplierGauge.java      |  46 ++
 .../table/caching/guava/GuavaCacheTable.java    | 112 ++++
 .../guava/GuavaCacheTableDescriptor.java        |  76 +++
 .../caching/guava/GuavaCacheTableProvider.java  |  92 +++
 .../guava/GuavaCacheTableProviderFactory.java   |  34 ++
 .../samza/table/remote/CreditFunction.java      |  36 ++
 .../table/remote/RemoteReadWriteTable.java      | 184 ++++++
 .../samza/table/remote/RemoteReadableTable.java | 181 ++++++
 .../table/remote/RemoteTableDescriptor.java     | 194 ++++++
 .../samza/table/remote/RemoteTableProvider.java | 144 +++++
 .../remote/RemoteTableProviderFactory.java      |  38 ++
 .../samza/table/remote/TableReadFunction.java   |  66 ++
 .../samza/table/remote/TableWriteFunction.java  |  86 +++
 .../apache/samza/table/utils/SerdeUtils.java    |  66 ++
 .../org/apache/samza/task/AsyncRunLoop.java     | 151 ++++-
 .../apache/samza/task/StreamOperatorTask.java   |  48 +-
 .../apache/samza/task/SystemTimerScheduler.java | 154 +++++
 .../org/apache/samza/task/TaskCallbackImpl.java |  22 +-
 .../apache/samza/task/TaskCallbackManager.java  |  11 +-
 .../task/TaskCallbackTimeoutException.java      |  42 --
 .../org/apache/samza/task/TaskFactoryUtil.java  |  42 +-
 .../apache/samza/util/ClassLoaderHelper.java    |  48 --
 .../samza/util/EmbeddedTaggedRateLimiter.java   | 136 +++++
 .../java/org/apache/samza/util/MathUtil.java    |  77 +++
 .../samza/util/MetricsReporterLoader.java       |   6 +-
 .../util/SamzaUncaughtExceptionHandler.java     |  69 +++
 .../org/apache/samza/util/ScalaToJavaUtils.java |  41 --
 .../org/apache/samza/util/ShutdownUtil.java     |  74 +++
 .../apache/samza/util/ThrottlingExecutor.java   |   4 +-
 .../apache/samza/util/ThrottlingScheduler.java  |   2 +-
 .../samza/zk/ScheduleAfterDebounceTime.java     |  96 ++-
 .../samza/zk/ZkBarrierForVersionUpgrade.java    | 125 +++-
 .../org/apache/samza/zk/ZkControllerImpl.java   |  22 +-
 .../samza/zk/ZkCoordinationUtilsFactory.java    |   6 +-
 .../org/apache/samza/zk/ZkJobCoordinator.java   | 229 ++++---
 .../samza/zk/ZkJobCoordinatorFactory.java       |   2 +-
 .../org/apache/samza/zk/ZkLeaderElector.java    |   3 +-
 .../main/java/org/apache/samza/zk/ZkUtils.java  | 159 +++--
 .../org/apache/samza/zk/ZkUtilsMetrics.java     |   6 +
 .../samza/checkpoint/CheckpointTool.scala       |  23 +-
 .../apache/samza/checkpoint/OffsetManager.scala |  10 +-
 .../org/apache/samza/config/JobConfig.scala     |  11 +
 .../apache/samza/config/SerializerConfig.scala  |  32 +
 .../org/apache/samza/config/StorageConfig.scala |   7 +-
 .../org/apache/samza/config/StreamConfig.scala  |  14 +-
 .../org/apache/samza/config/SystemConfig.scala  |   7 +
 .../org/apache/samza/config/TaskConfig.scala    |  58 +-
 .../org/apache/samza/container/RunLoop.scala    |   4 +-
 .../apache/samza/container/SamzaContainer.scala | 215 ++++---
 .../samza/container/SamzaContainerMetrics.scala |   2 +
 .../apache/samza/container/TaskInstance.scala   |  42 +-
 .../samza/coordinator/JobModelManager.scala     | 169 +-----
 .../stream/CoordinatorStreamSystemFactory.scala |  50 --
 .../scala/org/apache/samza/job/JobRunner.scala  |  52 +-
 .../org/apache/samza/job/local/ProcessJob.scala | 167 +++---
 .../samza/job/local/ProcessJobFactory.scala     |  44 +-
 .../samza/job/local/ThreadJobFactory.scala      |  49 +-
 .../org/apache/samza/metrics/JmxServer.scala    |   1 +
 .../org/apache/samza/metrics/JvmMetrics.scala   |  24 +-
 .../reporter/MetricsSnapshotReporter.scala      |  31 +-
 .../MetricsSnapshotReporterFactory.scala        |   6 +-
 .../apache/samza/serializers/SerdeManager.scala |   2 -
 .../samza/storage/TaskStorageManager.scala      |  45 +-
 .../samza/system/StreamMetadataCache.scala      |  25 +-
 .../apache/samza/system/SystemConsumers.scala   |  11 +-
 .../system/chooser/BootstrappingChooser.scala   |  38 +-
 .../samza/system/chooser/DefaultChooser.scala   |  12 +-
 .../filereader/FileReaderSystemConsumer.scala   |  27 +-
 .../org/apache/samza/util/CommandLine.scala     |   4 +-
 .../samza/util/CoordinatorStreamUtil.scala      |  94 +++
 .../apache/samza/util/DaemonThreadFactory.scala |  39 --
 .../samza/util/ExponentialSleepStrategy.scala   |  17 +-
 .../scala/org/apache/samza/util/FileUtil.scala  | 104 ++++
 .../scala/org/apache/samza/util/HttpUtil.scala  |  89 +++
 .../samza/util/LexicographicComparator.scala    |  39 --
 .../org/apache/samza/util/ScalaJavaUtil.scala   |  62 ++
 .../scala/org/apache/samza/util/TimerUtil.scala |  56 ++
 .../org/apache/samza/util/TimerUtils.scala      |  56 --
 .../main/scala/org/apache/samza/util/Util.scala | 352 ++---------
 .../MockClusterResourceManager.java             |  39 +-
 .../MockHostAwareContainerAllocator.java        |  68 +++
 .../TestClusterBasedJobCoordinator.java         |   7 +-
 .../TestContainerProcessManager.java            | 138 +++--
 .../TestHostAwareContainerAllocator.java        | 164 ++++-
 .../samza/container/TestLocalityManager.java    |  26 +-
 .../TestSamzaContainerExceptionHandler.java     |  39 --
 .../TestSamzaUncaughtExceptionHandler.java      |  40 ++
 .../grouper/task/TestTaskAssignmentManager.java |  25 +-
 .../MockCoordinatorStreamSystemFactory.java     |   6 +-
 .../apache/samza/example/BroadcastExample.java  |  71 ---
 .../samza/example/KeyValueStoreExample.java     | 131 ----
 .../org/apache/samza/example/MergeExample.java  |  60 --
 .../samza/example/OrderShipmentJoinExample.java | 115 ----
 .../samza/example/PageViewCounterExample.java   |  95 ---
 .../samza/example/RepartitionExample.java       |  90 ---
 .../org/apache/samza/example/WindowExample.java |  81 ---
 .../samza/execution/TestExecutionPlanner.java   | 150 ++---
 .../apache/samza/execution/TestJobGraph.java    |  68 +--
 .../execution/TestJobGraphJsonGenerator.java    |  59 +-
 .../org/apache/samza/execution/TestJobNode.java |  14 +-
 .../apache/samza/execution/TestStreamEdge.java  |   4 +-
 .../samza/execution/TestStreamManager.java      |  24 +-
 .../samza/operators/TestJoinOperator.java       | 152 ++---
 .../samza/operators/TestMessageStreamImpl.java  |  55 +-
 .../samza/operators/TestOperatorSpecGraph.java  | 185 ++++++
 .../samza/operators/TestStreamGraphImpl.java    | 601 -------------------
 .../samza/operators/TestStreamGraphSpec.java    | 601 +++++++++++++++++++
 .../data/TestOutputMessageEnvelope.java         |  14 +
 .../impl/TestControlMessageSender.java          |  32 +-
 .../samza/operators/impl/TestOperatorImpl.java  |   6 +
 .../operators/impl/TestOperatorImplGraph.java   | 298 ++++++---
 .../operators/impl/TestStreamOperatorImpl.java  |   4 +-
 .../operators/impl/TestWindowOperator.java      | 263 ++++----
 .../operators/impl/store/TestInMemoryStore.java |  16 +
 .../operators/spec/OperatorSpecTestUtils.java   | 141 +++++
 .../samza/operators/spec/TestOperatorSpec.java  | 465 ++++++++++++++
 .../spec/TestPartitionByOperatorSpec.java       | 165 +++++
 .../operators/spec/TestWindowOperatorSpec.java  | 306 +++++++++-
 .../runtime/TestAbstractApplicationRunner.java  |  36 +-
 .../runtime/TestApplicationRunnerMain.java      |   2 +
 .../runtime/TestLocalApplicationRunner.java     |  46 +-
 .../runtime/TestRemoteApplicationRunner.java    |  53 ++
 .../model/TestSamzaObjectMapper.java            |  33 +-
 .../system/inmemory/TestInMemorySystem.java     | 211 +++++++
 .../apache/samza/table/TestTableManager.java    |  14 +-
 .../samza/table/caching/TestCachingTable.java   | 299 +++++++++
 .../table/remote/TestRemoteTableDescriptor.java | 244 ++++++++
 .../apache/samza/task/IdentityStreamTask.java   |  55 ++
 .../org/apache/samza/task/TestAsyncRunLoop.java | 333 +++++-----
 .../samza/task/TestSystemTimerScheduler.java    | 176 ++++++
 .../apache/samza/task/TestTaskFactoryUtil.java  |  64 +-
 .../testUtils/InvalidStreamApplication.java     |  25 -
 .../util/TestEmbeddedTaggedRateLimiter.java     | 230 +++++++
 .../org/apache/samza/util/TestMathUtils.java    |  43 +-
 .../org/apache/samza/util/TestShutdownUtil.java |  63 ++
 .../samza/zk/TestScheduleAfterDebounceTime.java |  59 +-
 .../zk/TestZkBarrierForVersionUpgrade.java      | 280 ++++-----
 .../apache/samza/zk/TestZkJobCoordinator.java   |  39 +-
 .../apache/samza/zk/TestZkLeaderElector.java    |   2 +-
 .../apache/samza/zk/TestZkProcessorLatch.java   |   6 +-
 .../java/org/apache/samza/zk/TestZkUtils.java   | 119 +++-
 .../samza/checkpoint/TestOffsetManager.scala    |  28 +-
 .../samza/config/TestSerializerConfig.scala     |  57 ++
 .../samza/container/TestSamzaContainer.scala    |  29 +-
 .../samza/container/TestTaskInstance.scala      |   4 +-
 .../samza/coordinator/TestJobCoordinator.scala  |  77 ++-
 .../TestStreamPartitionCountMonitor.scala       |  14 +-
 .../coordinator/server/TestHttpServer.scala     |   7 +-
 .../apache/samza/job/local/TestProcessJob.scala | 133 +++-
 .../processor/StreamProcessorTestUtils.scala    |   6 +-
 .../samza/serializers/TestSerdeManager.scala    |   3 +
 .../samza/storage/TestTaskStorageManager.scala  |  46 +-
 .../samza/system/TestStreamMetadataCache.scala  | 101 ++--
 .../chooser/TestBootstrappingChooser.scala      |  94 ++-
 .../system/chooser/TestDefaultChooser.scala     |  17 +-
 .../samza/util/TestDaemonThreadFactory.scala    |  37 --
 .../org/apache/samza/util/TestFileUtil.scala    |  84 +++
 .../scala/org/apache/samza/util/TestUtil.scala  | 100 +--
 .../ElasticsearchSystemFactory.java             |   2 +-
 .../samza/system/hdfs/HdfsSystemAdmin.java      |   2 +-
 .../samza/system/hdfs/HdfsSystemConsumer.java   |   2 +-
 .../hdfs/partitioner/HdfsFileSystemAdapter.java |   7 +-
 .../apache/samza/system/hdfs/HdfsConfig.scala   |   4 +-
 .../samza/system/hdfs/HdfsSystemProducer.scala  |   4 +-
 .../hdfs/writer/AvroDataFileHdfsWriter.scala    |  16 +-
 .../system/hdfs/TestHdfsSystemConsumer.java     |   2 +-
 .../partitioner/TestHdfsFileSystemAdapter.java  |   2 +-
 .../resources/partitioner/subfolder/testfile002 |  16 +
 .../kafka/KafkaCheckpointLogKeySerde.java       |   5 +
 .../samza/system/kafka/KafkaStreamSpec.java     |  18 +-
 .../kafka/KafkaCheckpointManager.scala          |  91 ++-
 .../kafka/KafkaCheckpointManagerFactory.scala   |   2 +-
 .../org/apache/samza/config/KafkaConfig.scala   |  24 +-
 .../apache/samza/system/kafka/BrokerProxy.scala |  25 +-
 .../apache/samza/system/kafka/GetOffset.scala   |   4 +-
 .../samza/system/kafka/KafkaSystemAdmin.scala   |  72 ++-
 .../system/kafka/KafkaSystemConsumer.scala      |   2 +
 .../samza/system/kafka/KafkaSystemFactory.scala |  10 +-
 .../system/kafka/KafkaSystemProducer.scala      |   4 +-
 .../samza/system/kafka/TopicMetadataCache.scala |   2 +-
 .../scala/org/apache/samza/util/KafkaUtil.scala |   8 +-
 .../kafka/TestKafkaCheckpointManagerJava.java   |  45 +-
 .../samza/system/kafka/MockKafkaProducer.java   |  25 +-
 .../samza/system/kafka/TestKafkaStreamSpec.java |   3 +-
 .../system/kafka/TestKafkaSystemAdminJava.java  |  44 +-
 .../kafka/TestKafkaCheckpointManager.scala      |  38 +-
 .../apache/samza/config/TestKafkaConfig.scala   |  10 +-
 .../samza/system/kafka/TestBrokerProxy.scala    |   9 +-
 .../system/kafka/TestKafkaSystemAdmin.scala     |  54 +-
 .../system/kafka/TestKafkaSystemConsumer.scala  |   4 +-
 .../system/kafka/TestTopicMetadataCache.scala   |  32 +-
 .../org/apache/samza/utils/TestKafkaUtil.scala  |   7 +-
 .../kv/inmemory/InMemoryKeyValueStore.scala     |  14 +-
 .../kv/inmemory/TestInMemoryKeyValueStore.java  |  84 +++
 .../samza/storage/kv/RocksDbKeyValueReader.java |   5 +-
 .../samza/storage/kv/RocksDbOptionsHelper.java  |  12 +-
 .../RocksDbKeyValueStorageEngineFactory.scala   |   3 +
 .../samza/storage/kv/RocksDbKeyValueStore.scala |  76 ++-
 .../kv/TestRocksDbKeyValueStoreJava.java        | 140 +++++
 .../storage/kv/TestRocksDbKeyValueStore.scala   |   6 +-
 .../kv/BaseLocalStoreBackedTableProvider.java   |  54 +-
 .../kv/LocalStoreBackedReadWriteTable.java      |  10 +-
 .../kv/LocalStoreBackedReadableTable.java       |   8 +-
 .../samza/storage/kv/AccessLoggedStore.scala    |   9 +-
 .../kv/BaseKeyValueStorageEngineFactory.scala   |   8 +-
 .../apache/samza/storage/kv/CachedStore.scala   |   4 +
 .../storage/kv/KeyValueStorageEngine.scala      |  52 +-
 .../kv/KeyValueStorageEngineMetrics.scala       |  21 +-
 .../samza/storage/kv/KeyValueStoreMetrics.scala |   5 +-
 .../apache/samza/storage/kv/LoggedStore.scala   |   3 +
 .../storage/kv/NullSafeKeyValueStore.scala      |  14 +-
 .../storage/kv/SerializedKeyValueStore.scala    |  15 +
 .../TestLocalBaseStoreBackedTableProvider.java  |   6 +-
 .../samza/storage/kv/MockKeyValueStore.scala    |   4 +
 .../storage/kv/TestKeyValueStorageEngine.scala  |   6 +-
 .../samza/logging/log4j/StreamAppender.java     |  69 ++-
 .../samza/logging/log4j/MockSystemAdmin.java    |  74 +++
 .../samza/logging/log4j/MockSystemFactory.java  |   2 +-
 .../samza/logging/log4j/TestStreamAppender.java |  57 ++
 .../samza/monitor/LocalStoreMonitorConfig.java  |   4 +-
 .../org/apache/samza/monitor/MonitorConfig.java |   8 +
 .../org/apache/samza/monitor/MonitorLoader.java |   4 +-
 .../samza/monitor/SamzaMonitorService.java      |   8 +-
 .../apache/samza/rest/SamzaRestApplication.java |   4 +-
 .../samza/rest/proxy/job/AbstractJobProxy.java  |   8 +-
 .../rest/proxy/job/SimpleYarnJobProxy.java      |   5 +-
 .../samza/rest/proxy/task/SamzaTaskProxy.java   |  14 +-
 .../rest/proxy/task/SamzaTaskProxyFactory.java  |   7 +-
 .../samza/rest/resources/TasksResource.java     |   9 +-
 .../samza/monitor/TestLocalStoreMonitor.java    |  84 +--
 .../apache/samza/sql/avro/AvroRelConverter.java | 239 +++++---
 .../samza/sql/avro/AvroTypeFactoryImpl.java     |   9 +-
 .../samza/sql/data/SamzaSqlCompositeKey.java    |  82 +++
 .../sql/data/SamzaSqlExecutionContext.java      |  24 +-
 .../samza/sql/data/SamzaSqlRelMessage.java      | 175 ++++--
 .../org/apache/samza/sql/fn/FlattenUdf.java     |   2 +-
 .../org/apache/samza/sql/fn/RegexMatchUdf.java  |  39 ++
 .../sql/impl/ConfigBasedIOResolverFactory.java  | 125 ++++
 .../impl/ConfigBasedSourceResolverFactory.java  |  71 ---
 .../samza/sql/impl/ConfigBasedUdfResolver.java  |  13 +-
 .../interfaces/RelSchemaProviderFactory.java    |   3 +-
 .../interfaces/SamzaRelConverterFactory.java    |   7 +-
 .../samza/sql/interfaces/SourceResolver.java    |  34 --
 .../sql/interfaces/SourceResolverFactory.java   |  36 --
 .../samza/sql/interfaces/SqlIOConfig.java       | 136 +++++
 .../samza/sql/interfaces/SqlIOResolver.java     |  45 ++
 .../sql/interfaces/SqlIOResolverFactory.java    |  36 ++
 .../sql/interfaces/SqlSystemStreamConfig.java   |  74 ---
 .../apache/samza/sql/planner/QueryPlanner.java  |  45 +-
 .../sql/planner/SamzaSqlScalarFunctionImpl.java |   7 +-
 .../sql/runner/SamzaSqlApplicationConfig.java   |  83 +--
 .../sql/runner/SamzaSqlApplicationRunner.java   |  23 +-
 .../SamzaSqlRelMessageSerdeFactory.java         |  67 +++
 .../SamzaSqlRelRecordSerdeFactory.java          |  67 +++
 .../samza/sql/testutil/SamzaSqlQueryParser.java |  67 +--
 .../samza/sql/translator/FilterTranslator.java  |  47 +-
 .../samza/sql/translator/JoinTranslator.java    | 294 +++++++++
 .../translator/LogicalAggregateTranslator.java  | 102 ++++
 .../samza/sql/translator/ProjectTranslator.java |  71 ++-
 .../samza/sql/translator/QueryTranslator.java   | 103 +++-
 .../SamzaSqlRelMessageJoinFunction.java         | 121 ++++
 .../samza/sql/translator/ScanTranslator.java    |  52 +-
 .../samza/sql/translator/TranslatorContext.java |  79 ++-
 .../apache/samza/sql/TestQueryTranslator.java   | 103 ----
 .../sql/TestSamzaSqlApplicationConfig.java      |  92 ---
 .../samza/sql/TestSamzaSqlFileParser.java       |  58 --
 .../samza/sql/TestSamzaSqlQueryParser.java      |  70 ---
 .../samza/sql/TestSamzaSqlRelMessage.java       |  46 --
 .../samza/sql/TestSamzaSqlRelMessageSerde.java  | 102 ++++
 .../samza/sql/TestSamzaSqlRelRecordSerde.java   |  86 +++
 .../samza/sql/avro/TestAvroRelConversion.java   | 132 +++-
 .../samza/sql/avro/schemas/AddressRecord.java   |  52 ++
 .../apache/samza/sql/avro/schemas/Company.avsc  |  39 ++
 .../apache/samza/sql/avro/schemas/Company.java  |  52 ++
 .../sql/avro/schemas/EnrichedPageView.avsc      |  81 +++
 .../sql/avro/schemas/EnrichedPageView.java      |  60 ++
 .../org/apache/samza/sql/avro/schemas/Kind.java |  30 +
 .../apache/samza/sql/avro/schemas/PageView.avsc |  39 ++
 .../apache/samza/sql/avro/schemas/PageView.java |  52 ++
 .../samza/sql/avro/schemas/PageViewCount.avsc   |  45 ++
 .../samza/sql/avro/schemas/PageViewCount.java   |  56 ++
 .../samza/sql/avro/schemas/PhoneNumber.java     |  50 ++
 .../apache/samza/sql/avro/schemas/Profile.avsc  | 149 +++++
 .../apache/samza/sql/avro/schemas/Profile.java  |  72 +++
 .../samza/sql/avro/schemas/SimpleRecord.avsc    |   2 +-
 .../samza/sql/avro/schemas/SimpleRecord.java    |   2 +-
 .../samza/sql/avro/schemas/StreetNumRecord.java |  48 ++
 .../samza/sql/data/TestSamzaSqlRelMessage.java  |  46 ++
 .../samza/sql/e2e/TestSamzaSqlEndToEnd.java     | 137 -----
 .../apache/samza/sql/e2e/TestSamzaSqlTable.java |  69 +++
 .../runner/TestSamzaSqlApplicationConfig.java   |  95 +++
 .../runner/TestSamzaSqlApplicationRunner.java   |  56 ++
 .../samza/sql/system/SimpleSystemAdmin.java     |  11 +-
 .../samza/sql/system/TestAvroSystemFactory.java | 186 +++++-
 .../samza/sql/testutil/MyTestArrayUdf.java      |   5 +-
 .../apache/samza/sql/testutil/MyTestUdf.java    |   2 +-
 .../samza/sql/testutil/SamzaSqlTestConfig.java  |  97 ++-
 .../sql/testutil/TestIOResolverFactory.java     | 196 ++++++
 .../sql/testutil/TestSamzaSqlFileParser.java    |  58 ++
 .../sql/testutil/TestSamzaSqlQueryParser.java   |  75 +++
 .../sql/translator/TestFilterTranslator.java    | 136 +++++
 .../sql/translator/TestJoinTranslator.java      | 191 ++++++
 .../sql/translator/TestProjectTranslator.java   | 289 +++++++++
 .../sql/translator/TestQueryTranslator.java     | 596 ++++++++++++++++++
 .../TestSamzaSqlRelMessageJoinFunction.java     | 118 ++++
 .../sql/translator/TranslatorTestBase.java      |  72 +++
 samza-sql/src/test/resources/log4j.xml          |   6 +
 samza-test/src/main/config/join/README          |   8 +-
 .../example/AppWithGlobalConfigExample.java     |  86 +++
 .../apache/samza/example/BroadcastExample.java  |  70 +++
 .../samza/example/KeyValueStoreExample.java     | 138 +++++
 .../org/apache/samza/example/MergeExample.java  |  62 ++
 .../samza/example/OrderShipmentJoinExample.java | 121 ++++
 .../samza/example/PageViewCounterExample.java   | 100 +++
 .../samza/example/RepartitionExample.java       |  96 +++
 .../org/apache/samza/example/WindowExample.java |  86 +++
 .../samza/test/framework/StreamAssert.java      | 181 ++++++
 samza-test/src/main/python/configs/tests.json   |   2 +-
 .../performance/TestKeyValuePerformance.scala   |   6 +-
 .../samza/processor/TestZkStreamProcessor.java  |  11 +-
 .../processor/TestZkStreamProcessorBase.java    |   8 +-
 .../TestZkStreamProcessorFailures.java          |   8 +-
 .../EndOfStreamIntegrationTest.java             |   8 +-
 .../WatermarkIntegrationTest.java               |   7 +-
 .../samza/test/operator/BroadcastAssertApp.java |  59 ++
 .../test/operator/RepartitionJoinWindowApp.java |  80 ++-
 .../test/operator/RepartitionWindowApp.java     |  72 +++
 .../samza/test/operator/SessionWindowApp.java   |  21 +-
 ...StreamApplicationIntegrationTestHarness.java |  21 +-
 .../operator/TestRepartitionJoinWindowApp.java  | 120 +++-
 .../test/operator/TestRepartitionWindowApp.java |  90 +++
 .../samza/test/operator/TumblingWindowApp.java  |  20 +-
 .../samza/test/operator/data/PageView.java      |  63 +-
 .../test/processor/SharedContextFactories.java  | 117 ++++
 .../test/processor/TestStreamApplication.java   | 148 +++++
 .../test/processor/TestStreamProcessor.java     |   9 +-
 .../processor/TestZkLocalApplicationRunner.java | 363 +++++------
 .../test/samzasql/TestSamzaSqlEndToEnd.java     | 469 +++++++++++++++
 .../apache/samza/test/table/TestLocalTable.java | 257 +++++---
 .../samza/test/table/TestRemoteTable.java       | 248 ++++++++
 .../apache/samza/test/timer/TestTimerApp.java   |  87 +++
 .../org/apache/samza/test/timer/TimerTest.java  |  51 ++
 .../samza/storage/kv/TestKeyValueStores.scala   | 240 ++++----
 .../AbstractIntegrationTestHarness.scala        |   8 +-
 .../AbstractKafkaServerTestHarness.scala        |  11 +-
 .../harness/AbstractZookeeperTestHarness.scala  |  10 +-
 .../test/integration/StreamTaskTestUtil.scala   |  39 +-
 .../integration/TestShutdownStatefulTask.scala  |   2 +-
 .../test/integration/TestStatefulTask.scala     |   4 +-
 samza-tools/config/bench-log4j.xml              |  35 ++
 samza-tools/config/eh-bench.properties          |  26 +
 samza-tools/scripts/eh-consumer.sh              |   2 +-
 samza-tools/scripts/generate-kafka-events.sh    |   2 +-
 samza-tools/scripts/samza-sql-console.sh        |   2 +-
 samza-tools/scripts/system-consumer-bench.sh    |  34 ++
 .../scripts/system-consumer-with-samza-bench.sh |  34 ++
 samza-tools/scripts/system-producer-bench.sh    |  34 ++
 .../tools/ConsoleLoggingSystemFactory.java      |  27 +-
 .../samza/tools/EventHubConsoleConsumer.java    |  64 +-
 .../apache/samza/tools/GenerateKafkaEvents.java |   4 +-
 .../org/apache/samza/tools/SamzaSqlConsole.java |  40 +-
 .../tools/avro/AvroSchemaGenRelConverter.java   |   4 +-
 .../tools/benchmark/AbstractSamzaBench.java     | 153 +++++
 .../benchmark/ConfigBasedSspGrouperFactory.java |  87 +++
 .../tools/benchmark/SystemConsumerBench.java    |  91 +++
 .../benchmark/SystemConsumerWithSamzaBench.java | 117 ++++
 .../tools/benchmark/SystemProducerBench.java    | 124 ++++
 .../tools/json/JsonRelConverterFactory.java     |   4 +-
 .../apache/samza/tools/udf/RegexMatchUdf.java   |  40 --
 .../job/yarn/YarnClusterResourceManager.java    |  38 +-
 .../samza/validation/YarnJobValidationTool.java |  15 +-
 .../webapp/ApplicationMasterRestClient.java     | 111 ++++
 .../apache/samza/job/yarn/ClientHelper.scala    |  54 +-
 .../yarn/SamzaAppMasterSecurityManager.scala    |  31 +-
 .../yarn/SamzaContainerSecurityManager.scala    |  34 +-
 .../apache/samza/job/yarn/YarnContainer.scala   |   4 +-
 .../org/apache/samza/job/yarn/YarnJob.scala     |  14 +-
 .../webapp/ApplicationMasterRestServlet.scala   |  76 ++-
 .../webapp/TestApplicationMasterRestClient.java | 330 ++++++++++
 .../TestYarnContainerHeartbeatServlet.java      |   8 +-
 .../samza/job/yarn/TestClientHelper.scala       |  36 +-
 .../yarn/TestSamzaYarnAppMasterService.scala    |  16 +-
 settings.gradle                                 |   4 +-
 sonar-project.properties                        |   6 +-
 565 files changed, 25625 insertions(+), 8630 deletions(-)
----------------------------------------------------------------------



[14/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/7887d884
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/7887d884
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/7887d884

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 7887d884e8a062563a7e4b8b418d817828f51f23
Parents: 88f8559 a8ddede
Author: Boris S <bo...@apache.org>
Authored: Sun Aug 12 23:48:39 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Sun Aug 12 23:48:39 2018 -0700

----------------------------------------------------------------------
 .../java/org/apache/samza/config/Config.java    |   3 +
 .../org/apache/samza/metrics/ListGauge.java     |   3 +-
 .../org/apache/samza/runtime/LocationId.java    |  60 +++
 .../samza/runtime/LocationIdProvider.java       |  28 ++
 .../runtime/LocationIdProviderFactory.java      |  28 ++
 .../org/apache/samza/table/ReadWriteTable.java  |  43 +-
 .../org/apache/samza/table/ReadableTable.java   |  19 +
 .../org/apache/samza/config/TestConfig.java     |  41 ++
 .../DefaultLocationIdProviderFactory.java       |  32 ++
 .../samza/runtime/RemoteApplicationRunner.java  |  69 +---
 .../storage/TaskSideInputStorageManager.java    |  38 +-
 .../samza/system/inmemory/InMemoryManager.java  |  10 +-
 .../system/inmemory/InMemorySystemAdmin.java    |   6 +-
 .../system/inmemory/InMemorySystemFactory.java  |   2 +-
 .../samza/table/caching/CachingTable.java       | 237 ++++++++---
 .../table/caching/CachingTableDescriptor.java   |  14 -
 .../table/caching/CachingTableProvider.java     |   4 +-
 .../table/caching/guava/GuavaCacheTable.java    | 140 ++++++-
 .../samza/table/remote/CreditFunction.java      |  36 --
 .../table/remote/RemoteReadWriteTable.java      | 157 ++++---
 .../samza/table/remote/RemoteReadableTable.java | 243 ++++++++---
 .../table/remote/RemoteTableDescriptor.java     |  47 ++-
 .../samza/table/remote/RemoteTableProvider.java |  73 +++-
 .../samza/table/remote/TableRateLimiter.java    | 167 ++++++++
 .../samza/table/remote/TableReadFunction.java   |  54 ++-
 .../samza/table/remote/TableWriteFunction.java  |  86 +++-
 .../table/utils/DefaultTableReadMetrics.java    |   2 +
 .../table/utils/DefaultTableWriteMetrics.java   |   4 +
 .../org/apache/samza/config/JobConfig.scala     |   5 +
 .../MetricsSnapshotReporterFactory.scala        |   4 +-
 .../runtime/TestRemoteApplicationRunner.java    |  70 ++++
 .../TestTaskSideInputStorageManager.java        | 295 +++++++++++++
 .../samza/table/caching/TestCachingTable.java   | 275 +++++++-----
 .../samza/table/remote/TestRemoteTable.java     | 413 +++++++++++++++++++
 .../table/remote/TestRemoteTableDescriptor.java |  55 +--
 .../table/remote/TestTableRateLimiter.java      | 103 +++++
 .../samza/storage/kv/RocksDbTableProvider.java  |   4 +
 .../kv/LocalStoreBackedReadWriteTable.java      |  49 +++
 .../kv/LocalStoreBackedReadableTable.java       |  23 ++
 .../sql/impl/ConfigBasedIOResolverFactory.java  |   4 +-
 .../sql/interfaces/SqlIOResolverFactory.java    |   5 +-
 .../sql/runner/SamzaSqlApplicationConfig.java   |   2 +-
 .../sql/testutil/TestIOResolverFactory.java     |  34 +-
 .../apache/samza/test/table/TestLocalTable.java |  61 +++
 .../table/TestLocalTableWithSideInputs.java     | 161 ++++++++
 .../samza/test/table/TestRemoteTable.java       |  40 +-
 .../apache/samza/test/table/TestTableData.java  |  22 +-
 .../table/TestTableDescriptorsProvider.java     |   3 +-
 48 files changed, 2722 insertions(+), 552 deletions(-)
----------------------------------------------------------------------



[38/47] samza git commit: debug

Posted by bo...@apache.org.
debug


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/ddada94d
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/ddada94d
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/ddada94d

Branch: refs/heads/NewKafkaSystemConsumer
Commit: ddada94d09a8ac78ec7a88eff9dc77cd39dba32d
Parents: 2655221
Author: Boris S <bo...@apache.org>
Authored: Mon Sep 10 16:28:12 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Mon Sep 10 16:28:12 2018 -0700

----------------------------------------------------------------------
 .../org/apache/samza/system/kafka/KafkaConsumerProxy.java     | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/ddada94d/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 0825c90..92f9183 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -100,7 +100,7 @@ public class KafkaConsumerProxy<K, V> {
       consumerPollThread.start();
 
       // we need to wait until the thread starts
-      while (!isRunning) {
+      while (!isRunning && failureCause == null) {
         try {
           consumerPollThreadStartLatch.await(3000, TimeUnit.MILLISECONDS);
         } catch (InterruptedException e) {
@@ -378,9 +378,8 @@ public class KafkaConsumerProxy<K, V> {
       kafkaConsumerMetrics.incClientReads(metricName);
 
       Map<SystemStreamPartition, List<IncomingMessageEnvelope>> response;
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("pollConsumer from following SSPs: {}; total#={}", SSPsToFetch, SSPsToFetch.size());
-      }
+      LOG.debug("pollConsumer from following SSPs: {}; total#={}", SSPsToFetch, SSPsToFetch.size());
+
       response = pollConsumer(SSPsToFetch, 500); // TODO should be default value from ConsumerConfig
 
       // move the responses into the queue
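
For reference, the start()/poll-thread handshake this hunk fixes works roughly as
sketched below. This is an illustrative Java sketch using the field names from the
diff (isRunning, failureCause, consumerPollThreadStartLatch); it is not code from
the commit. Without the failureCause check, a poll thread that died during startup
(so isRunning was never set) left start() waiting forever:

    public void start() {
      consumerPollThread.setDaemon(true);
      consumerPollThread.start();

      // Wait until the poll thread reports it is running, but give up as soon as
      // it records a failure instead of looping on the latch indefinitely.
      while (!isRunning && failureCause == null) {
        try {
          consumerPollThreadStartLatch.await(3000, TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
          // retry; the loop condition decides when to exit
        }
      }
    }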


[29/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/add733b8
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/add733b8
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/add733b8

Branch: refs/heads/NewKafkaSystemConsumer
Commit: add733b85f78046badd9af36ebf533d19388151c
Parents: 8ab04b2 b0b2922
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 4 17:23:06 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 4 17:23:06 2018 -0700

----------------------------------------------------------------------
 .../org/apache/samza/execution/JobNode.java     |  5 +++++
 .../runtime/AbstractApplicationRunner.java      | 17 ++++++++-------
 .../sql/runner/SamzaSqlApplicationConfig.java   |  6 ++++--
 .../runner/TestSamzaSqlApplicationConfig.java   | 22 +++++++++++++++++++-
 .../sql/testutil/TestSamzaSqlFileParser.java    |  1 -
 .../table/TestLocalTableWithSideInputs.java     |  3 ++-
 6 files changed, 41 insertions(+), 13 deletions(-)
----------------------------------------------------------------------



[35/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/728dc181
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/728dc181
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/728dc181

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 728dc18143618b80df6e74a373c0024ced34544b
Parents: add733b abf49ea
Author: Boris S <bo...@apache.org>
Authored: Fri Sep 7 15:17:47 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Sep 7 15:17:47 2018 -0700

----------------------------------------------------------------------
 .../application/ApplicationDescriptor.java      |  80 +++
 .../samza/application/SamzaApplication.java     |  40 ++
 .../samza/application/StreamApplication.java    |  75 +--
 .../StreamApplicationDescriptor.java            | 113 ++++
 .../samza/application/TaskApplication.java      |  86 +++
 .../application/TaskApplicationDescriptor.java  |  64 ++
 .../java/org/apache/samza/config/Config.java    |   3 +-
 .../samza/metrics/MetricsReporterFactory.java   |   5 +-
 .../apache/samza/operators/MessageStream.java   |   9 +-
 .../org/apache/samza/operators/StreamGraph.java | 120 ----
 .../operators/functions/ClosableFunction.java   |   7 +-
 .../operators/functions/InitableFunction.java   |   6 +-
 .../operators/functions/StreamExpander.java     |  16 +-
 .../apache/samza/runtime/ApplicationRunner.java |  92 +--
 .../samza/runtime/ApplicationRunners.java       |  82 +++
 .../apache/samza/runtime/ProcessorContext.java  |  31 +
 .../runtime/ProcessorLifecycleListener.java     |  55 ++
 .../ProcessorLifecycleListenerFactory.java      |  40 ++
 .../samza/task/AsyncStreamTaskFactory.java      |  10 +-
 .../apache/samza/task/StreamTaskFactory.java    |   6 +-
 .../java/org/apache/samza/task/TaskFactory.java |  38 ++
 .../samza/runtime/TestApplicationRunners.java   |  88 +++
 .../application/ApplicationDescriptorImpl.java  | 179 ++++++
 .../application/ApplicationDescriptorUtil.java  |  51 ++
 .../samza/application/ApplicationUtil.java      |  63 ++
 .../application/LegacyTaskApplication.java      |  37 ++
 .../StreamApplicationDescriptorImpl.java        | 381 ++++++++++++
 .../TaskApplicationDescriptorImpl.java          | 129 ++++
 .../samza/container/SamzaContainerListener.java |  22 +-
 .../samza/execution/ExecutionPlanner.java       |   7 +-
 .../org/apache/samza/execution/JobGraph.java    |   6 -
 .../org/apache/samza/execution/JobPlanner.java  | 188 ++++++
 .../apache/samza/execution/LocalJobPlanner.java | 134 +++++
 .../samza/execution/RemoteJobPlanner.java       |  96 +++
 .../samza/operators/MessageStreamImpl.java      |  57 +-
 .../samza/operators/OperatorSpecGraph.java      |  26 +-
 .../apache/samza/operators/StreamGraphSpec.java | 336 -----------
 .../samza/operators/spec/OperatorSpec.java      |   2 +-
 .../stream/IntermediateMessageStreamImpl.java   |   6 +-
 .../apache/samza/processor/StreamProcessor.java | 122 ++--
 .../StreamProcessorLifecycleListener.java       |  49 --
 .../runtime/AbstractApplicationRunner.java      | 135 -----
 .../samza/runtime/ApplicationRunnerMain.java    |  42 +-
 .../samza/runtime/LocalApplicationRunner.java   | 355 ++++-------
 .../samza/runtime/LocalContainerRunner.java     |  56 +-
 .../samza/runtime/RemoteApplicationRunner.java  | 123 ++--
 .../apache/samza/task/StreamOperatorTask.java   |   5 +-
 .../org/apache/samza/task/TaskFactoryUtil.java  | 137 ++---
 .../apache/samza/container/SamzaContainer.scala |  16 +-
 .../scala/org/apache/samza/job/JobRunner.scala  |   2 -
 .../samza/job/local/ThreadJobFactory.scala      |  48 +-
 .../application/MockStreamApplication.java      |  29 +
 .../samza/application/TestApplicationUtil.java  |  96 +++
 .../TestStreamApplicationDescriptorImpl.java    | 584 +++++++++++++++++++
 .../TestTaskApplicationDescriptorImpl.java      | 144 +++++
 .../samza/execution/TestExecutionPlanner.java   | 192 +++---
 .../execution/TestJobGraphJsonGenerator.java    | 120 ++--
 .../org/apache/samza/execution/TestJobNode.java |  53 +-
 .../samza/execution/TestLocalJobPlanner.java    | 211 +++++++
 .../samza/execution/TestRemoteJobPlanner.java   |  88 +++
 .../samza/operators/TestJoinOperator.java       | 103 ++--
 .../samza/operators/TestMessageStreamImpl.java  |  29 +-
 .../samza/operators/TestOperatorSpecGraph.java  |  19 +-
 .../samza/operators/TestStreamGraphSpec.java    | 506 ----------------
 .../operators/impl/TestOperatorImplGraph.java   | 190 +++---
 .../operators/impl/TestWindowOperator.java      | 147 ++---
 .../spec/TestPartitionByOperatorSpec.java       |  70 ++-
 .../samza/processor/TestStreamProcessor.java    | 139 +++--
 .../runtime/TestApplicationRunnerMain.java      |  47 +-
 .../runtime/TestLocalApplicationRunner.java     | 311 +++-------
 .../runtime/TestRemoteApplicationRunner.java    |  35 +-
 .../apache/samza/task/MockAsyncStreamTask.java  |  31 +
 .../org/apache/samza/task/MockStreamTask.java   |  31 +
 .../apache/samza/task/TestTaskFactoryUtil.java  | 215 ++-----
 .../samza/testUtils/TestAsyncStreamTask.java    |  35 --
 .../samza/testUtils/TestStreamApplication.java  |  33 --
 .../apache/samza/testUtils/TestStreamTask.java  |  34 --
 .../samza/container/TestSamzaContainer.scala    |  76 ++-
 .../samza/sql/runner/SamzaSqlApplication.java   |  13 +-
 .../sql/runner/SamzaSqlApplicationRunner.java   |  53 +-
 .../samza/sql/translator/JoinTranslator.java    |   2 +-
 .../samza/sql/translator/QueryTranslator.java   |  27 +-
 .../samza/sql/translator/ScanTranslator.java    |   8 +-
 .../samza/sql/translator/TranslatorContext.java |  19 +-
 .../apache/samza/sql/e2e/TestSamzaSqlTable.java |   8 +-
 .../runner/TestSamzaSqlApplicationRunner.java   |   2 -
 .../sql/translator/TestFilterTranslator.java    |   6 +-
 .../sql/translator/TestJoinTranslator.java      |  16 +-
 .../sql/translator/TestProjectTranslator.java   |  14 +-
 .../sql/translator/TestQueryTranslator.java     | 162 +++--
 .../example/AppWithGlobalConfigExample.java     |  25 +-
 .../apache/samza/example/BroadcastExample.java  |  22 +-
 .../samza/example/KeyValueStoreExample.java     |  19 +-
 .../org/apache/samza/example/MergeExample.java  |  18 +-
 .../samza/example/OrderShipmentJoinExample.java |  19 +-
 .../samza/example/PageViewCounterExample.java   |  15 +-
 .../samza/example/RepartitionExample.java       |  19 +-
 .../samza/example/TaskApplicationExample.java   |  77 +++
 .../org/apache/samza/example/WindowExample.java |  18 +-
 .../samza/system/mock/MockSystemConsumer.java   |   4 +-
 .../apache/samza/test/framework/TestRunner.java |  41 +-
 .../integration/LocalApplicationRunnerMain.java |  21 +-
 .../TestStandaloneIntegrationApplication.java   |   9 +-
 .../processor/TestZkStreamProcessorBase.java    |  20 +-
 .../EndOfStreamIntegrationTest.java             |  37 +-
 .../WatermarkIntegrationTest.java               |  62 +-
 .../test/framework/BroadcastAssertApp.java      |   7 +-
 .../StreamApplicationIntegrationTest.java       |   9 +-
 ...StreamApplicationIntegrationTestHarness.java |  42 +-
 .../samza/test/framework/TestTimerApp.java      |   7 +-
 .../apache/samza/test/framework/TimerTest.java  |  18 +-
 .../test/operator/RepartitionJoinWindowApp.java |  25 +-
 .../test/operator/RepartitionWindowApp.java     |  20 +-
 .../samza/test/operator/SessionWindowApp.java   |  17 +-
 .../operator/TestRepartitionJoinWindowApp.java  |  30 +-
 .../test/operator/TestRepartitionWindowApp.java |  10 +-
 .../samza/test/operator/TumblingWindowApp.java  |  16 +-
 .../test/processor/TestStreamApplication.java   |  82 +--
 .../test/processor/TestStreamProcessor.java     |  18 +-
 .../processor/TestZkLocalApplicationRunner.java | 317 +++++-----
 .../apache/samza/test/table/TestLocalTable.java |  39 +-
 .../table/TestLocalTableWithSideInputs.java     |  13 +-
 .../samza/test/table/TestRemoteTable.java       |  27 +-
 .../benchmark/SystemConsumerWithSamzaBench.java |  14 +-
 124 files changed, 5280 insertions(+), 3631 deletions(-)
----------------------------------------------------------------------



[18/47] samza git commit: Added new KafkaProxy and KafkaConsumer for default KafkaSystem

Posted by bo...@apache.org.
Added new KafkaProxy and KafkaConsumer for default KafkaSystem


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/72544606
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/72544606
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/72544606

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 72544606bfffc67aeaa7f509ca54cfd6db52e2b4
Parents: 4801709
Author: Boris S <bo...@apache.org>
Authored: Fri Aug 17 18:08:52 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Aug 17 18:08:52 2018 -0700

----------------------------------------------------------------------
 .../clients/consumer/KafkaConsumerConfig.java   | 152 ++++++
 .../samza/system/kafka/KafkaConsumerProxy.java  | 463 +++++++++++++++++++
 .../samza/system/kafka/KafkaSystemFactory.scala |  54 ++-
 .../system/kafka/NewKafkaSystemConsumer.java    | 403 ++++++++++++++++
 .../kafka/TestKafkaCheckpointManager.scala      |   8 +-
 5 files changed, 1064 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/72544606/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
new file mode 100644
index 0000000..97360e2
--- /dev/null
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -0,0 +1,152 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.kafka.clients.consumer;
+
+import java.util.Map;
+import java.util.Properties;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.ConfigException;
+import org.apache.samza.config.JobConfig;
+import scala.Option;
+
+
+/**
+ * The configuration class for KafkaConsumer
+ */
+public class KafkaConsumerConfig extends ConsumerConfig {
+
+  private static final String PRODUCER_CLIENT_ID_PREFIX = "kafka-producer";
+  private static final String CONSUMER_CLIENT_ID_PREFIX = "kafka-consumer";
+  private static final String SAMZA_OFFSET_LARGEST = "largest";
+  private static final String SAMZA_OFFSET_SMALLEST = "smallest";
+  private static final String KAFKA_OFFSET_LATEST = "latest";
+  private static final String KAFKA_OFFSET_EARLIEST = "earliest";
+  /*
+   * By default, KafkaConsumer will fetch ALL available messages for all the partitions.
+   * This may cause memory issues. That's why we will limit the number of messages per partition we get on EACH poll().
+   */
+  private static final String KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT = "100";
+
+
+  public KafkaConsumerConfig(Properties props) {
+    super(props);
+  }
+
+  public static KafkaConsumerConfig getKafkaSystemConsumerConfig(Config config,
+      String systemName, String clientId, Map<String, String> injectProps) {
+
+    Config subConf = config.subset(String.format("systems.%s.consumer.", systemName), true);
+
+    String groupId = getConsumerGroupId(config);
+
+    Properties consumerProps = new Properties();
+    consumerProps.putAll(subConf);
+
+    consumerProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
+    consumerProps.setProperty(ConsumerConfig.CLIENT_ID_CONFIG, clientId);
+
+    /********************************************
+     * Open-source Kafka Consumer configuration *
+     *******************************************/
+    consumerProps.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // Disable consumer auto-commit
+
+    consumerProps.setProperty(
+        ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
+        getAutoOffsetResetValue(consumerProps));  // Translate samza config value to kafka config value
+
+    // Make sure the bootstrap servers config is present. TODO: should we fail if it is not?
+    if (!subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
+      // get it from the producer config
+      String bootstrapServer = config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
+      consumerProps.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer);
+    }
+
+    // Always use default partition assignment strategy. Do not allow override.
+    consumerProps.setProperty(
+        ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG,
+        RangeAssignor.class.getName());
+
+
+    // TODO: confirm whether this override is still needed.
+    String maxPollRecords = subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT);
+    consumerProps.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
+
+    // put overrides
+    consumerProps.putAll(injectProps);
+
+    return new KafkaConsumerConfig(consumerProps);
+  }
+
+  // group id should be unique per job
+  static String getConsumerGroupId(Config config) {
+    JobConfig jobConfig = new JobConfig(config);
+    Option<String> jobIdOption = jobConfig.getJobId();
+    Option<String> jobNameOption = jobConfig.getName();
+    return (jobNameOption.isDefined() ? jobNameOption.get() : "undefined_job_name") + "-"
+        + (jobIdOption.isDefined() ? jobIdOption.get() : "undefined_job_id");
+  }
+  // client id should be unique per job
+  public static String getClientId(String id, Config config) {
+    if (config.get(JobConfig.JOB_NAME()) == null) {
+      throw new ConfigException("Missing job name");
+    }
+    String jobName = config.get(JobConfig.JOB_NAME());
+    String jobId = "1";
+    if (config.get(JobConfig.JOB_ID()) != null) {
+      jobId = config.get(JobConfig.JOB_ID());
+    }
+    return getClientId(id, jobName, jobId);
+  }
+
+  private static String getClientId(String id, String jobName, String jobId) {
+    return String.format(
+        "%s-%s-%s",
+        id.replaceAll("[^A-Za-z0-9]", "_"),
+        jobName.replaceAll("[^A-Za-z0-9]", "_"),
+        jobId.replaceAll("[^A-Za-z0-9]", "_"));
+  }
+
+  public static String getProducerClientId(Config config) {
+    return getClientId(PRODUCER_CLIENT_ID_PREFIX, config);
+  }
+
+  /**
+   * Settings for auto.reset in samza are different from settings in Kafka (auto.offset.reset) - need to convert
+   * "largest" -> "latest"
+   * "smallest" -> "earliest"
+   * "none" - will fail the kafka consumer, if offset is out of range
+   * @param properties All consumer related {@link Properties} parsed from samza config
+   * @return String representing the config value for "auto.offset.reset" property
+   */
+  static String getAutoOffsetResetValue(Properties properties) {
+    String autoOffsetReset = properties.getProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, KAFKA_OFFSET_LATEST);
+    switch (autoOffsetReset) {
+      case SAMZA_OFFSET_LARGEST:
+        return KAFKA_OFFSET_LATEST;
+      case SAMZA_OFFSET_SMALLEST:
+        return KAFKA_OFFSET_EARLIEST;
+      default:
+        return KAFKA_OFFSET_LATEST;
+    }
+  }
+
+}
\ No newline at end of file
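
For reference, a hypothetical usage sketch for the class added above. The job name,
system name and bootstrap server below are made-up example values, not taken from
the commit:

    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;
    import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
    import org.apache.samza.config.Config;
    import org.apache.samza.config.MapConfig;

    public class ConsumerConfigExample {
      public static void main(String[] args) {
        Map<String, String> props = new HashMap<>();
        props.put("job.name", "my-job");
        props.put("job.id", "1");
        props.put("systems.kafka.consumer.bootstrap.servers", "localhost:9092");
        props.put("systems.kafka.consumer.auto.offset.reset", "smallest"); // samza-style value
        Config config = new MapConfig(props);

        // non-alphanumerics are replaced with '_', giving "kafka_consumer-my_job-1"
        String clientId = KafkaConsumerConfig.getClientId("kafka-consumer", config);

        KafkaConsumerConfig consumerConfig = KafkaConsumerConfig.getKafkaSystemConsumerConfig(
            config, "kafka", clientId, Collections.emptyMap());

        // getAutoOffsetResetValue() translates "smallest" -> "earliest" ("largest" -> "latest"),
        // and the group.id becomes "my-job-1" (job name + "-" + job id).
        System.out.println(consumerConfig.getString("auto.offset.reset")); // prints "earliest"
      }
    }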

http://git-wip-us.apache.org/repos/asf/samza/blob/72544606/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
new file mode 100644
index 0000000..66971af
--- /dev/null
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -0,0 +1,463 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.samza.system.kafka;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import kafka.common.KafkaException;
+import kafka.common.TopicAndPartition;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.InvalidOffsetException;
+import org.apache.kafka.common.Metric;
+import org.apache.kafka.common.MetricName;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.samza.SamzaException;
+import org.apache.samza.system.IncomingMessageEnvelope;
+import org.apache.samza.system.SystemStreamPartition;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Runs a separate thread that reads messages from Kafka and puts them into the BlockingEnvelopeMap.
+ * This class is not thread safe. There will be only one instance of this class per NewKafkaSystemConsumer object.
+ * We still need some synchronization around kafkaConsumer. See the pollConsumer() method for details.
+ */
+public class KafkaConsumerProxy<K, V> {
+  private static final Logger LOG = LoggerFactory.getLogger(KafkaConsumerProxy.class);
+
+  private static final int SLEEP_MS_WHILE_NO_TOPIC_PARTITION = 100;
+
+  /* package private */ final Thread consumerPollThread;
+  private final Consumer<K, V> kafkaConsumer;
+  private final NewKafkaSystemConsumer.KafkaConsumerMessageSink sink;
+  private final KafkaSystemConsumerMetrics kafkaConsumerMetrics;
+  private final String metricName;
+  private final String systemName;
+  private final String clientId;
+  private final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
+  private final Map<SystemStreamPartition, MetricName> ssp2MetricName = new HashMap<>();
+  // list of all the SSPs we poll from with their next offsets correspondingly.
+  private final Map<SystemStreamPartition, Long> nextOffsets = new ConcurrentHashMap<>();
+  // lags behind the high water mark, as reported by the Kafka consumer.
+  private final Map<SystemStreamPartition, Long> latestLags = new HashMap<>();
+  private final NewKafkaSystemConsumer.ValueUnwrapper<V> valueUnwrapper;
+
+  private volatile boolean isRunning = false;
+  private volatile Throwable failureCause = null;
+  private CountDownLatch consumerPollThreadStartLatch = new CountDownLatch(1);
+
+  public KafkaConsumerProxy(Consumer<K, V> kafkaConsumer, String systemName, String clientId,
+      NewKafkaSystemConsumer.KafkaConsumerMessageSink messageSink, KafkaSystemConsumerMetrics samzaConsumerMetrics,
+      String metricName, NewKafkaSystemConsumer.ValueUnwrapper<V> valueUnwrapper) {
+
+    this.kafkaConsumer = kafkaConsumer;
+    this.systemName = systemName;
+    this.sink = messageSink;
+    this.kafkaConsumerMetrics = samzaConsumerMetrics;
+    this.metricName = metricName;
+    this.clientId = clientId;
+    this.valueUnwrapper = valueUnwrapper;
+
+    // TODO - see if we need new metrics (not host:port based)
+    this.kafkaConsumerMetrics.registerBrokerProxy(metricName, 0);
+
+    consumerPollThread = new Thread(createProxyThreadRunnable());
+  }
+
+  public void start() {
+    if (!consumerPollThread.isAlive()) {
+      LOG.info("Starting LiKafkaConsumerProxy polling thread for system " + systemName + " " + this.toString());
+      consumerPollThread.setDaemon(true);
+      consumerPollThread.setName(
+          "Samza LiKafkaConsumerProxy Poll " + consumerPollThread.getName() + " - " + systemName);
+      consumerPollThread.start();
+
+      // we need to wait until the thread starts
+      while (!isRunning) {
+        try {
+          consumerPollThreadStartLatch.await(3000, TimeUnit.MILLISECONDS);
+        } catch (InterruptedException e) {
+        }
+      }
+    } else {
+      LOG.debug("Tried to start an already started LiKafkaConsumerProxy (%s). Ignoring.", this.toString());
+    }
+  }
+
+  // add new partition to the list of polled partitions
+  // this method is called only at the beginning, before the thread is started
+  public void addTopicPartition(SystemStreamPartition ssp, long nextOffset) {
+    LOG.info(String.format("Adding new topic and partition %s, offset = %s to queue for consumer %s", ssp, nextOffset,
+        this));
+    topicPartitions2SSP.put(NewKafkaSystemConsumer.toTopicPartition(ssp), ssp); //registered SSPs
+
+    // this is already vetted offset so there is no need to validate it
+    LOG.info(String.format("Got offset %s for new topic and partition %s.", nextOffset, ssp));
+
+    nextOffsets.put(ssp, nextOffset);
+
+    // we reuse existing metrics. They assume host and port for the broker
+    // for now fake the port with the consumer name
+    kafkaConsumerMetrics.setTopicPartitionValue(metricName, 0, nextOffsets.size());
+  }
+
+  /**
+   * creates a separate thread for pulling messages
+   */
+  private Runnable createProxyThreadRunnable() {
+    return () -> {
+      isRunning = true;
+
+      try {
+        consumerPollThreadStartLatch.countDown();
+        initializeLags();
+        while (isRunning) {
+          fetchMessages();
+        }
+      } catch (Throwable throwable) {
+        LOG.error(String.format("Error in KafkaConsumerProxy poll thread for system: %s.", systemName), throwable);
+        // NewKafkaSystemConsumer uses the failureCause to propagate the throwable to the container
+        failureCause = throwable;
+        isRunning = false;
+      }
+
+      if (!isRunning) {
+        LOG.info("Stopping the LiKafkaConsumerProxy poll thread for system: {}.", systemName);
+      }
+    };
+  }
+
+  private void initializeLags() {
+    // This is expensive, so only do it once at the beginning. After the first poll, we can rely on metrics for lag.
+    Map<TopicPartition, Long> endOffsets = kafkaConsumer.endOffsets(topicPartitions2SSP.keySet());
+    endOffsets.forEach((tp, offset) -> {
+      SystemStreamPartition ssp = topicPartitions2SSP.get(tp);
+      long startingOffset = nextOffsets.get(ssp);
+      // End offsets are the offset of the newest message + 1
+      // If the message we are about to consume is < end offset, we are starting with a lag.
+      long initialLag = endOffsets.get(tp) - startingOffset;
+
+      LOG.info("Initial lag is {} for SSP {}", initialLag, ssp);
+      latestLags.put(ssp, initialLag);
+      sink.setIsAtHighWatermark(ssp, initialLag == 0);
+    });
+
+    // initialize lag metrics
+    refreshLatencyMetrics();
+  }
+
+  // the actual polling of the messages from kafka
+  public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> pollConsumer(
+      Set<SystemStreamPartition> systemStreamPartitions, long timeout) {
+
+    if (topicPartitions2SSP.size() == 0) {
+      throw new SamzaException("cannot poll empty set of TopicPartitions");
+    }
+
+    // Since we need to poll only from some subset of TopicPartitions (passed as the argument),
+    // we need to pause the rest.
+    List<TopicPartition> topicPartitionsToPause = new ArrayList<>();
+    List<TopicPartition> topicPartitionsToPoll = new ArrayList<>();
+
+    for (Map.Entry<TopicPartition, SystemStreamPartition> e : topicPartitions2SSP.entrySet()) {
+      TopicPartition tp = e.getKey();
+      SystemStreamPartition ssp = e.getValue();
+      if (systemStreamPartitions.contains(ssp)) {
+        topicPartitionsToPoll.add(tp);  // consume
+      } else {
+        topicPartitionsToPause.add(tp); // ignore
+      }
+    }
+
+    ConsumerRecords<K, V> records;
+    // make a call on the client
+    try {
+      // Currently, when checkpointing, we make a safeOffset request through this client, so we need to
+      // synchronize access to it. In the future we may use this client for the actual checkpointing.
+      synchronized (kafkaConsumer) {
+        // Since we are not polling from ALL the subscribed topics, we need to "change" the subscription temporarily.
+        kafkaConsumer.pause(topicPartitionsToPause);
+        kafkaConsumer.resume(topicPartitionsToPoll);
+        records = kafkaConsumer.poll(timeout);
+        // resume original set of subscription - may be required for checkpointing
+        kafkaConsumer.resume(topicPartitionsToPause);
+      }
+    } catch (InvalidOffsetException e) {
+      LOG.error("LiKafkaConsumer with invalidOffsetException", e);
+      // If the consumer has thrown this exception it means that auto reset is not set for this consumer.
+      // So we just rethrow.
+      LOG.error("Caught InvalidOffsetException in pollConsumer", e);
+      throw e;
+    } catch (KafkaException e) {
+      // we may get InvalidOffsetException | AuthorizationException | KafkaException exceptions,
+      // but we still just rethrow, and log it up the stack.
+      LOG.error("Caught a Kafka exception in pollConsumer", e);
+      throw e;
+    }
+
+    return processResults(records);
+  }
+
+  private Map<SystemStreamPartition, List<IncomingMessageEnvelope>> processResults(ConsumerRecords<K, V> records) {
+    if (records == null) {
+      return Collections.emptyMap();
+    }
+
+    int capacity = (int) (records.count() / 0.75 + 1); // sized for HashMap's default 0.75 load factor, to avoid rehashing
+    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> results = new HashMap<>(capacity);
+    // Parse the returned records and convert them into the IncomingMessageEnvelope.
+    // Note: they have already been deserialized by the consumer.
+    for (ConsumerRecord<K, V> r : records) {
+      int partition = r.partition();
+      String topic = r.topic();
+      TopicPartition tp = new TopicPartition(topic, partition);
+
+      updateMetrics(r, tp);
+
+      SystemStreamPartition ssp = topicPartitions2SSP.get(tp);
+      List<IncomingMessageEnvelope> listMsgs = results.get(ssp);
+      if (listMsgs == null) {
+        listMsgs = new ArrayList<>();
+        results.put(ssp, listMsgs);
+      }
+
+      // TODO - add calculation of the size of the message, when available from Kafka
+      int msgSize = 0;
+      // if (fetchLimitByBytesEnabled) {
+      msgSize = getRecordSize(r);
+      //}
+
+      final K key = r.key();
+      final Object value =
+          valueUnwrapper == null ? r.value() : valueUnwrapper.unwrapValue(ssp.getSystemStream(), r.value());
+      IncomingMessageEnvelope imEnvelope =
+          new IncomingMessageEnvelope(ssp, String.valueOf(r.offset()), key, value, msgSize);
+      listMsgs.add(imEnvelope);
+    }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("# records per SSP:");
+      for (Map.Entry<SystemStreamPartition, List<IncomingMessageEnvelope>> e : results.entrySet()) {
+        List<IncomingMessageEnvelope> list = e.getValue();
+        LOG.debug(e.getKey() + " = " + ((list == null) ? 0 : list.size()));
+      }
+    }
+
+    return results;
+  }
+
+  private int getRecordSize(ConsumerRecord<K, V> r) {
+    int keySize = 0; //(r.key() == null) ? 0 : r.key().getSerializedKeySize();
+    return keySize;  // + r.getSerializedMsgSize();  // TODO -enable when functionality available from Kafka
+
+    //int getMessageSize (Message message) {
+    // Approximate additional shallow heap overhead per message in addition to the raw bytes
+    // received from Kafka  4 + 64 + 4 + 4 + 4 = 80 bytes overhead.
+    // As this overhead is a moving target, and not very large
+    // compared to the message size, it is being ignored in the computation for now.
+    // int MESSAGE_SIZE_OVERHEAD =  4 + 64 + 4 + 4 + 4;
+
+    //      return message.size() + MESSAGE_SIZE_OVERHEAD;
+    // }
+  }
+
+  private void updateMetrics(ConsumerRecord<K, V> r, TopicPartition tp) {
+    TopicAndPartition tap = NewKafkaSystemConsumer.toTopicAndPartition(tp);
+    SystemStreamPartition ssp = NewKafkaSystemConsumer.toSystemStreamPartition(systemName, tap);
+    long currentSSPLag = getLatestLag(ssp); // lag between the current offset and the highwatermark
+    if (currentSSPLag < 0) {
+      return;
+    }
+    long recordOffset = r.offset();
+    long highWatermark = recordOffset + currentSSPLag; // derived value for the highwatermark
+
+    int size = getRecordSize(r);
+    kafkaConsumerMetrics.incReads(tap);
+    kafkaConsumerMetrics.incBytesReads(tap, size);
+    kafkaConsumerMetrics.setOffsets(tap, recordOffset);
+    kafkaConsumerMetrics.incBrokerBytesReads(metricName, 0, size);
+    kafkaConsumerMetrics.setHighWatermarkValue(tap, highWatermark);
+  }
+
+  /*
+   This method puts messages into the BlockingEnvelopeMap.
+   */
+  private void moveMessagesToTheirQueue(SystemStreamPartition ssp, List<IncomingMessageEnvelope> envelopes) {
+    long nextOffset = nextOffsets.get(ssp);
+
+    for (IncomingMessageEnvelope env : envelopes) {
+      sink.addMessage(ssp, env);  // move message to the BlockingEnvelopeMap's queue
+
+      LOG.trace("IncomingMessageEnvelope. got envelope with offset:{} for ssp={}", env.getOffset(), ssp);
+      nextOffset = Long.valueOf(env.getOffset()) + 1;
+    }
+
+    nextOffsets.put(ssp, nextOffset);
+  }
+
+  private void populateMetricNames(Set<SystemStreamPartition> ssps) {
+    HashMap<String, String> tags = new HashMap<>();
+    tags.put("client-id", clientId);// this is required by the KafkaConsumer to get the metrics
+
+    for (SystemStreamPartition ssp : ssps) {
+      TopicPartition tp = NewKafkaSystemConsumer.toTopicPartition(ssp);
+      ssp2MetricName.put(ssp, new MetricName(tp + ".records-lag", "consumer-fetch-manager-metrics", "", tags));
+    }
+  }
+
+  /*
+    The only way to figure out lag for the KafkaConsumer is to look at its metrics after each poll() call.
+    One of the metrics (records-lag) shows how far behind the high watermark the consumer is.
+    This method populates the lag information for each SSP into the latestLags member variable.
+   */
+  private void populateCurrentLags(Set<SystemStreamPartition> ssps) {
+
+    Map<MetricName, ? extends Metric> consumerMetrics = kafkaConsumer.metrics();
+
+    // populate the MetricNames first time
+    if (ssp2MetricName.isEmpty()) {
+      populateMetricNames(ssps);
+    }
+
+    for (SystemStreamPartition ssp : ssps) {
+      MetricName mn = ssp2MetricName.get(ssp);
+      Metric currentLagM = consumerMetrics.get(mn);
+
+      // In linkedin-kafka-client 5.*, high watermark is fixed to be the offset of last available message,
+      // so the lag is now at least 0, which is the same as Samza's definition.
+      // If the lag is not 0, then isAtHead is not true, and kafkaClient keeps polling.
+      long currentLag = (currentLagM != null) ? (long) currentLagM.value() : -1L;
+      /*
+      Metric averageLagM = consumerMetrics.get(new MetricName(tp + ".records-lag-avg", "consumer-fetch-manager-metrics", "", tags));
+      double averageLag = (averageLagM != null) ? averageLagM.value() : -1.0;
+      Metric maxLagM = consumerMetrics.get(new MetricName(tp + ".records-lag-max", "consumer-fetch-manager-metrics", "", tags));
+      double maxLag = (maxLagM != null) ? maxLagM.value() : -1.0;
+      */
+      latestLags.put(ssp, currentLag);
+
+      // calls the setIsAtHead for the BlockingEnvelopeMap
+      sink.setIsAtHighWatermark(ssp, currentLag == 0);
+    }
+  }
+
+  /*
+    Get the latest lag for a specific SSP.
+   */
+  public long getLatestLag(SystemStreamPartition ssp) {
+    Long lag = latestLags.get(ssp);
+    if (lag == null) {
+      throw new SamzaException("Unknown/unregistered ssp in latestLags request: " + ssp);
+    }
+    return lag;
+  }
+
+  /*
+    Using the consumer to poll the messages from the stream.
+   */
+  private void fetchMessages() {
+    Set<SystemStreamPartition> SSPsToFetch = new HashSet<>();
+    for (SystemStreamPartition ssp : nextOffsets.keySet()) {
+      if (sink.needsMoreMessages(ssp)) {
+        SSPsToFetch.add(ssp);
+      }
+    }
+    LOG.debug("pollConsumer {}", SSPsToFetch.size());
+    if (!SSPsToFetch.isEmpty()) {
+      kafkaConsumerMetrics.incBrokerReads(metricName, 0);
+
+      Map<SystemStreamPartition, List<IncomingMessageEnvelope>> response;
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("pollConsumer from following SSPs: {}; total#={}", SSPsToFetch, SSPsToFetch.size());
+      }
+      response = pollConsumer(SSPsToFetch, 500); // TODO should be default value from ConsumerConfig
+
+      // move the responses into the queue
+      for (Map.Entry<SystemStreamPartition, List<IncomingMessageEnvelope>> e : response.entrySet()) {
+        List<IncomingMessageEnvelope> envelopes = e.getValue();
+        if (envelopes != null) {
+          moveMessagesToTheirQueue(e.getKey(), envelopes);
+        }
+      }
+
+      populateCurrentLags(SSPsToFetch); // find the current lag for each SSP
+    } else { // nothing to read
+
+      LOG.debug("No topic/partitions need to be fetched for consumer {} right now. Sleeping {}ms.", kafkaConsumer,
+          SLEEP_MS_WHILE_NO_TOPIC_PARTITION);
+
+      kafkaConsumerMetrics.incBrokerSkippedFetchRequests(metricName, 0);
+
+      try {
+        Thread.sleep(SLEEP_MS_WHILE_NO_TOPIC_PARTITION);
+      } catch (InterruptedException e) {
+        LOG.warn("Sleep in fetchMessages was interrupted");
+      }
+    }
+    refreshLatencyMetrics();
+  }
+
+  private void refreshLatencyMetrics() {
+    for (Map.Entry<SystemStreamPartition, Long> e : nextOffsets.entrySet()) {
+      SystemStreamPartition ssp = e.getKey();
+      Long offset = e.getValue();
+      TopicAndPartition tp = NewKafkaSystemConsumer.toTopicAndPartition(ssp);
+      Long lag = latestLags.get(ssp);
+      LOG.trace("Latest offset of {} is  {}; lag = {}", ssp, offset, lag);
+      if (lag != null && offset != null && lag >= 0) {
+        long streamEndOffset = offset.longValue() + lag.longValue();
+        // update the metrics
+        kafkaConsumerMetrics.setHighWatermarkValue(tp, streamEndOffset);
+        kafkaConsumerMetrics.setLagValue(tp, lag.longValue());
+      }
+    }
+  }
+
+  boolean isRunning() {
+    return isRunning;
+  }
+
+  Throwable getFailureCause() {
+    return failureCause;
+  }
+
+  public void stop(long timeout) {
+    LOG.info("Shutting down LiKafkaConsumerProxy poll thread:" + toString());
+
+    isRunning = false;
+    try {
+      consumerPollThread.join(timeout);
+    } catch (InterruptedException e) {
+      LOG.warn("Join in LiKafkaConsumerProxy has failed", e);
+      consumerPollThread.interrupt();
+    }
+  }
+}
+
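
In outline, the owning consumer drives this proxy through the lifecycle below. This
is a minimal Java sketch: the constructor arguments mirror the diff above, while
consumer, sink, metrics, ssp and the other locals are assumed to be provided by the
enclosing NewKafkaSystemConsumer:

    KafkaConsumerProxy<byte[], byte[]> proxy = new KafkaConsumerProxy<>(
        consumer, systemName, clientId, sink, metrics, metricName, valueUnwrapper);

    // Register every SSP with its vetted starting offset before starting the poll
    // thread; addTopicPartition is only meant to be called during setup.
    proxy.addTopicPartition(ssp, nextOffset);

    proxy.start(); // spawns the daemon thread that loops fetchMessages()

    // fetchMessages() asks the sink which SSPs still need data, pauses all other
    // TopicPartitions, polls the shared KafkaConsumer, and pushes the resulting
    // IncomingMessageEnvelopes back into the sink (the BlockingEnvelopeMap).

    if (!proxy.isRunning()) {
      // the poll thread surfaces failures to the container via getFailureCause()
      throw new SamzaException(proxy.getFailureCause());
    }

    proxy.stop(1000L); // join the poll thread, waiting up to one second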

http://git-wip-us.apache.org/repos/asf/samza/blob/72544606/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index 9f0b5f2..c7f6aed 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -19,16 +19,21 @@
 
 package org.apache.samza.system.kafka
 
+import java.util
 import java.util.Properties
+
+import kafka.consumer.ConsumerConfig
 import kafka.utils.ZkUtils
+import org.apache.kafka.clients.consumer.KafkaConsumer
 import org.apache.samza.SamzaException
 import org.apache.samza.config.ApplicationConfig.ApplicationMode
-import org.apache.samza.util.{Logging, KafkaUtil, ExponentialSleepStrategy, ClientUtilTopicMetadataStore}
-import org.apache.samza.config.{KafkaConfig, ApplicationConfig, StreamConfig, Config}
+import org.apache.samza.util._
+import org.apache.samza.config.{ApplicationConfig, Config, KafkaConfig, StreamConfig}
 import org.apache.samza.metrics.MetricsRegistry
 import org.apache.samza.config.KafkaConfig.Config2Kafka
 import org.apache.samza.config.TaskConfig.Config2Task
 import org.apache.kafka.clients.producer.KafkaProducer
+import org.apache.kafka.common.serialization.ByteArrayDeserializer
 import org.apache.samza.system.SystemFactory
 import org.apache.samza.config.StorageConfig._
 import org.apache.samza.system.SystemProducer
@@ -53,21 +58,35 @@ class KafkaSystemFactory extends SystemFactory with Logging {
     // Kind of goofy to need a producer config for consumers, but we need metadata.
     val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId)
     val bootstrapServers = producerConfig.bootsrapServers
-    val consumerConfig = config.getKafkaSystemConsumerConfig(systemName, clientId)
+    //val consumerConfig = config.getKafkaSystemConsumerConfig(systemName, clientId)
 
-    val timeout = consumerConfig.socketTimeoutMs
-    val bufferSize = consumerConfig.socketReceiveBufferBytes
-    val fetchSize = new StreamFetchSizes(consumerConfig.fetchMessageMaxBytes, config.getFetchMessageMaxBytesTopics(systemName))
-    val consumerMinSize = consumerConfig.fetchMinBytes
-    val consumerMaxWait = consumerConfig.fetchWaitMaxMs
-    val autoOffsetResetDefault = consumerConfig.autoOffsetReset
+    //val kafkaConfig = new KafkaConfig(config)
+
+
+   // val timeout = consumerConfig.socketTimeoutMs
+    //val bufferSize = consumerConfig.socketReceiveBufferBytes
+    //val fetchSize = new StreamFetchSizes(consumerConfig.fetchMessageMaxBytes, config.getFetchMessageMaxBytesTopics(systemName))
+    //val consumerMinSize = consumerConfig.fetchMinBytes
+    //val consumerMaxWait = consumerConfig.fetchWaitMaxMs
+    //val autoOffsetResetDefault = consumerConfig.autoOffsetReset
     val autoOffsetResetTopics = config.getAutoOffsetResetTopics(systemName)
     val fetchThreshold = config.getConsumerFetchThreshold(systemName).getOrElse("50000").toInt
     val fetchThresholdBytes = config.getConsumerFetchThresholdBytes(systemName).getOrElse("-1").toLong
-    val offsetGetter = new GetOffset(autoOffsetResetDefault, autoOffsetResetTopics)
-    val metadataStore = new ClientUtilTopicMetadataStore(bootstrapServers, clientId, timeout)
+    //val offsetGetter = new GetOffset(autoOffsetResetDefault, autoOffsetResetTopics)
+    //val metadataStore = new ClientUtilTopicMetadataStore(bootstrapServers, clientId, timeout)
 
-    new KafkaSystemConsumer(
+
+    val kafkaConsumer: KafkaConsumer[Array[Byte], Array[Byte]] =
+      NewKafkaSystemConsumer.getKafkaConsumerImpl(systemName, clientId, config)
+
+    def valueUnwrapper: NewKafkaSystemConsumer.ValueUnwrapper[Array[Byte]] = null // TODO add real unwrapper from
+    val kc = new NewKafkaSystemConsumer (
+      kafkaConsumer, systemName, config, clientId,
+      metrics, new SystemClock, false, valueUnwrapper)
+
+    kc
+    /*
+      new KafkaSystemConsumer(
       systemName = systemName,
       systemAdmin = getAdmin(systemName, config),
       metrics = metrics,
@@ -82,7 +101,18 @@ class KafkaSystemFactory extends SystemFactory with Logging {
       fetchThresholdBytes = fetchThresholdBytes,
       fetchLimitByBytesEnabled = config.isConsumerFetchThresholdBytesEnabled(systemName),
       offsetGetter = offsetGetter)
+      */
+  }
+
+  /*
+  def getKafkaConsumerImpl(systemName: String, config: KafkaConfig) = {
+    info("Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}", systemName, config)
+
+    val byteArrayDeserializer = new ByteArrayDeserializer
+    new KafkaConsumer[Array[Byte], Array[Byte]](config.configForVanillaConsumer(),
+      byteArrayDeserializer, byteArrayDeserializer)
   }
+  */
 
   def getProducer(systemName: String, config: Config, registry: MetricsRegistry): SystemProducer = {
     val clientId = KafkaUtil.getClientId("samza-producer", config)
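
Rendered in Java, the wiring getConsumer() now performs amounts to the sketch below;
this is illustrative only (the Scala above is the actual change), with metrics
standing for the KafkaSystemConsumerMetrics instance the factory already builds:

    KafkaConsumer<byte[], byte[]> kafkaConsumer =
        NewKafkaSystemConsumer.getKafkaConsumerImpl(systemName, clientId, config);

    NewKafkaSystemConsumer<byte[], byte[]> consumer = new NewKafkaSystemConsumer<>(
        kafkaConsumer, systemName, config, clientId, metrics, new SystemClock(),
        false, // fetchThresholdBytesEnabled
        null); // valueUnwrapper; still a TODO in the diff above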

http://git-wip-us.apache.org/repos/asf/samza/blob/72544606/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
new file mode 100644
index 0000000..26db610
--- /dev/null
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -0,0 +1,403 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.samza.system.kafka;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
+import kafka.common.TopicAndPartition;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.samza.Partition;
+import org.apache.samza.SamzaException;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.system.IncomingMessageEnvelope;
+import org.apache.samza.system.SystemConsumer;
+import org.apache.samza.system.SystemStream;
+import org.apache.samza.system.SystemStreamPartition;
+import org.apache.samza.util.BlockingEnvelopeMap;
+import org.apache.samza.util.Clock;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.Option;
+
+
+public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements SystemConsumer {
+
+  private static final Logger LOG = LoggerFactory.getLogger(NewKafkaSystemConsumer.class);
+
+  /**
+   * Provides a way to unwrap the value further. It is used for intermediate stream messages.
+   * @param <T> value type
+   */
+  public interface ValueUnwrapper<T> {
+    Object unwrapValue(SystemStream systemStream, T value);
+  }
+
+  private static final long FETCH_THRESHOLD = 50000;
+  private static final long FETCH_THRESHOLD_BYTES = -1L;
+  private final Consumer<K,V> kafkaConsumer;
+  private final String systemName;
+  private final KafkaSystemConsumerMetrics samzaConsumerMetrics;
+  private final String clientId;
+  private final String metricName;
+  private final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
+  private final AtomicBoolean stopped = new AtomicBoolean(false);
+  private final AtomicBoolean started = new AtomicBoolean(false);
+  private final Config config;
+  private final boolean fetchThresholdBytesEnabled;
+  private final ValueUnwrapper<V> valueUnwrapper;
+
+  // This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
+  private KafkaConsumerMessageSink messageSink;
+  // proxy is doing the actual reading
+  private KafkaConsumerProxy proxy;
+
+  /* package private */final Map<TopicPartition, String> topicPartitions2Offset = new HashMap<>();
+  /* package private */long perPartitionFetchThreshold;
+  /* package private */long perPartitionFetchThresholdBytes;
+
+  // TODO - consider new class for KafkaSystemConsumerMetrics
+
+  /**
+   * @param kafkaConsumer the underlying Kafka consumer to read from
+   * @param systemName the name of this Samza system
+   * @param config the Samza job config
+   * @param clientId the client id used by the Kafka consumer
+   * @param metrics consumer metrics to update
+   * @param clock the clock used by the BlockingEnvelopeMap
+   * @param fetchThresholdBytesEnabled whether the byte-based fetch threshold is enabled
+   * @param valueUnwrapper optional unwrapper applied to each record value (may be null)
+   */
+  public NewKafkaSystemConsumer(
+      Consumer<K,V> kafkaConsumer,
+      String systemName,
+      Config config,
+      String clientId,
+      KafkaSystemConsumerMetrics metrics,
+      Clock clock,
+      boolean fetchThresholdBytesEnabled,
+      ValueUnwrapper<V> valueUnwrapper) {
+
+    super(metrics.registry(), clock, metrics.getClass().getName());
+
+    this.samzaConsumerMetrics = metrics;
+    this.clientId = clientId;
+    this.systemName = systemName;
+    this.config = config;
+    this.fetchThresholdBytesEnabled = fetchThresholdBytesEnabled;
+    this.metricName = systemName + " " + clientId;
+
+    this.kafkaConsumer = kafkaConsumer;
+    this.valueUnwrapper = valueUnwrapper;
+
+    LOG.info(String.format(
+        "Created NewKafkaSystemConsumer for system=%s, clientId=%s, metricName=%s with kafkaConsumer=%s",
+        systemName, clientId, metricName, this.kafkaConsumer.toString()));
+  }
+
+  public static KafkaConsumer<byte[], byte[]> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
+
+    Map<String, String> injectProps = new HashMap<>();
+    injectProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+    injectProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+
+    KafkaConsumerConfig consumerConfig =
+        KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
+
+    LOG.info("==============>Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}", systemName, consumerConfig.originals());
+    /*
+    Map<String, Object> kafkaConsumerConfig = consumerConfig.originals().entrySet().stream()
+        .collect(Collectors.toMap((kv)->kv.getKey(), (kv)->(Object)kv.getValue()));
+*/
+
+    return new KafkaConsumer<byte[], byte[]>(consumerConfig.originals());
+  }
+
+  /**
+   * Returns the system name for this consumer.
+   * @return system name
+   */
+  public String getSystemName() {
+    return systemName;
+  }
+
+  @Override
+  public void start() {
+    if (!started.compareAndSet(false, true)) {
+      LOG.warn("Attempting to start an already started consumer.");
+      return;
+    }
+    if (stopped.get()) {
+      LOG.warn("Attempting to start a stopped consumer.");
+      return;
+    }
+    // initialize the subscriptions for all the registered TopicPartitions
+    startSubscription();
+    // needs to be called after all the registrations are completed
+    setFetchThresholds();
+    // Create the proxy to do the actual message reading. It is a separate thread that reads
+    // the messages from the stream and puts them into the sink.
+    createConsumerProxy();
+    startConsumer();
+    LOG.info("Consumer for system {} started.", systemName);
+  }
+
+  private void startSubscription() {
+    // subscribe to all the registered TopicPartitions
+    LOG.info("startSubscription for TopicPartitions: {}", topicPartitions2SSP.keySet());
+    try {
+      synchronized (kafkaConsumer) {
+        // we are using assign (and not subscribe), so we need to specify both topic and partition
+        kafkaConsumer.assign(topicPartitions2SSP.keySet());
+      }
+    } catch (Exception e) {
+      LOG.warn("startSubscription failed.", e);
+      throw new SamzaException(e);
+    }
+  }
+
+  private void createConsumerProxy() {
+    // create a sink for passing the messages between the proxy and the consumer
+    messageSink = new KafkaConsumerMessageSink();
+
+    // create the thread with the consumer
+    proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink,
+        samzaConsumerMetrics, metricName, valueUnwrapper);
+
+    LOG.info("==============>Created consumer proxy: " + proxy);
+  }
+
+  /*
+   Set the offsets to start from.
+   Add the TopicPartitions to the proxy.
+   Start the proxy thread.
+   */
+  private void startConsumer() {
+    //set the offset for each TopicPartition
+    topicPartitions2Offset.forEach((tp, startingOffsetString) -> {
+      long startingOffset = Long.valueOf(startingOffsetString);
+
+      try {
+        synchronized (kafkaConsumer) {
+          // TODO in the future we may need to add special handling here for BEGIN/END_OFFSET,
+          // which would translate into kafkaConsumer.seekToBeginning()/seekToEnd()
+          kafkaConsumer.seek(tp, startingOffset); // this value should already be the 'upcoming' value
+        }
+      } catch (Exception e) {
+        // all other exceptions - non recoverable
+        LOG.error("Got Exception while seeking to " + startingOffsetString + " for " + tp, e);
+        throw new SamzaException(e);
+      }
+
+      LOG.info("==============>Changing Consumer's position for tp = " + tp + " to " + startingOffsetString);
+
+      // add the partition to the proxy
+      proxy.addTopicPartition(topicPartitions2SSP.get(tp), startingOffset);
+    });
+
+    // start the proxy thread
+    if (proxy != null && !proxy.isRunning()) {
+      proxy.start();
+    }
+  }
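+
+  // Note on the seek semantics above: with a checkpointed offset of, say, "5", seek(tp, 5)
+  // positions the consumer so that the next record returned by poll() has offset 5, i.e. the
+  // stored offset is the upcoming one to read, not the last one processed.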
+
+  private void setFetchThresholds() {
+    // get the thresholds, and set defaults if not defined.
+    KafkaConfig kafkaConfig = new KafkaConfig(config);
+    Option<String> fetchThresholdOption = kafkaConfig.getConsumerFetchThreshold(systemName);
+    long fetchThreshold = FETCH_THRESHOLD;
+    if (fetchThresholdOption.isDefined()) {
+      fetchThreshold = Long.valueOf(fetchThresholdOption.get());
+      LOG.info("fetchThresholdOption is defined. fetchThreshold=" + fetchThreshold);
+    }
+    Option<String> fetchThresholdBytesOption = kafkaConfig.getConsumerFetchThresholdBytes(systemName);
+    long fetchThresholdBytes = FETCH_THRESHOLD_BYTES;
+    if (fetchThresholdBytesOption.isDefined()) {
+      fetchThresholdBytes = Long.valueOf(fetchThresholdBytesOption.get());
+      LOG.info("fetchThresholdBytesOption is defined. fetchThresholdBytes=" + fetchThresholdBytes);
+    }
+    LOG.info("fetchThresholdBytes = " + fetchThresholdBytes + "; fetchThreshold=" + fetchThreshold);
+    LOG.info("topicPartitions2Offset #=" + topicPartitions2Offset.size() + "; topicPartition2SSP #=" + topicPartitions2SSP.size());
+
+    if (topicPartitions2SSP.size() > 0) {
+      perPartitionFetchThreshold = fetchThreshold / topicPartitions2SSP.size();
+      LOG.info("perPartitionFetchThreshold=" + perPartitionFetchThreshold);
+      if (fetchThresholdBytesEnabled) {
+        // currently this feature cannot be enabled, because we do not have the size of the messages available.
+        // messages get double buffered, hence divide by 2
+        perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / topicPartitions2SSP.size();
+        LOG.info("perPartitionFetchThresholdBytes is enabled. perPartitionFetchThresholdBytes=" + perPartitionFetchThresholdBytes);
+      }
+    }
+  }
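+
+  // Worked example for the thresholds above (illustrative numbers): with the default
+  // fetchThreshold of 50000 and 25 registered partitions, perPartitionFetchThreshold is
+  // 50000 / 25 = 2000 buffered messages per partition. With fetchThresholdBytes of 100MB,
+  // perPartitionFetchThresholdBytes would be (100MB / 2) / 25 = 2MB, halved because
+  // messages are double buffered.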
+
+  @Override
+  public void stop() {
+    if (!stopped.compareAndSet(false, true)) {
+      LOG.warn("Attempting to stop an already stopped consumer.");
+      return;
+    }
+
+    LOG.info("Stopping NewKafkaSystemConsumer " + this);
+    // stop the proxy (with 5 minutes timeout)
+    if (proxy != null) {
+      proxy.stop(TimeUnit.MINUTES.toMillis(5));
+    }
+
+    try {
+      synchronized (kafkaConsumer) {
+        kafkaConsumer.close();
+      }
+    } catch (Exception e) {
+      LOG.warn("Failed to stop NewKafkaSystemConsumer " + this, e);
+    }
+  }
+
+  /*
+   record the ssp and the offset. Do not submit it to the consumer yet.
+   */
+  @Override
+  public void register(SystemStreamPartition systemStreamPartition, String offset) {
+    if (!systemStreamPartition.getSystem().equals(systemName)) {
+      LOG.warn("ignoring SSP " + systemStreamPartition + ", because this consumer's system is " + systemName);
+      return;
+    }
+    super.register(systemStreamPartition, offset);
+
+    TopicPartition tp = toTopicPartition(systemStreamPartition);
+
+    topicPartitions2SSP.put(tp, systemStreamPartition);
+
+    LOG.info("==============>registering ssp = " + systemStreamPartition + " with offset " + offset);
+
+    String existingOffset = topicPartitions2Offset.get(tp);
+    // register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
+    if (existingOffset == null || compareOffsets(existingOffset, offset) > 0) {
+      topicPartitions2Offset.put(tp, offset);
+    }
+
+    samzaConsumerMetrics.registerTopicAndPartition(toTopicAndPartition(tp));
+  }
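+
+  // Example: if the same partition is registered twice, once with offset "45" and once with
+  // offset "3", the consumer keeps "3", since compareOffsets("45", "3") > 0 and starting from
+  // the older offset guarantees no messages are missed.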
+
+  /**
+   * Compare two String offsets.
+   * Note: KafkaSystemAdmin has a method that does this, but using it would require instantiating
+   * a SystemAdmin for each consumer.
+   * @param off1 the first offset
+   * @param off2 the second offset
+   * @return see {@link Long#compareTo(Long)}
+   */
+  public static int compareOffsets(String off1, String off2) {
+    return Long.valueOf(off1).compareTo(Long.valueOf(off2));
+  }
+
+  @Override
+  public String toString() {
+    return systemName + " " + clientId + "/" + super.toString();
+  }
+
+  @Override
+  public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
+      Set<SystemStreamPartition> systemStreamPartitions, long timeout)
+      throws InterruptedException {
+
+    // check if the proxy is running
+    if (!proxy.isRunning()) {
+      stop();
+      if (proxy.getFailureCause() != null) {
+        String message = "KafkaConsumerProxy has stopped";
+        if (proxy.getFailureCause() instanceof org.apache.kafka.common.errors.TopicAuthorizationException) {
+          message += " due to TopicAuthorizationException. Please verify that the ACLs for your topic are set up correctly.";
+        }
+        throw new SamzaException(message, proxy.getFailureCause());
+      } else {
+        LOG.warn("Failure cause not populated for KafkaConsumerProxy");
+        throw new SamzaException("KafkaConsumerProxy has stopped");
+      }
+    }
+
+    return super.poll(systemStreamPartitions, timeout);
+  }
+
+  public static TopicAndPartition toTopicAndPartition(TopicPartition tp) {
+    return new TopicAndPartition(tp.topic(), tp.partition());
+  }
+
+  public static TopicAndPartition toTopicAndPartition(SystemStreamPartition ssp) {
+    return new TopicAndPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
+  }
+
+  public static TopicPartition toTopicPartition(SystemStreamPartition ssp) {
+    return new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
+  }
+
+  public static SystemStreamPartition toSystemStreamPartition(String systemName, TopicAndPartition tp) {
+    return new SystemStreamPartition(systemName, tp.topic(), new Partition(tp.partition()));
+  }
+
+  ////////////////////////////////////
+  // inner class for the message sink
+  ////////////////////////////////////
+  public class KafkaConsumerMessageSink {
+
+    public void setIsAtHighWatermark(SystemStreamPartition ssp, boolean isAtHighWatermark) {
+      setIsAtHead(ssp, isAtHighWatermark);
+    }
+
+    boolean needsMoreMessages(SystemStreamPartition ssp) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("needsMoreMessages for SSP: {}. fetchLimitByBytes enabled={}; messagesSizeInQueue={} (limit={}); "
+                + "messagesNumInQueue={} (limit={})", ssp, fetchThresholdBytesEnabled,
+            getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes,
+            getNumMessagesInQueue(ssp), perPartitionFetchThreshold);
+      }
+
+      if (fetchThresholdBytesEnabled) {
+        return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes; // TODO Validate
+      } else {
+        return getNumMessagesInQueue(ssp) < perPartitionFetchThreshold;
+      }
+    }
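+
+    // Example: with perPartitionFetchThreshold = 2000 (illustrative; the actual value depends on
+    // the number of registered partitions), the proxy keeps fetching for an SSP until 2000
+    // envelopes are buffered for it, then skips that SSP on subsequent polls until the task
+    // drains its queue back below the threshold.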
+
+    void addMessage(SystemStreamPartition ssp, IncomingMessageEnvelope envelope) {
+      LOG.debug("Incoming message ssp = {}: envelope = {}.", ssp, envelope);
+
+      try {
+        put(ssp, envelope);
+      } catch (InterruptedException e) {
+        throw new SamzaException(
+            String.format("Interrupted while trying to add message with offset %s for ssp %s",
+                envelope.getOffset(), ssp), e);
+      }
+    }
+  }  // end of KafkaConsumerMessageSink class
+  ///////////////////////////////////////////////////////////////////////////
+}
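
For orientation, a minimal lifecycle sketch for this consumer as it stands in this commit
(config, clientId and metrics are assumed to be in scope, SystemClock.instance() is assumed
to be Samza's wall-clock Clock, and passing null for the ValueUnwrapper assumes no
unwrapping is needed):

    NewKafkaSystemConsumer<byte[], byte[]> consumer =
        new NewKafkaSystemConsumer<>(
            NewKafkaSystemConsumer.getKafkaConsumerImpl("kafka", clientId, config),
            "kafka", config, clientId, metrics, SystemClock.instance(), false, null);

    SystemStreamPartition ssp = new SystemStreamPartition("kafka", "PageViewEvent", new Partition(0));
    consumer.register(ssp, "0");   // register all SSPs before start()
    consumer.start();              // assign partitions, seek to offsets, start the proxy thread
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> envelopes =
        consumer.poll(Collections.singleton(ssp), 500);  // blocks for up to 500 ms
    consumer.stop();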

http://git-wip-us.apache.org/repos/asf/samza/blob/72544606/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala b/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
index 065170c..8544dbf 100644
--- a/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
+++ b/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
@@ -88,12 +88,12 @@ class TestKafkaCheckpointManager extends KafkaServerTestHarness {
     zkClient.close
 
     // read before topic exists should result in a null checkpoint
-    //val readCp = readCheckpoint(checkpointTopic, taskName)
-    //assertNull(readCp)
+    val readCp = readCheckpoint(checkpointTopic, taskName)
+    assertNull(readCp)
 
     writeCheckpoint(checkpointTopic, taskName, checkpoint1)
+
     assertEquals(checkpoint1, readCheckpoint(checkpointTopic, taskName))
-try {Thread.sleep(20000)} catch { case e:Exception =>() }
     // writing a second message and reading it returns a more recent checkpoint
     writeCheckpoint(checkpointTopic, taskName, checkpoint2)
     assertEquals(checkpoint2, readCheckpoint(checkpointTopic, taskName))
@@ -194,7 +194,7 @@ try {Thread.sleep(20000)} catch { case e:Exception =>() }
     val systemFactory = Util.getObj(systemFactoryClassName, classOf[SystemFactory])
 
     val spec = new KafkaStreamSpec("id", cpTopic, checkpointSystemName, 1, 1, props)
-    System.out.println("CONFIG:" + config)
+    System.out.println("CONFIG = " + config)
     new KafkaCheckpointManager(spec, systemFactory, failOnTopicValidation, config, new NoOpMetricsRegistry, serde)
   }
 


[06/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/5e6f5fb5
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/5e6f5fb5
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/5e6f5fb5

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 5e6f5fb5f9a9ee12ce35ee8eb1836a058521df20
Parents: 410ce78 5f81b8d
Author: Boris Shkolnik <bs...@linkedin.com>
Authored: Wed Oct 25 09:50:37 2017 -0700
Committer: Boris Shkolnik <bs...@linkedin.com>
Committed: Wed Oct 25 09:50:37 2017 -0700

----------------------------------------------------------------------
 build.gradle                                    |   1 +
 .../apache/samza/storage/kv/KeyValueStore.java  |  69 ++++---------
 .../apache/samza/task/StreamOperatorTask.java   |   2 +-
 .../samza/execution/TestExecutionPlanner.java   |   8 +-
 .../samza/system/kafka/KafkaStreamSpec.java     |   9 ++
 .../kafka/KafkaCheckpointManager.scala          | 103 +++++++------------
 .../kafka/KafkaCheckpointManagerFactory.scala   |  21 +---
 .../org/apache/samza/config/KafkaConfig.scala   |  37 ++++++-
 .../samza/system/kafka/KafkaSystemAdmin.scala   |   9 +-
 .../samza/system/kafka/KafkaSystemFactory.scala |  22 +++-
 .../TestKafkaCheckpointManagerFactory.java      |  51 +++++++++
 .../kafka/TestKafkaSystemFactoryJava.java       |  60 +++++++++++
 .../kafka/TestKafkaCheckpointManager.scala      |   6 +-
 .../apache/samza/config/TestKafkaConfig.scala   |  13 +++
 .../kv/inmemory/InMemoryKeyValueStore.scala     |   8 --
 .../samza/storage/kv/RocksDbKeyValueStore.scala |  86 ++++++----------
 .../storage/kv/TestRocksDbKeyValueStore.scala   |   4 +-
 .../apache/samza/storage/kv/CachedStore.scala   |   2 +-
 .../samza/storage/kv/MockKeyValueStore.scala    |   8 --
 19 files changed, 303 insertions(+), 216 deletions(-)
----------------------------------------------------------------------



[31/47] samza git commit: Replaced KafkaSystemConsumer, based on SimpleConsumer, with NewKafkaSystemConsumer, based on high level Kafka consumer

Posted by bo...@apache.org.
Replaced KafkaSystemConsumer, based on SimpleConsumer, with NewKafkaSystemConsumer, based on high level Kafka consumer


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/332a0481
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/332a0481
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/332a0481

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 332a04815bbc5d526b736d82e5f05262b0922d57
Parents: bab5bdd
Author: Boris S <bo...@apache.org>
Authored: Wed Sep 5 11:51:58 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Sep 5 11:51:58 2018 -0700

----------------------------------------------------------------------
 .../samza/system/IncomingMessageEnvelope.java   |   3 +-
 .../ClusterBasedJobCoordinator.java             |   2 +-
 .../stream/CoordinatorStreamSystemConsumer.java |   4 +-
 .../apache/samza/storage/StorageRecovery.java   |   2 +-
 .../samza/checkpoint/CheckpointTool.scala       |   2 +-
 .../apache/samza/checkpoint/OffsetManager.scala |   4 +-
 .../samza/coordinator/JobModelManager.scala     |   5 +-
 .../samza/job/local/ProcessJobFactory.scala     |   3 +-
 .../samza/job/local/ThreadJobFactory.scala      |  14 +-
 .../samza/coordinator/TestJobCoordinator.scala  |   4 +-
 .../clients/consumer/KafkaConsumerConfig.java   |  81 ++--
 .../samza/system/kafka/KafkaConsumerProxy.java  |  32 +-
 .../kafka/KafkaSystemConsumerMetrics.scala      |  69 ++-
 .../samza/system/kafka/KafkaSystemFactory.scala |  47 +-
 .../system/kafka/NewKafkaSystemConsumer.java    |  93 ++--
 .../samza/system/kafka/TestBrokerProxy.scala    | 437 -------------------
 .../test/integration/StreamTaskTestUtil.scala   |   8 +-
 17 files changed, 170 insertions(+), 640 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-api/src/main/java/org/apache/samza/system/IncomingMessageEnvelope.java
----------------------------------------------------------------------
diff --git a/samza-api/src/main/java/org/apache/samza/system/IncomingMessageEnvelope.java b/samza-api/src/main/java/org/apache/samza/system/IncomingMessageEnvelope.java
index 4d0ce2f..c5aed31 100644
--- a/samza-api/src/main/java/org/apache/samza/system/IncomingMessageEnvelope.java
+++ b/samza-api/src/main/java/org/apache/samza/system/IncomingMessageEnvelope.java
@@ -59,7 +59,8 @@ public class IncomingMessageEnvelope {
    * @param message A deserialized message received from the partition offset.
    * @param size size of the message and key in bytes.
    */
-  public IncomingMessageEnvelope(SystemStreamPartition systemStreamPartition, String offset, Object key, Object message, int size) {
+  public IncomingMessageEnvelope(SystemStreamPartition systemStreamPartition, String offset,
+      Object key, Object message, int size) {
     this.systemStreamPartition = systemStreamPartition;
     this.offset = offset;
     this.key = key;

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/java/org/apache/samza/clustermanager/ClusterBasedJobCoordinator.java
----------------------------------------------------------------------
diff --git a/samza-core/src/main/java/org/apache/samza/clustermanager/ClusterBasedJobCoordinator.java b/samza-core/src/main/java/org/apache/samza/clustermanager/ClusterBasedJobCoordinator.java
index 016d171..12e26f7 100644
--- a/samza-core/src/main/java/org/apache/samza/clustermanager/ClusterBasedJobCoordinator.java
+++ b/samza-core/src/main/java/org/apache/samza/clustermanager/ClusterBasedJobCoordinator.java
@@ -174,7 +174,7 @@ public class ClusterBasedJobCoordinator {
 
     // build a JobModelManager and ChangelogStreamManager and perform partition assignments.
     changelogStreamManager = new ChangelogStreamManager(coordinatorStreamManager);
-    jobModelManager = JobModelManager.apply(coordinatorStreamManager, changelogStreamManager.readPartitionMapping());
+    jobModelManager = JobModelManager.apply(coordinatorStreamManager.getConfig(), changelogStreamManager.readPartitionMapping());
 
     config = jobModelManager.jobModel().getConfig();
     hasDurableStores = new StorageConfig(config).hasDurableStores();

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java b/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
index 0bdb874..38255a2 100644
--- a/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
+++ b/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
@@ -176,7 +176,7 @@ public class CoordinatorStreamSystemConsumer {
             valueMap = messageSerde.fromBytes((byte[]) envelope.getMessage());
           }
           CoordinatorStreamMessage coordinatorStreamMessage = new CoordinatorStreamMessage(keyArray, valueMap);
-          log.info("Received coordinator stream message: {}", coordinatorStreamMessage);
+          log.debug("Received coordinator stream message: {}", coordinatorStreamMessage);
           // Remove any existing entry. Set.add() does not add if the element already exists.
           if (bootstrappedMessages.remove(coordinatorStreamMessage)) {
             log.debug("Removed duplicate message: {}", coordinatorStreamMessage);
@@ -194,7 +194,7 @@ public class CoordinatorStreamSystemConsumer {
         }
 
         bootstrappedStreamSet = Collections.unmodifiableSet(bootstrappedMessages);
-        log.info("Bootstrapped configuration: {}", configMap);
+        log.debug("Bootstrapped configuration: {}", configMap);
         isBootstrapped = true;
       } catch (Exception e) {
         throw new SamzaException(e);

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/java/org/apache/samza/storage/StorageRecovery.java
----------------------------------------------------------------------
diff --git a/samza-core/src/main/java/org/apache/samza/storage/StorageRecovery.java b/samza-core/src/main/java/org/apache/samza/storage/StorageRecovery.java
index f9c6c0c..c6dd9a7 100644
--- a/samza-core/src/main/java/org/apache/samza/storage/StorageRecovery.java
+++ b/samza-core/src/main/java/org/apache/samza/storage/StorageRecovery.java
@@ -131,7 +131,7 @@ public class StorageRecovery extends CommandLine {
     coordinatorStreamManager.start();
     coordinatorStreamManager.bootstrap();
     ChangelogStreamManager changelogStreamManager = new ChangelogStreamManager(coordinatorStreamManager);
-    JobModel jobModel = JobModelManager.apply(coordinatorStreamManager, changelogStreamManager.readPartitionMapping()).jobModel();
+    JobModel jobModel = JobModelManager.apply(coordinatorStreamManager.getConfig(), changelogStreamManager.readPartitionMapping()).jobModel();
     containers = jobModel.getContainers();
     coordinatorStreamManager.stop();
   }

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/scala/org/apache/samza/checkpoint/CheckpointTool.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/checkpoint/CheckpointTool.scala b/samza-core/src/main/scala/org/apache/samza/checkpoint/CheckpointTool.scala
index 0ca8a3d..65fb419 100644
--- a/samza-core/src/main/scala/org/apache/samza/checkpoint/CheckpointTool.scala
+++ b/samza-core/src/main/scala/org/apache/samza/checkpoint/CheckpointTool.scala
@@ -170,7 +170,7 @@ class CheckpointTool(config: Config, newOffsets: TaskNameToCheckpointMap, manage
     coordinatorStreamManager.start
     coordinatorStreamManager.bootstrap
     val changelogManager = new ChangelogStreamManager(coordinatorStreamManager)
-    val jobModelManager = JobModelManager(coordinatorStreamManager, changelogManager.readPartitionMapping())
+    val jobModelManager = JobModelManager(coordinatorStreamManager.getConfig, changelogManager.readPartitionMapping())
     val taskNames = jobModelManager
       .jobModel
       .getContainers

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala b/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
index d2b6667..53d5e98 100644
--- a/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
+++ b/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
@@ -304,7 +304,7 @@ class OffsetManager(
    */
   private def loadOffsetsFromCheckpointManager {
     if (checkpointManager != null) {
-      info("Loading offsets from checkpoint manager.")
+      debug("Loading offsets from checkpoint manager.")
 
       checkpointManager.start
       val result = systemStreamPartitions
@@ -332,7 +332,7 @@ class OffsetManager(
    * Loads last processed offsets for a single taskName.
    */
   private def restoreOffsetsFromCheckpoint(taskName: TaskName): Map[TaskName, Map[SystemStreamPartition, String]] = {
-    info("Loading checkpoints for taskName: %s." format taskName)
+    debug("Loading checkpoints for taskName: %s." format taskName)
 
     val checkpoint = checkpointManager.readLastCheckpoint(taskName)
 

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala b/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
index f939736..f7ffd4e 100644
--- a/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
+++ b/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
@@ -64,12 +64,11 @@ object JobModelManager extends Logging {
    * a) Reads the jobModel from coordinator stream using the job's configuration.
    * b) Recomputes changelog partition mapping based on jobModel and job's configuration.
    * c) Builds JobModelManager using the jobModel read from coordinator stream.
-   * @param coordinatorStreamManager Coordinator stream manager.
+   * @param config Coordinator stream manager config
    * @param changelogPartitionMapping The changelog partition-to-task mapping.
    * @return JobModelManager
    */
-  def apply(coordinatorStreamManager: CoordinatorStreamManager, changelogPartitionMapping: util.Map[TaskName, Integer]) = {
-    val config = coordinatorStreamManager.getConfig
+  def apply(config: Config, changelogPartitionMapping: util.Map[TaskName, Integer]) = {
     val localityManager = new LocalityManager(config, new MetricsRegistryMap())
 
     // Map the name of each system to the corresponding SystemAdmin

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/scala/org/apache/samza/job/local/ProcessJobFactory.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ProcessJobFactory.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ProcessJobFactory.scala
index 642a484..64f516b 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ProcessJobFactory.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ProcessJobFactory.scala
@@ -50,7 +50,7 @@ class ProcessJobFactory extends StreamJobFactory with Logging {
     coordinatorStreamManager.bootstrap
     val changelogStreamManager = new ChangelogStreamManager(coordinatorStreamManager)
 
-    val coordinator = JobModelManager(coordinatorStreamManager, changelogStreamManager.readPartitionMapping())
+    val coordinator = JobModelManager(coordinatorStreamManager.getConfig, changelogStreamManager.readPartitionMapping())
     val jobModel = coordinator.jobModel
 
     val taskPartitionMappings: util.Map[TaskName, Integer] = new util.HashMap[TaskName, Integer]
@@ -61,6 +61,7 @@ class ProcessJobFactory extends StreamJobFactory with Logging {
     }
 
     changelogStreamManager.writePartitionMapping(taskPartitionMappings)
+    coordinatorStreamManager.stop()
 
     //create necessary checkpoint and changelog streams
     val checkpointManager = new TaskConfigJava(jobModel.getConfig).getCheckpointManager(metricsRegistry)

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
index 34cc2a0..15aa5a6 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
@@ -19,11 +19,9 @@
 
 package org.apache.samza.job.local
 
-import java.util.concurrent.{CountDownLatch, TimeUnit}
-
-import org.apache.samza.config.{Config, TaskConfigJava}
 import org.apache.samza.config.JobConfig._
 import org.apache.samza.config.ShellCommandConfig._
+import org.apache.samza.config.{Config, TaskConfigJava}
 import org.apache.samza.container.{SamzaContainer, SamzaContainerListener, TaskName}
 import org.apache.samza.coordinator.JobModelManager
 import org.apache.samza.coordinator.stream.CoordinatorStreamManager
@@ -38,8 +36,8 @@ import scala.collection.JavaConversions._
 import scala.collection.mutable
 
 /**
- * Creates a new Thread job with the given config
- */
+  * Creates a new Thread job with the given config
+  */
 class ThreadJobFactory extends StreamJobFactory with Logging {
   def getJob(config: Config): StreamJob = {
     info("Creating a ThreadJob, which is only meant for debugging.")
@@ -51,7 +49,8 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
     coordinatorStreamManager.bootstrap
     val changelogStreamManager = new ChangelogStreamManager(coordinatorStreamManager)
 
-    val coordinator = JobModelManager(coordinatorStreamManager, changelogStreamManager.readPartitionMapping())
+    val coordinator = JobModelManager(coordinatorStreamManager.getConfig, changelogStreamManager.readPartitionMapping())
+    coordinatorStreamManager.stop()
     val jobModel = coordinator.jobModel
 
     val taskPartitionMappings: mutable.Map[TaskName, Integer] = mutable.Map[TaskName, Integer]()
@@ -85,7 +84,7 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
 
     // Give developers a nice friendly warning if they've specified task.opts and are using a threaded job.
     config.getTaskOpts match {
-      case Some(taskOpts) => warn("%s was specified in config, but is not being used because job is being executed with ThreadJob. You probably want to run %s=%s." format (TASK_JVM_OPTS, STREAM_JOB_FACTORY_CLASS, classOf[ProcessJobFactory].getName))
+      case Some(taskOpts) => warn("%s was specified in config, but is not being used because job is being executed with ThreadJob. You probably want to run %s=%s." format(TASK_JVM_OPTS, STREAM_JOB_FACTORY_CLASS, classOf[ProcessJobFactory].getName))
       case _ => None
     }
 
@@ -117,7 +116,6 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
       threadJob
     } finally {
       coordinator.stop
-      coordinatorStreamManager.stop
       jmxServer.stop
     }
   }

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-core/src/test/scala/org/apache/samza/coordinator/TestJobCoordinator.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/test/scala/org/apache/samza/coordinator/TestJobCoordinator.scala b/samza-core/src/test/scala/org/apache/samza/coordinator/TestJobCoordinator.scala
index 42610ae..b85b4a4 100644
--- a/samza-core/src/test/scala/org/apache/samza/coordinator/TestJobCoordinator.scala
+++ b/samza-core/src/test/scala/org/apache/samza/coordinator/TestJobCoordinator.scala
@@ -275,7 +275,9 @@ class TestJobCoordinator extends FlatSpec with PrivateMethodTester {
     coordinatorStreamManager.start
     coordinatorStreamManager.bootstrap
     val changelogPartitionManager = new ChangelogStreamManager(coordinatorStreamManager)
-    JobModelManager(coordinatorStreamManager, changelogPartitionManager.readPartitionMapping())
+    val jobModelManager = JobModelManager(coordinatorStreamManager.getConfig, changelogPartitionManager.readPartitionMapping())
+    coordinatorStreamManager.stop()
+    jobModelManager
   }
 
   @Before

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index 88437ee..843e03d 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -43,11 +43,13 @@ public class KafkaConsumerConfig extends ConsumerConfig {
 
   private static final String PRODUCER_CLIENT_ID_PREFIX = "kafka-producer";
   private static final String CONSUMER_CLIENT_ID_PREFIX = "kafka-consumer";
+  private static final String ADMIN_CLIENT_ID_PREFIX = "samza-admin";
   private static final String SAMZA_OFFSET_LARGEST = "largest";
   private static final String SAMZA_OFFSET_SMALLEST = "smallest";
   private static final String KAFKA_OFFSET_LATEST = "latest";
   private static final String KAFKA_OFFSET_EARLIEST = "earliest";
   private static final String KAFKA_OFFSET_NONE = "none";
+
   /*
    * By default, KafkaConsumer will fetch ALL available messages for all the partitions.
    * This may cause memory issues. That's why we will limit the number of messages per partition we get on EACH poll().
@@ -59,8 +61,8 @@ public class KafkaConsumerConfig extends ConsumerConfig {
     super(props);
   }
 
-  public static KafkaConsumerConfig getKafkaSystemConsumerConfig(Config config,
-      String systemName, String clientId, Map<String, String> injectProps) {
+  public static KafkaConsumerConfig getKafkaSystemConsumerConfig(Config config, String systemName, String clientId,
+      Map<String, String> injectProps) {
 
     Config subConf = config.subset(String.format("systems.%s.consumer.", systemName), true);
 
@@ -72,17 +74,20 @@ public class KafkaConsumerConfig extends ConsumerConfig {
     consumerProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
     consumerProps.setProperty(ConsumerConfig.CLIENT_ID_CONFIG, clientId);
 
-    //Open-source Kafka Consumer configuration
-    consumerProps.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // Disable consumer auto-commit
+    //Kafka client configuration
+
+    // Disable consumer auto-commit because Samza controls commits
+    consumerProps.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
 
-    consumerProps.setProperty(
-        ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
-        getAutoOffsetResetValue(consumerProps));  // Translate samza config value to kafka config value
+    // Translate samza config value to kafka config value
+    consumerProps.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
+        getAutoOffsetResetValue(consumerProps));
 
     // make sure bootstrap configs are in ?? SHOULD WE FAIL IF THEY ARE NOT?
-    if (! subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
+    if (!subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
       // get it from the producer config
-      String bootstrapServer = config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
+      String bootstrapServer =
+          config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
       if (StringUtils.isEmpty(bootstrapServer)) {
         throw new SamzaException("Missing " + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG + " config  for " + systemName);
       }
@@ -90,25 +95,22 @@ public class KafkaConsumerConfig extends ConsumerConfig {
     }
 
     // Always use default partition assignment strategy. Do not allow override.
-    consumerProps.setProperty(
-        ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG,
-        RangeAssignor.class.getName());
-
+    consumerProps.setProperty(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG, RangeAssignor.class.getName());
 
     // the consumer is fully typed, and deserialization can be too. But in case it is not provided we should
     // default to byte[]
-    if ( !config.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
-      LOG.info("default key serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
+    if (!config.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
+      LOG.info("setting default key serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
       consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
     }
-    if ( !config.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
-      LOG.info("default value serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
+    if (!config.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
+      LOG.info("setting default value serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
       consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
     }
 
-
     // NOT SURE THIS IS NEEDED TODO
-    String maxPollRecords = subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT);;
+    String maxPollRecords =
+        subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT);
     consumerProps.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
 
     // put overrides
@@ -122,38 +124,37 @@ public class KafkaConsumerConfig extends ConsumerConfig {
     JobConfig jobConfig = new JobConfig(config);
     Option<String> jobIdOption = jobConfig.getJobId();
     Option<String> jobNameOption = jobConfig.getName();
-    return (jobNameOption.isDefined()? jobNameOption.get() : "undefined_job_name") + "-"
-        + (jobIdOption.isDefined()? jobIdOption.get() : "undefined_job_id");
+    return (jobNameOption.isDefined() ? jobNameOption.get() : "undefined_job_name") + "-" + (jobIdOption.isDefined()
+        ? jobIdOption.get() : "undefined_job_id");
   }
+
   // client id should be unique per job
-  public static String getClientId(String id, Config config) {
+  public static String getClientId(Config config) {
+    return getClientId(CONSUMER_CLIENT_ID_PREFIX, config);
+  }
+
+  public static String getProducerClientId(Config config) {
+    return getClientId(PRODUCER_CLIENT_ID_PREFIX, config);
+  }
+
+  public static String getAdminClientId(Config config) {
+    return getClientId(ADMIN_CLIENT_ID_PREFIX, config);
+  }
+
+  private static String getClientId(String id, Config config) {
     if (config.get(JobConfig.JOB_NAME()) == null) {
       throw new ConfigException("Missing job name");
     }
     String jobName = config.get(JobConfig.JOB_NAME());
-    String jobId = "1";
-    if (config.get(JobConfig.JOB_ID()) != null) {
-      jobId = config.get(JobConfig.JOB_ID());
-    }
-    return getClientId(id, jobName, jobId);
-  }
+    String jobId = (config.get(JobConfig.JOB_ID()) != null) ? config.get(JobConfig.JOB_ID()) : "1";
 
-  private static String getClientId(String id, String jobName, String jobId) {
-    return String.format(
-        "%s-%s-%s",
-        id.replaceAll("[^A-Za-z0-9]", "_"),
-        jobName.replaceAll("[^A-Za-z0-9]", "_"),
+    return String.format("%s-%s-%s", id.replaceAll("[^A-Za-z0-9]", "_"), jobName.replaceAll("[^A-Za-z0-9]", "_"),
         jobId.replaceAll("[^A-Za-z0-9]", "_"));
   }
 
-  public static String getProducerClientId(Config config) {
-    return getClientId(PRODUCER_CLIENT_ID_PREFIX, config);
-  }
-
   /**
    * Settings for auto.reset in samza are different from settings in Kafka (auto.offset.reset) - need to convert
    * "largest" -> "latest"
    * "smallest" -> "earliest"
+   * "none" -> "none"
    * "none" - will fail the kafka consumer, if offset is out of range
    * @param properties All consumer related {@link Properties} parsed from samza config
    * @return String representing the config value for "auto.offset.reset" property
@@ -162,9 +163,8 @@ public class KafkaConsumerConfig extends ConsumerConfig {
     String autoOffsetReset = properties.getProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, KAFKA_OFFSET_LATEST);
 
     // accept kafka values directly
-    if (autoOffsetReset.equals(KAFKA_OFFSET_EARLIEST) ||
-        autoOffsetReset.equals(KAFKA_OFFSET_LATEST) ||
-        autoOffsetReset.equals(KAFKA_OFFSET_NONE)) {
+    if (autoOffsetReset.equals(KAFKA_OFFSET_EARLIEST) || autoOffsetReset.equals(KAFKA_OFFSET_LATEST)
+        || autoOffsetReset.equals(KAFKA_OFFSET_NONE)) {
       return autoOffsetReset;
     }
 
@@ -177,5 +177,4 @@ public class KafkaConsumerConfig extends ConsumerConfig {
         return KAFKA_OFFSET_LATEST;
     }
   }
-
 }
\ No newline at end of file
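
For reference, the auto.offset.reset translation performed by getAutoOffsetResetValue()
behaves as follows (a summary of the hunks above; the property lives in the
systems.<system>.consumer.* namespace shown earlier in this file):

    systems.<system>.consumer.auto.offset.reset = largest   ->  latest
    systems.<system>.consumer.auto.offset.reset = smallest  ->  earliest
    systems.<system>.consumer.auto.offset.reset = none      ->  none  (consumer fails on out-of-range offsets)
    earliest / latest / none (Kafka values)                 ->  passed through unchanged
    anything else or unset                                  ->  latest (the default)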

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index cddfdfd..a6272cd 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -86,7 +86,7 @@ public class KafkaConsumerProxy<K, V> {
     this.clientId = clientId;
 
     // TODO - see if we need new metrics (not host:port based)
-    this.kafkaConsumerMetrics.registerBrokerProxy(metricName, 0);
+    this.kafkaConsumerMetrics.registerClientProxy(metricName);
 
     consumerPollThread = new Thread(createProxyThreadRunnable());
   }
@@ -132,7 +132,7 @@ public class KafkaConsumerProxy<K, V> {
 
     // we reuse existing metrics. They assume host and port for the broker
     // for now fake the port with the consumer name
-    kafkaConsumerMetrics.setTopicPartitionValue(metricName, 0, nextOffsets.size());
+    kafkaConsumerMetrics.setTopicPartitionValue(metricName, nextOffsets.size());
   }
 
   /**
@@ -258,16 +258,10 @@ public class KafkaConsumerProxy<K, V> {
         results.put(ssp, listMsgs);
       }
 
-      // TODO - add calculation of the size of the message, when available from Kafka
-      int msgSize = 0;
-      // if (fetchLimitByBytesEnabled) {
-      msgSize = getRecordSize(r);
-      //}
-
       final K key = r.key();
       final Object value = r.value();
       IncomingMessageEnvelope imEnvelope =
-          new IncomingMessageEnvelope(ssp, String.valueOf(r.offset()), key, value, msgSize);
+          new IncomingMessageEnvelope(ssp, String.valueOf(r.offset()), key, value, getRecordSize(r));
       listMsgs.add(imEnvelope);
     }
     if (LOG.isDebugEnabled()) {
@@ -282,18 +276,8 @@ public class KafkaConsumerProxy<K, V> {
   }
 
   private int getRecordSize(ConsumerRecord<K, V> r) {
-    int keySize = 0; //(r.key() == null) ? 0 : r.key().getSerializedKeySize();
-    return keySize;  // + r.getSerializedMsgSize();  // TODO -enable when functionality available from Kafka
-
-    //int getMessageSize (Message message) {
-    // Approximate additional shallow heap overhead per message in addition to the raw bytes
-    // received from Kafka  4 + 64 + 4 + 4 + 4 = 80 bytes overhead.
-    // As this overhead is a moving target, and not very large
-    // compared to the message size its being ignore in the computation for now.
-    // int MESSAGE_SIZE_OVERHEAD =  4 + 64 + 4 + 4 + 4;
-
-    //      return message.size() + MESSAGE_SIZE_OVERHEAD;
-    // }
+    int keySize = (r.key() == null) ? 0 : r.serializedKeySize();
+    return keySize + r.serializedValueSize();
   }
 
   private void updateMetrics(ConsumerRecord<K, V> r, TopicPartition tp) {
@@ -310,7 +294,7 @@ public class KafkaConsumerProxy<K, V> {
     kafkaConsumerMetrics.incReads(tap);
     kafkaConsumerMetrics.incBytesReads(tap, size);
     kafkaConsumerMetrics.setOffsets(tap, recordOffset);
-    kafkaConsumerMetrics.incBrokerBytesReads(metricName, 0, size);
+    kafkaConsumerMetrics.incClientBytesReads(metricName, size);
     kafkaConsumerMetrics.setHighWatermarkValue(tap, highWatermark);
   }
 
@@ -398,7 +382,7 @@ public class KafkaConsumerProxy<K, V> {
     }
     LOG.debug("pollConsumer {}", SSPsToFetch.size());
     if (!SSPsToFetch.isEmpty()) {
-      kafkaConsumerMetrics.incBrokerReads(metricName, 0);
+      kafkaConsumerMetrics.incClientReads(metricName);
 
       Map<SystemStreamPartition, List<IncomingMessageEnvelope>> response;
       if (LOG.isDebugEnabled()) {
@@ -420,7 +404,7 @@ public class KafkaConsumerProxy<K, V> {
       LOG.debug("No topic/partitions need to be fetched for consumer {} right now. Sleeping {}ms.", kafkaConsumer,
           SLEEP_MS_WHILE_NO_TOPIC_PARTITION);
 
-      kafkaConsumerMetrics.incBrokerSkippedFetchRequests(metricName, 0);
+      kafkaConsumerMetrics.incClientSkippedFetchRequests(metricName);
 
       try {
         Thread.sleep(SLEEP_MS_WHILE_NO_TOPIC_PARTITION);

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
index 1aa66dc..415bd38 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
@@ -19,13 +19,10 @@
 
 package org.apache.samza.system.kafka
 
-import org.apache.samza.metrics.MetricsHelper
-import org.apache.samza.metrics.MetricsRegistryMap
-import org.apache.samza.metrics.MetricsRegistry
 import java.util.concurrent.ConcurrentHashMap
+
 import kafka.common.TopicAndPartition
-import org.apache.samza.metrics.Counter
-import org.apache.samza.metrics.Gauge
+import org.apache.samza.metrics._
 
 class KafkaSystemConsumerMetrics(val systemName: String = "unknown", val registry: MetricsRegistry = new MetricsRegistryMap) extends MetricsHelper {
   val offsets = new ConcurrentHashMap[TopicAndPartition, Counter]
@@ -34,68 +31,66 @@ class KafkaSystemConsumerMetrics(val systemName: String = "unknown", val registr
   val lag = new ConcurrentHashMap[TopicAndPartition, Gauge[Long]]
   val highWatermark = new ConcurrentHashMap[TopicAndPartition, Gauge[Long]]
 
-  /*
-  TODO Fix
-   * (String, Int) = (host, port) of BrokerProxy.
-   */
-
-  val reconnects = new ConcurrentHashMap[(String, Int), Counter]
-  val brokerBytesRead = new ConcurrentHashMap[(String, Int), Counter]
-  val brokerReads = new ConcurrentHashMap[(String, Int), Counter]
-  val brokerSkippedFetchRequests = new ConcurrentHashMap[(String, Int), Counter]
-  val topicPartitions = new ConcurrentHashMap[(String, Int), Gauge[Int]]
+  val clientBytesRead = new ConcurrentHashMap[String, Counter]
+  val clientReads = new ConcurrentHashMap[String, Counter]
+  val clientSkippedFetchRequests = new ConcurrentHashMap[String, Counter]
+  val topicPartitions = new ConcurrentHashMap[String, Gauge[Int]]
 
   def registerTopicAndPartition(tp: TopicAndPartition) = {
     if (!offsets.contains(tp)) {
-      offsets.put(tp, newCounter("%s-%s-offset-change" format (tp.topic, tp.partition)))
-      bytesRead.put(tp, newCounter("%s-%s-bytes-read" format (tp.topic, tp.partition)))
-      reads.put(tp, newCounter("%s-%s-messages-read" format (tp.topic, tp.partition)))
-      highWatermark.put(tp, newGauge("%s-%s-high-watermark" format (tp.topic, tp.partition), -1L))
-      lag.put(tp, newGauge("%s-%s-messages-behind-high-watermark" format (tp.topic, tp.partition), 0L))
+      offsets.put(tp, newCounter("%s-%s-offset-change" format(tp.topic, tp.partition)))
+      bytesRead.put(tp, newCounter("%s-%s-bytes-read" format(tp.topic, tp.partition)))
+      reads.put(tp, newCounter("%s-%s-messages-read" format(tp.topic, tp.partition)))
+      highWatermark.put(tp, newGauge("%s-%s-high-watermark" format(tp.topic, tp.partition), -1L))
+      lag.put(tp, newGauge("%s-%s-messages-behind-high-watermark" format(tp.topic, tp.partition), 0L))
     }
   }
 
-  def registerBrokerProxy(host: String, port: Int) {
-    reconnects.put((host, port), newCounter("%s-%s-reconnects" format (host, port)))
-    brokerBytesRead.put((host, port), newCounter("%s-%s-bytes-read" format (host, port)))
-    brokerReads.put((host, port), newCounter("%s-%s-messages-read" format (host, port)))
-    brokerSkippedFetchRequests.put((host, port), newCounter("%s-%s-skipped-fetch-requests" format (host, port)))
-    topicPartitions.put((host, port), newGauge("%s-%s-topic-partitions" format (host, port), 0))
+  def registerClientProxy(clientName: String) {
+    clientBytesRead.put(clientName, newCounter("%s-bytes-read" format clientName))
+    clientReads.put(clientName, newCounter("%s-messages-read" format clientName))
+    clientSkippedFetchRequests.put(clientName, newCounter("%s-skipped-fetch-requests" format clientName))
+    topicPartitions.put(clientName, newGauge("%s-topic-partitions" format clientName, 0))
+  }
 
   // java friendlier interfaces
   // Gauges
-  def setTopicPartitionValue(host: String, port: Int, value: Int) {
-    topicPartitions.get((host,port)).set(value)
+  def setTopicPartitionValue(clientName: String, value: Int) {
+    topicPartitions.get(clientName).set(value)
   }
+
   def setLagValue(topicAndPartition: TopicAndPartition, value: Long) {
     lag.get((topicAndPartition)).set(value);
   }
+
   def setHighWatermarkValue(topicAndPartition: TopicAndPartition, value: Long) {
     highWatermark.get((topicAndPartition)).set(value);
   }
 
   // Counters
-  def incBrokerReads(host: String, port: Int) {
-    brokerReads.get((host,port)).inc
+  def incClientReads(clientName: String) {
+    clientReads.get(clientName).inc
   }
+
   def incReads(topicAndPartition: TopicAndPartition) {
     reads.get(topicAndPartition).inc;
   }
+
   def incBytesReads(topicAndPartition: TopicAndPartition, inc: Long) {
     bytesRead.get(topicAndPartition).inc(inc);
   }
-  def incBrokerBytesReads(host: String, port: Int, incBytes: Long) {
-    brokerBytesRead.get((host,port)).inc(incBytes)
+
+  def incClientBytesReads(clientName: String, incBytes: Long) {
+    clientBytesRead.get(clientName).inc(incBytes)
   }
-  def incBrokerSkippedFetchRequests(host: String, port: Int) {
-    brokerSkippedFetchRequests.get((host,port)).inc()
+
+  def incClientSkippedFetchRequests(clientName: String) {
+    clientSkippedFetchRequests.get(clientName).inc()
   }
+
   def setOffsets(topicAndPartition: TopicAndPartition, offset: Long) {
     offsets.get(topicAndPartition).set(offset)
   }
-  def incReconnects(host: String, port: Int) {
-    reconnects.get((host,port)).inc()
-  }
+
   override def getPrefix = systemName + "-"
 }
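
A short usage sketch for the renamed per-client metrics (a Java caller; the registry and the
client name are illustrative, following the systemName + " " + clientId convention used by
the consumer):

    KafkaSystemConsumerMetrics metrics = new KafkaSystemConsumerMetrics("kafka", registry);
    String clientName = "kafka kafka-consumer-my_job-1";
    metrics.registerClientProxy(clientName);          // once per consumer proxy
    metrics.incClientReads(clientName);               // on every fetch
    metrics.incClientBytesReads(clientName, 4096);    // bytes read in this fetch
    metrics.setTopicPartitionValue(clientName, 8);    // number of assigned partitions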

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index 6a5eda9..892d400 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -19,27 +19,21 @@
 
 package org.apache.samza.system.kafka
 
-import java.util
 import java.util.Properties
 
-import kafka.consumer.ConsumerConfig
 import kafka.utils.ZkUtils
-import org.apache.kafka.clients.consumer.KafkaConsumer
+import org.apache.kafka.clients.consumer.KafkaConsumerConfig
+import org.apache.kafka.clients.producer.KafkaProducer
 import org.apache.samza.SamzaException
 import org.apache.samza.config.ApplicationConfig.ApplicationMode
-import org.apache.samza.util._
-import org.apache.samza.config.{ApplicationConfig, Config, KafkaConfig, StreamConfig}
-import org.apache.samza.metrics.MetricsRegistry
 import org.apache.samza.config.KafkaConfig.Config2Kafka
-import org.apache.samza.config.TaskConfig.Config2Task
-import org.apache.kafka.clients.producer.KafkaProducer
-import org.apache.kafka.common.serialization.ByteArrayDeserializer
-import org.apache.samza.system.SystemFactory
 import org.apache.samza.config.StorageConfig._
-import org.apache.samza.system.SystemProducer
-import org.apache.samza.system.SystemAdmin
 import org.apache.samza.config.SystemConfig.Config2System
-import org.apache.samza.system.SystemConsumer
+import org.apache.samza.config.TaskConfig.Config2Task
+import org.apache.samza.config.{ApplicationConfig, Config, KafkaConfig, StreamConfig}
+import org.apache.samza.metrics.MetricsRegistry
+import org.apache.samza.system.{SystemAdmin, SystemConsumer, SystemFactory, SystemProducer}
+import org.apache.samza.util._
 
 object KafkaSystemFactory extends Logging {
   def getInjectedProducerProperties(systemName: String, config: Config) = if (config.isChangelogSystem(systemName)) {
@@ -51,8 +45,9 @@ object KafkaSystemFactory extends Logging {
 }
 
 class KafkaSystemFactory extends SystemFactory with Logging {
+
   def getConsumer(systemName: String, config: Config, registry: MetricsRegistry): SystemConsumer = {
-    val clientId = KafkaUtil.getClientId("samza-consumer", config)
+    val clientId = KafkaConsumerConfig.getClientId(config)
     val metrics = new KafkaSystemConsumerMetrics(systemName, registry)
 
     NewKafkaSystemConsumer.getNewKafkaSystemConsumer(
@@ -60,10 +55,12 @@ class KafkaSystemFactory extends SystemFactory with Logging {
   }
 
   def getProducer(systemName: String, config: Config, registry: MetricsRegistry): SystemProducer = {
-    val clientId = KafkaUtil.getClientId("samza-producer", config)
+    val clientId = KafkaConsumerConfig.getProducerClientId(config)
     val injectedProps = KafkaSystemFactory.getInjectedProducerProperties(systemName, config)
     val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId, injectedProps)
-    val getProducer = () => { new KafkaProducer[Array[Byte], Array[Byte]](producerConfig.getProducerProperties) }
+    val getProducer = () => {
+      new KafkaProducer[Array[Byte], Array[Byte]](producerConfig.getProducerProperties)
+    }
     val metrics = new KafkaSystemProducerMetrics(systemName, registry)
 
     // Unlike consumer, no need to use encoders here, since they come for free
@@ -79,7 +76,7 @@ class KafkaSystemFactory extends SystemFactory with Logging {
   }
 
   def getAdmin(systemName: String, config: Config): SystemAdmin = {
-    val clientId = KafkaUtil.getClientId("samza-admin", config)
+    val clientId = KafkaConsumerConfig.getAdminClientId(config)
     val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId)
     val bootstrapServers = producerConfig.bootsrapServers
     val consumerConfig = config.getKafkaSystemConsumerConfig(systemName, clientId)
@@ -94,13 +91,13 @@ class KafkaSystemFactory extends SystemFactory with Logging {
     val coordinatorStreamReplicationFactor = config.getCoordinatorReplicationFactor.toInt
     val storeToChangelog = config.getKafkaChangelogEnabledStores()
     // Construct the meta information for each topic, if the replication factor is not defined, we use 2 as the number of replicas for the change log stream.
-    val topicMetaInformation = storeToChangelog.map{case (storeName, topicName) =>
-    {
-       val replicationFactor = config.getChangelogStreamReplicationFactor(storeName).toInt
-       val changelogInfo = ChangelogInfo(replicationFactor, config.getChangelogKafkaProperties(storeName))
-       info("Creating topic meta information for topic: %s with replication factor: %s" format (topicName, replicationFactor))
-       (topicName, changelogInfo)
-    }}
+    val topicMetaInformation = storeToChangelog.map { case (storeName, topicName) => {
+      val replicationFactor = config.getChangelogStreamReplicationFactor(storeName).toInt
+      val changelogInfo = ChangelogInfo(replicationFactor, config.getChangelogKafkaProperties(storeName))
+      info("Creating topic meta information for topic: %s with replication factor: %s" format(topicName, replicationFactor))
+      (topicName, changelogInfo)
+    }
+    }
 
     val deleteCommittedMessages = config.deleteCommittedMessages(systemName).exists(isEnabled => isEnabled.toBoolean)
     val intermediateStreamProperties: Map[String, Properties] = getIntermediateStreamProperties(config)
@@ -125,7 +122,7 @@ class KafkaSystemFactory extends SystemFactory with Logging {
       "segment.bytes" -> segmentBytes)) { case (props, (k, v)) => props.put(k, v); props }
   }
 
-  def getIntermediateStreamProperties(config : Config): Map[String, Properties] = {
+  def getIntermediateStreamProperties(config: Config): Map[String, Properties] = {
     val appConfig = new ApplicationConfig(config)
     if (appConfig.getAppMode == ApplicationMode.BATCH) {
       val streamConfig = new StreamConfig(config)
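
The mapping above builds, for every store with a changelog, that topic's meta information, defaulting the replication factor to 2 when none is configured for the store. A minimal Java sketch of the same lookup-with-default pattern; the config key name and the ChangelogInfo shape here are assumptions for illustration, not Samza's exact API:

    import java.util.HashMap;
    import java.util.Map;
    import java.util.Properties;

    public class ChangelogMetaSketch {
      // Hypothetical stand-in for Samza's ChangelogInfo (replication factor + topic properties).
      static class ChangelogInfo {
        final int replicationFactor;
        final Properties kafkaProperties;
        ChangelogInfo(int replicationFactor, Properties kafkaProperties) {
          this.replicationFactor = replicationFactor;
          this.kafkaProperties = kafkaProperties;
        }
      }

      static Map<String, ChangelogInfo> topicMetaInformation(
          Map<String, String> storeToChangelog, Map<String, String> config) {
        Map<String, ChangelogInfo> meta = new HashMap<>();
        storeToChangelog.forEach((storeName, topicName) -> {
          // Assumed key name; fall back to 2 replicas when the store configures none.
          int replicationFactor = Integer.parseInt(
              config.getOrDefault("stores." + storeName + ".changelog.replication.factor", "2"));
          meta.put(topicName, new ChangelogInfo(replicationFactor, new Properties()));
        });
        return meta;
      }
    }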

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index b33db42..717b45d 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -53,12 +53,12 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
 
   private static final long FETCH_THRESHOLD = 50000;
   private static final long FETCH_THRESHOLD_BYTES = -1L;
+
   private final Consumer<K, V> kafkaConsumer;
   private final String systemName;
   private final KafkaSystemConsumerMetrics samzaConsumerMetrics;
   private final String clientId;
   private final String metricName;
-  /* package private */final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
   private final AtomicBoolean stopped = new AtomicBoolean(false);
   private final AtomicBoolean started = new AtomicBoolean(false);
   private final Config config;
@@ -66,15 +66,16 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
 
   // This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
   /* package private */ KafkaConsumerMessageSink messageSink;
+
   // proxy is doing the actual reading
   private KafkaConsumerProxy proxy;
 
   /* package private */final Map<TopicPartition, String> topicPartitions2Offset = new HashMap<>();
+  /* package private */final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
+
   /* package private */ long perPartitionFetchThreshold;
   /* package private */ long perPartitionFetchThresholdBytes;
 
-  // TODO - consider new class for KafkaSystemConsumerMetrics
-
   /**
    * @param systemName
    * @param config
@@ -85,32 +86,28 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
 
     super(metrics.registry(), clock, metrics.getClass().getName());
 
+    this.kafkaConsumer = kafkaConsumer;
     this.samzaConsumerMetrics = metrics;
     this.clientId = clientId;
     this.systemName = systemName;
     this.config = config;
     this.metricName = systemName + " " + clientId;
 
-    this.kafkaConsumer = kafkaConsumer;
-
     this.fetchThresholdBytesEnabled = new KafkaConfig(config).isConsumerFetchThresholdBytesEnabled(systemName);
 
-    LOG.info(String.format(
-        "Created SamzaKafkaSystemConsumer for system=%s, clientId=%s, metricName=%s with KafkaConsumer=%s", systemName,
-        clientId, metricName, this.kafkaConsumer.toString()));
+    LOG.info("Created SamzaKafkaSystemConsumer for system={}, clientId={}, metricName={}, KafkaConsumer={}", systemName,
+        clientId, metricName, this.kafkaConsumer.toString());
   }
 
   public static <K, V> NewKafkaSystemConsumer getNewKafkaSystemConsumer(String systemName, Config config,
       String clientId, KafkaSystemConsumerMetrics metrics, Clock clock) {
 
-
-
     // extract consumer configs and create kafka consumer
     KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
-
+    LOG.info("Created kafka consumer for system {}, clientId {}: {}", systemName, clientId, kafkaConsumer);
 
     NewKafkaSystemConsumer kc = new NewKafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
-    System.out.println("kc=" + kc + "!!!!!!!!!!!!!!!!!GETTING FOR NKC for " + systemName);
+    LOG.info("Created samza system consumer {}", kc.toString());
 
     return kc;
   }
@@ -126,12 +123,11 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
 
     Map<String, String> injectProps = new HashMap<>();
 
-    // extract kafka consumer configs
+    // extract kafka client configs
     KafkaConsumerConfig consumerConfig =
         KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
 
-    LOG.info("==============>Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}",
-        systemName, consumerConfig.originals());
+    LOG.info("KafkaClient properties for systemName {}: {}", systemName, consumerConfig.originals());
 
     return new KafkaConsumer<>(consumerConfig.originals());
   }
@@ -146,29 +142,23 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
       LOG.warn("attempting to start a stopped consumer");
       return;
     }
-    LOG.info("==============>About to start consumer");
     // initialize the subscriptions for all the registered TopicPartitions
     startSubscription();
-    LOG.info("==============>subscription started");
     // needs to be called after all the registrations are completed
     setFetchThresholds();
-    LOG.info("==============>thresholds ste");
     // Create the proxy to do the actual message reading. It is a separate thread that reads the messages from the stream
     // and puts them into the sink.
     createConsumerProxy();
-    LOG.info("==============>proxy  started");
     startConsumer();
-    LOG.info("==============>consumer started");
+    LOG.info("consumer {} started", this);
   }
 
   private void startSubscription() {
-    //subscribe to all the TopicPartitions
-    LOG.info("==============>startSubscription for TP: " + topicPartitions2SSP.keySet());
+    // subscribe to all the registered TopicPartitions
+    LOG.info("consumer {} subscribes to {}", this, topicPartitions2SSP.keySet());
     try {
       synchronized (kafkaConsumer) {
         // we are using assign (and not subscribe), so we need to specify both topic and partition
-        //topicPartitions2SSP.put(new TopicPartition("FAKE PARTITION", 0), new SystemStreamPartition("Some","Another", new Partition(0)));
-        //topicPartitions2Offset.put(new TopicPartition("FAKE PARTITION", 0), "1234");
         kafkaConsumer.assign(topicPartitions2SSP.keySet());
       }
     } catch (Exception e) {
@@ -184,7 +174,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
     // create the thread with the consumer
     proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink, samzaConsumerMetrics, metricName);
 
-    LOG.info("==============>Created consumer proxy: " + proxy);
+    LOG.info("Created consumer proxy: " + proxy);
   }
 
   /*
@@ -194,6 +184,10 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
    */
   void startConsumer() {
     //set the offset for each TopicPartition
+    if (topicPartitions2Offset.size() <= 0) {
+      LOG.warn("Consumer {} is not subscribed to any SSPs", this);
+    }
+
     topicPartitions2Offset.forEach((tp, startingOffsetString) -> {
       long startingOffset = Long.valueOf(startingOffsetString);
 
@@ -209,16 +203,15 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
         throw new SamzaException(e);
       }
 
-      LOG.info("==============>Changing Consumer's position for tp = " + tp + " to " + startingOffsetString);
+      LOG.info("Changing consumer's starting offset for tp = " + tp + " to " + startingOffsetString);
 
       // add the partition to the proxy
       proxy.addTopicPartition(topicPartitions2SSP.get(tp), startingOffset);
     });
 
-    System.out.println("#####################started " + this + "; kc=" + kafkaConsumer);
     // start the proxy thread
     if (proxy != null && !proxy.isRunning()) {
-      System.out.println("#####################starting proxy " + proxy);
+      LOG.info("Starting proxy: " + proxy);
       proxy.start();
     }
   }
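
Taken together, startSubscription() and startConsumer() above define the startup contract: assign() (rather than subscribe()) pins an explicit set of topic-partitions with no consumer-group rebalancing, and each partition is then positioned at its registered starting offset before the proxy thread begins polling. A minimal sketch of that sequence against the plain Kafka client; the broker address, topic, partition, and offset are illustrative:

    import java.util.Arrays;
    import java.util.Properties;
    import org.apache.kafka.clients.consumer.ConsumerRecords;
    import org.apache.kafka.clients.consumer.KafkaConsumer;
    import org.apache.kafka.common.TopicPartition;

    public class AssignAndSeekSketch {
      public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // illustrative address
        props.put("key.deserializer",
            "org.apache.kafka.common.serialization.ByteArrayDeserializer");
        props.put("value.deserializer",
            "org.apache.kafka.common.serialization.ByteArrayDeserializer");

        KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props);
        TopicPartition tp = new TopicPartition("page-views", 0); // hypothetical topic/partition

        consumer.assign(Arrays.asList(tp)); // explicit assignment: both topic and partition
        consumer.seek(tp, 1234L);           // position at the registered starting offset
        ConsumerRecords<byte[], byte[]> records = consumer.poll(100); // then poll as usual
        System.out.println("fetched " + records.count() + " records");
        consumer.close();
      }
    }
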
@@ -226,29 +219,34 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
   private void setFetchThresholds() {
     // get the thresholds, and set defaults if not defined.
     KafkaConfig kafkaConfig = new KafkaConfig(config);
+
     Option<String> fetchThresholdOption = kafkaConfig.getConsumerFetchThreshold(systemName);
     long fetchThreshold = FETCH_THRESHOLD;
     if (fetchThresholdOption.isDefined()) {
       fetchThreshold = Long.valueOf(fetchThresholdOption.get());
-      LOG.info("fetchThresholdOption is defined. fetchThreshold=" + fetchThreshold);
+      LOG.info("fetchThresholdOption is configured. fetchThreshold=" + fetchThreshold);
     }
+
     Option<String> fetchThresholdBytesOption = kafkaConfig.getConsumerFetchThresholdBytes(systemName);
     long fetchThresholdBytes = FETCH_THRESHOLD_BYTES;
     if (fetchThresholdBytesOption.isDefined()) {
       fetchThresholdBytes = Long.valueOf(fetchThresholdBytesOption.get());
-      LOG.info("fetchThresholdBytesOption is defined. fetchThresholdBytes=" + fetchThresholdBytes);
+      LOG.info("fetchThresholdBytesOption is configured. fetchThresholdBytes=" + fetchThresholdBytes);
     }
+
+    int numTPs = topicPartitions2SSP.size();
+    assert (numTPs == topicPartitions2Offset.size());
+
     LOG.info("fetchThresholdBytes = " + fetchThresholdBytes + "; fetchThreshold=" + fetchThreshold);
-    LOG.info("topicPartitions2Offset #=" + topicPartitions2Offset.size() + "; topicPartition2SSP #="
-        + topicPartitions2SSP.size());
+    LOG.info("number of topicPartitions " + numTPs);
 
-    if (topicPartitions2SSP.size() > 0) {
-      perPartitionFetchThreshold = fetchThreshold / topicPartitions2SSP.size();
+    if (numTPs > 0) {
+      perPartitionFetchThreshold = fetchThreshold / numTPs;
       LOG.info("perPartitionFetchThreshold=" + perPartitionFetchThreshold);
       if (fetchThresholdBytesEnabled) {
         // currently this feature cannot be enabled, because we do not have the size of the messages available.
         // messages get double buffered, hence divide by 2
-        perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / topicPartitions2SSP.size();
+        perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / numTPs;
         LOG.info("perPartitionFetchThresholdBytes is enabled. perPartitionFetchThresholdBytes="
             + perPartitionFetchThresholdBytes);
       }
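
As a worked example of the arithmetic above: with the default fetchThreshold of 50000 and, say, 10 assigned partitions, each partition may buffer 5000 messages; with a byte threshold of 1 MB (an illustrative value, since the byte-based feature is disabled here), the budget is halved for the double buffering and then split per partition, giving (1048576 / 2) / 10 = 52428 bytes:

    public class ThresholdSketch {
      public static void main(String[] args) {
        long fetchThreshold = 50000L;        // FETCH_THRESHOLD default from the class above
        long fetchThresholdBytes = 1048576L; // illustrative 1 MB budget
        int numTPs = 10;                     // illustrative partition count

        long perPartitionFetchThreshold = fetchThreshold / numTPs;                 // 5000
        long perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / numTPs; // 52428
        System.out.println(perPartitionFetchThreshold + " msgs, "
            + perPartitionFetchThresholdBytes + " bytes per partition");
      }
    }
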
@@ -257,23 +255,22 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
 
   @Override
   public void stop() {
-    System.out.println("kc=" + this + "!!!!!!!!!!!!!!!!!!!!!! stopping "+ "; kc=" + kafkaConsumer);
-    System.out.println("kc=" + this + "!!!!!!!!!!!!!!!!!!!!!!TPs = " + topicPartitions2Offset);
+    LOG.info("Stopping Samza kafkaConsumer " + this);
 
     if (!stopped.compareAndSet(false, true)) {
       LOG.warn("attempting to stop stopped consumer.");
       return;
     }
 
-    LOG.warn("Stopping SamzaRawLiKafkaConsumer + " + this);
     // stop the proxy (with 5 minutes timeout)
     if (proxy != null) {
-      System.out.println("##################### stopping proxy " + proxy);
+      LOG.info("Stopping proxy " + proxy);
       proxy.stop(TimeUnit.MINUTES.toMillis(5));
     }
 
     try {
       synchronized (kafkaConsumer) {
+        LOG.info("Closing kafka consumer " + kafkaConsumer);
         kafkaConsumer.close();
       }
     } catch (Exception e) {
@@ -304,7 +301,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
 
     topicPartitions2SSP.put(tp, systemStreamPartition);
 
-    LOG.info("============>registering ssp = " + systemStreamPartition + " with offset " + offset + "; kc=" + this);
+    LOG.info("Registering ssp = " + systemStreamPartition + " with offset " + offset);
 
     String existingOffset = topicPartitions2Offset.get(tp);
     // register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
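
The registration rule described in the comment above keeps the numerically smaller of the two offsets when the same TopicPartition is registered twice, so no messages between the two starting points are skipped. A minimal sketch of that rule, assuming offsets are plain longs encoded as strings:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.kafka.common.TopicPartition;

    public class RegisterOffsetSketch {
      private final Map<TopicPartition, String> topicPartitions2Offset = new HashMap<>();

      void register(TopicPartition tp, String offset) {
        String existing = topicPartitions2Offset.get(tp);
        // Keep the older (smaller) offset so nothing between the two is missed.
        if (existing == null || Long.parseLong(offset) < Long.parseLong(existing)) {
          topicPartitions2Offset.put(tp, offset);
        }
      }
    }
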
@@ -328,7 +325,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
 
   @Override
   public String toString() {
-    return systemName + " " + clientId + "/" + super.toString();
+    return systemName + "/" + clientId + "/" + super.toString();
   }
 
   @Override
@@ -339,21 +336,15 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
     if (!proxy.isRunning()) {
       stop();
       if (proxy.getFailureCause() != null) {
-        String message = "LiKafkaConsumerProxy has stopped";
-        if (proxy.getFailureCause() instanceof org.apache.kafka.common.errors.TopicAuthorizationException) {
-          message +=
-              " due to TopicAuthorizationException Please refer to go/samzaacluserguide to correctly set up acls for your topic";
-        }
+        String message = "KafkaConsumerProxy has stopped";
         throw new SamzaException(message, proxy.getFailureCause());
       } else {
-        LOG.warn("Failure cause not populated for LiKafkaConsumerProxy");
+        LOG.warn("Failure cause is not populated for KafkaConsumerProxy");
         throw new SamzaException("LiKafkaConsumerProxy has stopped");
       }
     }
 
     Map<SystemStreamPartition, List<IncomingMessageEnvelope>> res = super.poll(systemStreamPartitions, timeout);
-    //LOG.info("=============================>. Res for " + systemStreamPartitions);
-    //LOG.info("=============================>. Res:" + res.toString());
     return res;
   }
 
@@ -399,14 +390,14 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
       }
 
       if (fetchThresholdBytesEnabled) {
-        return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes; // TODO Validate
+        return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes;
       } else {
         return getNumMessagesInQueue(ssp) < perPartitionFetchThreshold;
       }
     }
 
     void addMessage(SystemStreamPartition ssp, IncomingMessageEnvelope envelope) {
-      LOG.info("==============>Incoming message ssp = {}: envelope = {}.", ssp, envelope);
+      LOG.trace("Incoming message ssp = {}: envelope = {}.", ssp, envelope);
 
       try {
         put(ssp, envelope);

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
deleted file mode 100644
index a3f76e7..0000000
--- a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- */
-package org.apache.samza.system.kafka
-
-import java.nio.ByteBuffer
-import java.util.concurrent.CountDownLatch
-
-import kafka.api.{PartitionOffsetsResponse, _}
-import kafka.common.TopicAndPartition
-import kafka.consumer.SimpleConsumer
-import kafka.message.{ByteBufferMessageSet, Message, MessageAndOffset, MessageSet}
-import org.apache.kafka.common.protocol.Errors
-import org.apache.samza.SamzaException
-import org.apache.samza.util.Logging
-import org.junit.Assert._
-import org.junit._
-import org.mockito.Matchers._
-import org.mockito.Mockito._
-import org.mockito.invocation.InvocationOnMock
-import org.mockito.stubbing.Answer
-import org.mockito.{Matchers, Mockito}
-
-import scala.collection.JavaConverters._
-
-class TestBrokerProxy extends Logging {
-  /*
-  val tp2 = new TopicAndPartition("Redbird", 2013)
-  var fetchTp1 = true // control whether fetching tp1 messages or not
-
-  @Test def brokerProxyRetrievesMessagesCorrectly() = {
-    val (bp, tp, sink) = getMockBrokerProxy()
-
-    bp.start
-    bp.addTopicPartition(tp, Option("0"))
-    // Add tp2, which should never receive messages since sink disables it.
-    bp.addTopicPartition(tp2, Option("0"))
-    Thread.sleep(1000)
-    assertEquals(2, sink.receivedMessages.size)
-    assertEquals(42, sink.receivedMessages(0)._2.offset)
-    assertEquals(84, sink.receivedMessages(1)._2.offset)
-  }
-
-  @Test def brokerProxySkipsFetchForEmptyRequests() = {
-    val (bp, tp, sink) = getMockBrokerProxy()
-
-    bp.start
-    // Only add tp2, which should never receive messages since sink disables it.
-    bp.addTopicPartition(tp2, Option("0"))
-    Thread.sleep(1000)
-    assertEquals(0, sink.receivedMessages.size)
-    assertTrue(bp.metrics.brokerSkippedFetchRequests.get((bp.host, bp.port)).getCount > 0)
-    assertEquals(0, bp.metrics.brokerReads.get((bp.host, bp.port)).getCount)
-  }
-
-  @Test def brokerProxyThrowsExceptionOnDuplicateTopicPartitions() = {
-    val (bp, tp, _) = getMockBrokerProxy()
-    bp.start
-    bp.addTopicPartition(tp, Option("0"))
-
-    try {
-      bp.addTopicPartition(tp, Option("1"))
-      fail("Should have thrown an exception")
-    } catch {
-      case se: SamzaException => assertEquals(se.getMessage, "Already consuming TopicPartition [Redbird,2012]")
-      case other: Exception => fail("Got some other exception than what we were expecting: " + other)
-    }
-  }
-
-  def getMockBrokerProxy() = {
-    val sink = new MessageSink {
-      val receivedMessages = new scala.collection.mutable.ListBuffer[(TopicAndPartition, MessageAndOffset, Boolean)]()
-
-      def abdicate(tp: TopicAndPartition, nextOffset: Long) {}
-
-      def refreshDropped() {}
-
-      def addMessage(tp: TopicAndPartition, msg: MessageAndOffset, highWatermark: Long) {
-        receivedMessages += ((tp, msg, msg.offset.equals(highWatermark)))
-      }
-
-      def setIsAtHighWatermark(tp: TopicAndPartition, isAtHighWatermark: Boolean) {
-      }
-
-      // Never need messages for tp2.
-      def needsMoreMessages(tp: TopicAndPartition): Boolean = !tp.equals(tp2) && fetchTp1
-    }
-
-    val system = "daSystem"
-    val host = "host"
-    val port = 2222
-    val tp = new TopicAndPartition("Redbird", 2012)
-    val metrics = new KafkaSystemConsumerMetrics(system)
-
-    metrics.registerBrokerProxy(host, port)
-    metrics.registerTopicAndPartition(tp)
-    metrics.topicPartitions.get((host, port)).set(1)
-
-    val bp = new BrokerProxy(
-      host,
-      port,
-      system,
-      "daClientId",
-      metrics,
-      sink,
-      offsetGetter = new GetOffset("fail", Map("Redbird" -> "largest"))) {
-
-      override val sleepMSWhileNoTopicPartitions = 100
-      // Speed up for test
-      var alreadyCreatedConsumer = false
-
-      // Scala traits and Mockito mocks don't mix, unfortunately.
-      override def createSimpleConsumer() = {
-        if (alreadyCreatedConsumer) {
-          System.err.println("Should only be creating one consumer in this test!")
-          throw new InterruptedException("Should only be creating one consumer in this test!")
-        }
-        alreadyCreatedConsumer = true
-
-        new DefaultFetchSimpleConsumer("a", 1, 2, 3, "b", new StreamFetchSizes(42)) {
-          val sc = Mockito.mock(classOf[SimpleConsumer])
-          val mockOffsetResponse = {
-            val offsetResponse = Mockito.mock(classOf[OffsetResponse])
-            val partitionOffsetResponse = {
-              val por = Mockito.mock(classOf[PartitionOffsetsResponse])
-              when(por.offsets).thenReturn(List(1l).toSeq)
-              por
-            }
-
-            val map = scala.Predef.Map[TopicAndPartition, PartitionOffsetsResponse](tp -> partitionOffsetResponse, tp2 -> partitionOffsetResponse)
-            when(offsetResponse.partitionErrorAndOffsets).thenReturn(map)
-            offsetResponse
-          }
-
-          when(sc.getOffsetsBefore(any(classOf[OffsetRequest]))).thenReturn(mockOffsetResponse)
-
-          val fetchResponse = {
-            val fetchResponse = Mockito.mock(classOf[FetchResponse])
-
-            val messageSet = {
-              val messageSet = Mockito.mock(classOf[ByteBufferMessageSet])
-
-              def getMessage() = new Message(Mockito.mock(classOf[ByteBuffer]))
-              val messages = List(new MessageAndOffset(getMessage, 42), new MessageAndOffset(getMessage, 84))
-
-              when(messageSet.sizeInBytes).thenReturn(43)
-              when(messageSet.size).thenReturn(44)
-              when(messageSet.iterator).thenReturn(messages.iterator)
-              when(messageSet.head).thenReturn(messages.head)
-              messageSet
-            }
-
-            val fetchResponsePartitionData = FetchResponsePartitionData(Errors.NONE, 500, messageSet)
-            val map = scala.Predef.Map[TopicAndPartition, FetchResponsePartitionData](tp -> fetchResponsePartitionData)
-
-            when(fetchResponse.data).thenReturn(map.toSeq)
-            when(fetchResponse.messageSet(any(classOf[String]), any(classOf[Int]))).thenReturn(messageSet)
-            fetchResponse
-          }
-          when(sc.fetch(any(classOf[FetchRequest]))).thenReturn(fetchResponse)
-
-          override def close() = sc.close()
-
-          override def send(request: TopicMetadataRequest): TopicMetadataResponse = sc.send(request)
-
-          override def fetch(request: FetchRequest): FetchResponse = {
-            // Verify that we only get fetch requests for one tp, even though
-            // two were registered. This is to verify that
-            // sink.needsMoreMessages works.
-            assertEquals(1, request.requestInfo.size)
-            sc.fetch(request)
-          }
-
-          when(sc.earliestOrLatestOffset(any(classOf[TopicAndPartition]), any(classOf[Long]), any(classOf[Int]))).thenReturn(100)
-
-          override def getOffsetsBefore(request: OffsetRequest): OffsetResponse = sc.getOffsetsBefore(request)
-
-          override def commitOffsets(request: OffsetCommitRequest): OffsetCommitResponse = sc.commitOffsets(request)
-
-          override def fetchOffsets(request: OffsetFetchRequest): OffsetFetchResponse = sc.fetchOffsets(request)
-
-          override def earliestOrLatestOffset(topicAndPartition: TopicAndPartition, earliestOrLatest: Long, consumerId: Int): Long = sc.earliestOrLatestOffset(topicAndPartition, earliestOrLatest, consumerId)
-        }
-      }
-
-    }
-
-    (bp, tp, sink)
-  }
-
-  @Test def brokerProxyUpdateLatencyMetrics() = {
-    val (bp, tp, _) = getMockBrokerProxy()
-
-    bp.start
-    bp.addTopicPartition(tp, Option("0"))
-    Thread.sleep(1000)
-    // update when fetching messages
-    assertEquals(500, bp.metrics.highWatermark.get(tp).getValue)
-    assertEquals(415, bp.metrics.lag.get(tp).getValue)
-
-    fetchTp1 = false
-    Thread.sleep(1000)
-    // update when not fetching messages
-    assertEquals(100, bp.metrics.highWatermark.get(tp).getValue)
-    assertEquals(15, bp.metrics.lag.get(tp).getValue)
-
-    fetchTp1 = true
-  }
-
- @Test def brokerProxyCorrectlyHandlesOffsetOutOfRange(): Unit = {
-    // Need to wait for the thread to do some work before ending the test
-    val countdownLatch = new CountDownLatch(1)
-    var failString: String = null
-
-    val mockMessageSink = mock(classOf[MessageSink])
-    when(mockMessageSink.needsMoreMessages(any())).thenReturn(true)
-
-    val doNothingMetrics = new KafkaSystemConsumerMetrics()
-
-    val tp = new TopicAndPartition("topic", 42)
-
-    val mockOffsetGetter = mock(classOf[GetOffset])
-    // This will be used by the simple consumer below, and this is the response that simple consumer needs
-    when(mockOffsetGetter.isValidOffset(any(classOf[DefaultFetchSimpleConsumer]), Matchers.eq(tp), Matchers.eq("0"))).thenReturn(true)
-    when(mockOffsetGetter.getResetOffset(any(classOf[DefaultFetchSimpleConsumer]), Matchers.eq(tp))).thenReturn(1492l)
-
-    var callsToCreateSimpleConsumer = 0
-    val mockSimpleConsumer = mock(classOf[DefaultFetchSimpleConsumer])
-
-    // Create an answer that first indicates offset out of range on first invocation and on second
-    // verifies that the parameters have been updated to what we expect them to be
-    val answer = new Answer[FetchResponse]() {
-      var invocationCount = 0
-
-      def answer(invocation: InvocationOnMock): FetchResponse = {
-        val arguments = invocation.getArguments()(0).asInstanceOf[List[Object]](0).asInstanceOf[(String, Long)]
-
-        if (invocationCount == 0) {
-          if (arguments !=(tp, 0)) {
-            failString = "First invocation did not have the right arguments: " + arguments
-            countdownLatch.countDown()
-          }
-          val mfr = mock(classOf[FetchResponse])
-          when(mfr.hasError).thenReturn(true)
-          when(mfr.error("topic", 42)).thenReturn(Errors.OFFSET_OUT_OF_RANGE)
-
-          val messageSet = mock(classOf[MessageSet])
-          when(messageSet.iterator).thenReturn(Iterator.empty)
-          val response = mock(classOf[FetchResponsePartitionData])
-          when(response.error).thenReturn(Errors.OFFSET_OUT_OF_RANGE)
-          val responseMap = Map(tp -> response)
-          when(mfr.data).thenReturn(responseMap.toSeq)
-          invocationCount += 1
-          mfr
-        } else {
-          if (arguments !=(tp, 1492)) {
-            failString = "On second invocation, arguments were not correct: " + arguments
-          }
-          countdownLatch.countDown()
-          Thread.currentThread().interrupt()
-          null
-        }
-      }
-    }
-
-    when(mockSimpleConsumer.defaultFetch(any())).thenAnswer(answer)
-
-    // So now we have a fetch response that will fail.  Prime the mockGetOffset to send us to a new offset
-
-    val bp = new BrokerProxy("host", 423, "system", "clientID", doNothingMetrics, mockMessageSink, Int.MaxValue, 1024000, new StreamFetchSizes(256 * 1024), 524288, 1000, mockOffsetGetter) {
-
-      override def createSimpleConsumer() = {
-        if (callsToCreateSimpleConsumer > 1) {
-          failString = "Tried to create more than one simple consumer"
-          countdownLatch.countDown()
-        }
-        callsToCreateSimpleConsumer += 1
-        mockSimpleConsumer
-      }
-    }
-
-    bp.addTopicPartition(tp, Option("0"))
-    bp.start
-    countdownLatch.await()
-    bp.stop
-    if (failString != null) {
-      fail(failString)
-    }
-  }
-
-  /**
-    * TODO fix
-   * Test that makes sure that BrokerProxy abdicates all TopicAndPartitions
-   * that it owns when a consumer failure occurs.
-   */
-  @Test def brokerProxyAbdicatesOnConnectionFailure(): Unit = {
-    val countdownLatch = new CountDownLatch(1)
-    var abdicated: Option[TopicAndPartition] = None
-    @volatile var refreshDroppedCount = 0
-    val mockMessageSink = new MessageSink {
-      override def setIsAtHighWatermark(tp: TopicAndPartition, isAtHighWatermark: Boolean) {
-      }
-
-      override def addMessage(tp: TopicAndPartition, msg: MessageAndOffset, highWatermark: Long) {
-      }
-
-      override def abdicate(tp: TopicAndPartition, nextOffset: Long) {
-        abdicated = Some(tp)
-        countdownLatch.countDown
-      }
-
-      override def refreshDropped() {
-        refreshDroppedCount += 1
-      }
-
-      override def needsMoreMessages(tp: TopicAndPartition): Boolean = {
-        true
-      }
-    }
-
-    val doNothingMetrics = new KafkaSystemConsumerMetrics()
-    val tp = new TopicAndPartition("topic", 42)
-    val mockOffsetGetter = mock(classOf[GetOffset])
-    val mockSimpleConsumer = mock(classOf[DefaultFetchSimpleConsumer])
-
-    when(mockOffsetGetter.isValidOffset(any(classOf[DefaultFetchSimpleConsumer]), Matchers.eq(tp), Matchers.eq("0"))).thenReturn(true)
-    when(mockOffsetGetter.getResetOffset(any(classOf[DefaultFetchSimpleConsumer]), Matchers.eq(tp))).thenReturn(1492l)
-    when(mockSimpleConsumer.defaultFetch(any())).thenThrow(new SamzaException("Pretend this is a ClosedChannelException. Can't use ClosedChannelException because it's checked, and Mockito doesn't like that."))
-
-    val bp = new BrokerProxy("host", 567, "system", "clientID", doNothingMetrics, mockMessageSink, Int.MaxValue, 1024000, new StreamFetchSizes(256 * 1024), 524288, 1000, mockOffsetGetter) {
-      override def createSimpleConsumer() = {
-        mockSimpleConsumer
-      }
-    }
-
-    val waitForRefresh = () => {
-      val currentRefreshDroppedCount = refreshDroppedCount
-      while (refreshDroppedCount == currentRefreshDroppedCount) {
-        Thread.sleep(100)
-      }
-    }
-
-    bp.addTopicPartition(tp, Option("0"))
-    bp.start
-    // BP should refresh on startup.
-    waitForRefresh()
-    countdownLatch.await()
-    // BP should continue refreshing after it's abdicated all TopicAndPartitions.
-    waitForRefresh()
-    bp.stop
-    assertEquals(tp, abdicated.getOrElse(null))
-  }
-
-  @Test def brokerProxyAbdicatesHardErrors(): Unit = {
-    val doNothingMetrics = new KafkaSystemConsumerMetrics
-    val mockMessageSink = new MessageSink {
-      override def needsMoreMessages(tp: TopicAndPartition): Boolean = true
-      override def abdicate(tp: TopicAndPartition, nextOffset: Long) {}
-      override def addMessage(tp: TopicAndPartition, msg: MessageAndOffset, highWatermark: Long) {}
-      override def refreshDropped() {throw new OutOfMemoryError("Test - OOME")}
-      override def setIsAtHighWatermark(tp: TopicAndPartition, isAtHighWatermark: Boolean): Unit = {}
-    }
-    val mockOffsetGetter = mock(classOf[GetOffset])
-    val mockSimpleConsumer = mock(classOf[DefaultFetchSimpleConsumer])
-
-    val bp = new BrokerProxy("host", 658, "system", "clientID", doNothingMetrics, mockMessageSink, Int.MaxValue, 1024000, new StreamFetchSizes(256 * 1024), 524288, 1000, mockOffsetGetter) {
-      override def createSimpleConsumer() = {
-        mockSimpleConsumer
-      }
-    }
-    var caughtError = false
-    try {
-      bp.thread.run
-    } catch {
-      case e: SamzaException => {
-        assertEquals(e.getMessage, "Got out of memory error in broker proxy thread.")
-        info("Received OutOfMemoryError in broker proxy.")
-        caughtError = true
-      }
-    }
-    assertEquals(true, caughtError)
-    val mockMessageSink2 = new MessageSink {
-      override def needsMoreMessages(tp: TopicAndPartition): Boolean = true
-      override def abdicate(tp: TopicAndPartition, nextOffset: Long): Unit = {}
-      override def addMessage(tp: TopicAndPartition, msg: MessageAndOffset, highWatermark: Long): Unit = {}
-      override def refreshDropped(): Unit = {throw new StackOverflowError("Test - SOE")}
-      override def setIsAtHighWatermark(tp: TopicAndPartition, isAtHighWatermark: Boolean): Unit = {}
-    }
-    caughtError = false
-    val bp2 = new BrokerProxy("host", 689, "system", "clientID2", doNothingMetrics, mockMessageSink2, Int.MaxValue, 1024000, new StreamFetchSizes(256 * 1024), 524288, 1000, mockOffsetGetter) {
-      override def createSimpleConsumer() = {
-        mockSimpleConsumer
-      }
-    }
-    try {
-      bp2.thread.run
-    } catch {
-      case e: SamzaException => {
-        assertEquals(e.getMessage, "Got stack overflow error in broker proxy thread.")
-        info("Received StackOverflowError in broker proxy.")
-        caughtError = true
-      }
-    }
-    assertEquals(true, caughtError)
-  }
-
-  @Test
-	def brokerProxyStopCloseConsumer: Unit = {
-    val mockSimpleConsumer = mock(classOf[DefaultFetchSimpleConsumer])
-    val bp = new BrokerProxy("host", 0, "system", "clientID", new KafkaSystemConsumerMetrics(), null){
-      override def createSimpleConsumer() = {
-        mockSimpleConsumer
-      }
-    }
-    bp.start
-    bp.stop
-    verify(mockSimpleConsumer).close
-  }
-  */
-}

http://git-wip-us.apache.org/repos/asf/samza/blob/332a0481/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
----------------------------------------------------------------------
diff --git a/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala b/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
index 2ea9a5f..8405c63 100644
--- a/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
+++ b/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
@@ -223,16 +223,16 @@ class StreamTaskTestUtil {
    * interrupt, which is forwarded on to ThreadJob, and marked as a failure).
    */
   def stopJob(job: StreamJob) {
-    // make sure we don't kill the job before it was started
+    // make sure we don't kill the job before it was started.
+    // eventProcessed.await() guarantees all the consumers have been initialized
     val tasks = TestTask.tasks
     val task = tasks.values.toList.head
     task.eventProcessed.await(60, TimeUnit.SECONDS)
-    System.out.println("THREAD: JOB KILL BEFORE")
+    assertEquals(0, task.eventProcessed.getCount)
+
     // Shutdown task.
     job.kill
-    System.out.println("THREAD: JOB KILL")
     val status = job.waitForFinish(60000)
-    System.out.println("THREAD: JOB KILL WAIT")
     assertEquals(ApplicationStatus.UnsuccessfulFinish, status)
   }
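
The guard above works because eventProcessed is a java.util.concurrent.CountDownLatch: await() blocks until the count reaches zero or the timeout elapses, and checking getCount() afterwards distinguishes the two outcomes. A minimal illustration of the same pattern:

    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.TimeUnit;

    public class LatchSketch {
      public static void main(String[] args) throws InterruptedException {
        CountDownLatch eventProcessed = new CountDownLatch(1);

        // Simulates the task thread signalling that an event was processed.
        new Thread(eventProcessed::countDown).start();

        // Wait up to 60 seconds; returns true if the latch reached zero in time.
        boolean processed = eventProcessed.await(60, TimeUnit.SECONDS);
        System.out.println("processed=" + processed + ", count=" + eventProcessed.getCount());
      }
    }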
 


[44/47] samza git commit: added License message

Posted by bo...@apache.org.
added License message


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/5120740a
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/5120740a
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/5120740a

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 5120740aa04ab5dcb24ffd3ff5f7dc5114a32653
Parents: 32c9282
Author: Boris S <bo...@apache.org>
Authored: Wed Sep 12 15:58:55 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Sep 12 15:58:55 2018 -0700

----------------------------------------------------------------------
 .../samza/system/kafka/KafkaSystemConsumer.java     |  2 --
 .../clients/consumer/TestKafkaConsumerConfig.java   | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/5120740a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
index 9cdfce1..9101a89 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
@@ -363,12 +363,10 @@ public class KafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements Sy
     }
 
     boolean needsMoreMessages(SystemStreamPartition ssp) {
-      if (LOG.isDebugEnabled()) {
         LOG.debug("needsMoreMessages from following SSP: {}. fetchLimitByBytes enabled={}; messagesSizeInQueue={};"
                 + "(limit={}); messagesNumInQueue={}(limit={};", ssp, fetchThresholdBytesEnabled,
             getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes, getNumMessagesInQueue(ssp),
             perPartitionFetchThreshold);
-      }
 
       if (fetchThresholdBytesEnabled) {
         return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes;
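
Dropping the isDebugEnabled() guard above is safe because SLF4J's parameterized form defers message formatting until the level is known to be enabled; a guard only still pays off when computing an argument is itself expensive. A short sketch of the distinction (expensiveDiagnosticSnapshot is a hypothetical costly call, not part of the class above):

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    public class LoggingSketch {
      private static final Logger LOG = LoggerFactory.getLogger(LoggingSketch.class);

      void example(Object ssp, int queued) {
        // The parameterized form only formats the message when DEBUG is enabled,
        // so no explicit guard is needed around it.
        LOG.debug("needsMoreMessages from SSP: {}; queued={}", ssp, queued);

        // A guard is still worthwhile only when computing an argument is itself expensive:
        if (LOG.isDebugEnabled()) {
          LOG.debug("state: {}", expensiveDiagnosticSnapshot());
        }
      }

      private String expensiveDiagnosticSnapshot() { // hypothetical costly call
        return "snapshot";
      }
    }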

http://git-wip-us.apache.org/repos/asf/samza/blob/5120740a/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java b/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
index ee300d0..264098b 100644
--- a/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
+++ b/samza-kafka/src/test/java/org/apache/kafka/clients/consumer/TestKafkaConsumerConfig.java
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.kafka.clients.consumer;
 
 import java.util.Collections;


[22/47] samza git commit: added test

Posted by bo...@apache.org.
added test


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/89f79829
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/89f79829
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/89f79829

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 89f79829107ed21dd88058922b6038835af1cfbd
Parents: 34ae8ba
Author: Boris S <bo...@apache.org>
Authored: Thu Aug 30 10:30:55 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Thu Aug 30 10:30:55 2018 -0700

----------------------------------------------------------------------
 .../clients/consumer/KafkaConsumerConfig.java   |  22 ++
 .../apache/samza/system/kafka/BrokerProxy.scala | 332 -------------------
 .../samza/system/kafka/KafkaConsumerProxy.java  |   6 +-
 .../system/kafka/KafkaSystemConsumer.scala      | 309 -----------------
 .../kafka/KafkaSystemConsumerMetrics.scala      |   1 +
 .../system/kafka/NewKafkaSystemConsumer.java    |  19 +-
 .../kafka/TestKafkaCheckpointManager.scala      |   3 +-
 .../samza/system/kafka/TestBrokerProxy.scala    |   3 +
 .../system/kafka/TestKafkaSystemConsumer.scala  | 191 -----------
 .../kafka/TestNewKafkaSystemConsumer.java       | 203 ++++++++++++
 10 files changed, 237 insertions(+), 852 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index b29a041..88437ee 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -23,9 +23,14 @@ package org.apache.kafka.clients.consumer;
 
 import java.util.Map;
 import java.util.Properties;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.apache.samza.SamzaException;
 import org.apache.samza.config.Config;
 import org.apache.samza.config.ConfigException;
 import org.apache.samza.config.JobConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import scala.Option;
 
 
@@ -34,6 +39,8 @@ import scala.Option;
  */
 public class KafkaConsumerConfig extends ConsumerConfig {
 
+  public static final Logger LOG = LoggerFactory.getLogger(KafkaConsumerConfig.class);
+
   private static final String PRODUCER_CLIENT_ID_PREFIX = "kafka-producer";
   private static final String CONSUMER_CLIENT_ID_PREFIX = "kafka-consumer";
   private static final String SAMZA_OFFSET_LARGEST = "largest";
@@ -76,6 +83,9 @@ public class KafkaConsumerConfig extends ConsumerConfig {
     if (! subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
       // get it from the producer config
       String bootstrapServer = config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
+      if (StringUtils.isEmpty(bootstrapServer)) {
+        throw new SamzaException("Missing " + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG + " config  for " + systemName);
+      }
       consumerProps.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer);
     }
 
@@ -85,6 +95,18 @@ public class KafkaConsumerConfig extends ConsumerConfig {
         RangeAssignor.class.getName());
 
 
+    // The consumer is fully typed, and so is its deserialization. If no deserializer is
+    // provided, default to byte[].
+    if (!config.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
+      LOG.info("defaulting key deserialization for the consumer (system {}) to ByteArrayDeserializer", systemName);
+      consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+    }
+    if (!config.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
+      LOG.info("defaulting value deserialization for the consumer (system {}) to ByteArrayDeserializer", systemName);
+      consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+    }
+
+
+    // TODO: confirm whether this override is needed
+    String maxPollRecords = subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT);
     consumerProps.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
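
The two additions above make the consumer config self-sufficient: bootstrap.servers falls back to the producer's setting when the consumer block omits it, and both deserializers default to byte arrays so an untyped pipeline still works. A condensed sketch of the same property assembly using the stock Kafka ConsumerConfig keys; the fallback value is whatever the producer config supplies:

    import java.util.Properties;
    import org.apache.kafka.clients.consumer.ConsumerConfig;
    import org.apache.kafka.common.serialization.ByteArrayDeserializer;

    public class ConsumerPropsSketch {
      // Illustrative assembly mirroring the diff: fall back to the producer's
      // bootstrap.servers, and default both deserializers to byte[].
      static Properties build(Properties consumerProps, String producerBootstrap) {
        if (!consumerProps.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
          consumerProps.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, producerBootstrap);
        }
        consumerProps.putIfAbsent(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
            ByteArrayDeserializer.class.getName());
        consumerProps.putIfAbsent(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
            ByteArrayDeserializer.class.getName());
        return consumerProps;
      }
    }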

http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/main/scala/org/apache/samza/system/kafka/BrokerProxy.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/BrokerProxy.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/BrokerProxy.scala
deleted file mode 100644
index 423b68a..0000000
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/BrokerProxy.scala
+++ /dev/null
@@ -1,332 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- */
-
-package org.apache.samza.system.kafka
-
-import java.lang.Thread.UncaughtExceptionHandler
-import java.nio.channels.ClosedByInterruptException
-import java.util.concurrent.{ConcurrentHashMap, CountDownLatch}
-
-import kafka.api._
-import kafka.common.{ErrorMapping, NotLeaderForPartitionException, TopicAndPartition, UnknownTopicOrPartitionException}
-import kafka.consumer.ConsumerConfig
-import kafka.message.MessageSet
-import org.apache.samza.SamzaException
-import org.apache.samza.util.ExponentialSleepStrategy
-import org.apache.samza.util.KafkaUtil
-import org.apache.samza.util.Logging
-
-import scala.collection.JavaConverters._
-import scala.collection.concurrent
-
-/**
- * A BrokerProxy consolidates Kafka fetches meant for a particular broker and retrieves them all at once, providing
- * a way for consumers to retrieve those messages by topic and partition.
- */
-class BrokerProxy(
-  val host: String,
-  val port: Int,
-  val system: String,
-  val clientID: String,
-  val metrics: KafkaSystemConsumerMetrics,
-  val messageSink: MessageSink,
-  val timeout: Int = ConsumerConfig.SocketTimeout,
-  val bufferSize: Int = ConsumerConfig.SocketBufferSize,
-  val fetchSize: StreamFetchSizes = new StreamFetchSizes,
-  val consumerMinSize:Int = ConsumerConfig.MinFetchBytes,
-  val consumerMaxWait:Int = ConsumerConfig.MaxFetchWaitMs,
-  offsetGetter: GetOffset = new GetOffset("fail")) extends Toss with Logging {
-
-  /**
-   * How long should the fetcher thread sleep before checking if any TopicPartitions has been added to its purview
-   */
-  val sleepMSWhileNoTopicPartitions = 100
-
-  /** What's the next offset for a particular partition? **/
-  val nextOffsets:concurrent.Map[TopicAndPartition, Long] = new ConcurrentHashMap[TopicAndPartition, Long]().asScala
-
-  /** Block on the first call to get message if the fetcher has not yet returned its initial results **/
-  // TODO: It should be sufficient to just use the count down latch and await on it for each of the calls, but
-  // VisualVM was showing the consumer thread spending all its time in the await method rather than returning
-  // immediately, even though the process was proceeding normally.  Hence the extra boolean.  Should be investigated.
-  val firstCallBarrier = new CountDownLatch(1)
-  var firstCall = true
-
-  var simpleConsumer = createSimpleConsumer()
-
-  metrics.registerBrokerProxy(host, port)
-
-  def createSimpleConsumer() = {
-    val hostString = "%s:%d" format (host, port)
-    info("Creating new SimpleConsumer for host %s for system %s" format (hostString, system))
-
-    val sc = new DefaultFetchSimpleConsumer(host, port, timeout, bufferSize, clientID, fetchSize, consumerMinSize, consumerMaxWait)
-    sc
-  }
-
-  def addTopicPartition(tp: TopicAndPartition, nextOffset: Option[String]) = {
-    debug("Adding new topic and partition %s to queue for %s" format (tp, host))
-
-    if (nextOffsets.asJava.containsKey(tp)) {
-      toss("Already consuming TopicPartition %s" format tp)
-    }
-
-    val offset = if (nextOffset.isDefined && offsetGetter.isValidOffset(simpleConsumer, tp, nextOffset.get)) {
-      nextOffset
-        .get
-        .toLong
-    } else {
-      warn("It appears that we received an invalid or empty offset %s for %s. Attempting to use Kafka's auto.offset.reset setting. This can result in data loss if processing continues." format (nextOffset, tp))
-
-      offsetGetter.getResetOffset(simpleConsumer, tp)
-    }
-
-    debug("Got offset %s for new topic and partition %s." format (offset, tp))
-
-    nextOffsets += tp -> offset
-
-    metrics.topicPartitions.get((host, port)).set(nextOffsets.size)
-  }
-
-  def removeTopicPartition(tp: TopicAndPartition) = {
-    if (nextOffsets.asJava.containsKey(tp)) {
-      val offset = nextOffsets.remove(tp)
-      metrics.topicPartitions.get((host, port)).set(nextOffsets.size)
-      debug("Removed %s" format tp)
-      offset
-    } else {
-      warn("Asked to remove topic and partition %s, but not in map (keys = %s)" format (tp, nextOffsets.keys.mkString(",")))
-      None
-    }
-  }
-
-  val thread = new Thread(new Runnable {
-    def run {
-      var reconnect = false
-
-      try {
-        (new ExponentialSleepStrategy).run(
-          loop => {
-            if (reconnect) {
-              metrics.reconnects.get((host, port)).inc
-              simpleConsumer.close()
-              simpleConsumer = createSimpleConsumer()
-            }
-
-            while (!Thread.currentThread.isInterrupted) {
-              messageSink.refreshDropped
-              if (nextOffsets.size == 0) {
-                debug("No TopicPartitions to fetch. Sleeping.")
-                Thread.sleep(sleepMSWhileNoTopicPartitions)
-              } else {
-                fetchMessages
-
-                // If we got here, fetchMessages didn't throw an exception, i.e. it was successful.
-                // In that case, reset the loop delay, so that the next time an error occurs,
-                // we start with a short retry delay.
-                loop.reset
-              }
-            }
-          },
-
-          (exception, loop) => {
-            warn("Restarting consumer due to %s. Releasing ownership of all partitions, and restarting consumer. Turn on debugging to get a full stack trace." format exception)
-            debug("Exception detail:", exception)
-            abdicateAll
-            reconnect = true
-          })
-      } catch {
-        case e: InterruptedException       => info("Got interrupt exception in broker proxy thread.")
-        case e: ClosedByInterruptException => info("Got closed by interrupt exception in broker proxy thread.")
-        case e: OutOfMemoryError           => throw new SamzaException("Got out of memory error in broker proxy thread.")
-        case e: StackOverflowError         => throw new SamzaException("Got stack overflow error in broker proxy thread.")
-      }
-
-      if (Thread.currentThread.isInterrupted) info("Shutting down due to interrupt.")
-    }
-  }, "BrokerProxy thread pointed at %s:%d for client %s" format (host, port, clientID))
-
-  private def fetchMessages(): Unit = {
-    val topicAndPartitionsToFetch = nextOffsets.filterKeys(messageSink.needsMoreMessages(_)).toList
-
-    if (topicAndPartitionsToFetch.size > 0) {
-      metrics.brokerReads.get((host, port)).inc
-      val response: FetchResponse = simpleConsumer.defaultFetch(topicAndPartitionsToFetch: _*)
-      firstCall = false
-      firstCallBarrier.countDown()
-
-      // Split response into errors and non errors, processing the errors first
-      val (nonErrorResponses, errorResponses) = response.data.toSet.partition(_._2.error.code() == ErrorMapping.NoError)
-
-      handleErrors(errorResponses, response)
-
-      nonErrorResponses.foreach { case (tp, data) => moveMessagesToTheirQueue(tp, data) }
-    } else {
-      refreshLatencyMetrics
-
-      debug("No topic/partitions need to be fetched for %s:%s right now. Sleeping %sms." format (host, port, sleepMSWhileNoTopicPartitions))
-
-      metrics.brokerSkippedFetchRequests.get((host, port)).inc
-
-      Thread.sleep(sleepMSWhileNoTopicPartitions)
-    }
-  }
-
-  /**
-   * Releases ownership for a single TopicAndPartition. The
-   * KafkaSystemConsumer will try and find a new broker for the
-   * TopicAndPartition.
-   */
-  def abdicate(tp: TopicAndPartition) = removeTopicPartition(tp) match {
-    // Need to be mindful of a tp that was removed by another thread
-    case Some(offset) => messageSink.abdicate(tp, offset)
-    case None => warn("Tried to abdicate for topic partition not in map. Removed in interim?")
-  }
-
-  /**
-   * Releases all TopicAndPartition ownership for this BrokerProxy thread. The
-   * KafkaSystemConsumer will try and find a new broker for the
-   * TopicAndPartition.
-   */
-  def abdicateAll {
-    info("Abdicating all topic partitions.")
-    val immutableNextOffsetsCopy = nextOffsets.toMap
-    immutableNextOffsetsCopy.keySet.foreach(abdicate(_))
-  }
-
-  def handleErrors(errorResponses: Set[(TopicAndPartition, FetchResponsePartitionData)], response: FetchResponse) = {
-    // FetchResponse should really return Option and a list of the errors so we don't have to find them ourselves
-    case class Error(tp: TopicAndPartition, code: Short, exception: Exception)
-
-    // Now subdivide the errors into three types: non-recoverable, not leader (== abdicate) and offset out of range (== get new offset)
-
-    // Convert FetchResponse into easier-to-work-with Errors
-    val errors = for (
-      (topicAndPartition, responseData) <- errorResponses;
-      error <- Option(response.error(topicAndPartition.topic, topicAndPartition.partition)) // Scala's being cranky about referring to error.getKey values...
-    ) yield new Error(topicAndPartition, error.code(), error.exception())
-
-    val (notLeaderOrUnknownTopic, otherErrors) = errors.partition { case (e) => e.code == ErrorMapping.NotLeaderForPartitionCode || e.code == ErrorMapping.UnknownTopicOrPartitionCode }
-    val (offsetOutOfRangeErrors, remainingErrors) = otherErrors.partition(_.code == ErrorMapping.OffsetOutOfRangeCode)
-
-    // Can recover from two types of errors: not leader (go find the new leader) and offset out of range (go get the new offset)
-    // However, we want to bail as quickly as possible if there are non recoverable errors so that the state of the other
-    // topic-partitions remains the same.  That way, when we've rebuilt the simple consumer, we can come around and
-    // handle the recoverable errors.
-    remainingErrors.foreach(e => {
-      warn("Got non-recoverable error codes during multifetch. Throwing an exception to trigger reconnect. Errors: %s" format remainingErrors.mkString(","))
-      KafkaUtil.maybeThrowException(e.exception) })
-
-    notLeaderOrUnknownTopic.foreach(e => {
-      warn("Received (UnknownTopicOr|NotLeaderFor)Partition exception %s for %s. Abdicating" format(e.code, e.tp))
-      abdicate(e.tp)
-    })
-
-    offsetOutOfRangeErrors.foreach(e => {
-      warn("Received OffsetOutOfRange exception for %s. Current offset = %s" format (e.tp, nextOffsets.getOrElse(e.tp, "not found in map, likely removed in the interim")))
-
-      try {
-        val newOffset = offsetGetter.getResetOffset(simpleConsumer, e.tp)
-        // Put the new offset into the map (if the tp still exists).  Will catch it on the next go-around
-        nextOffsets.replace(e.tp, newOffset)
-      } catch {
-        // UnknownTopic or NotLeader are routine events and handled via abdication.  All others, bail.
-        case _ @ (_:UnknownTopicOrPartitionException | _: NotLeaderForPartitionException) => warn("Received (UnknownTopicOr|NotLeaderFor)Partition exception %s for %s. Abdicating" format(e.code, e.tp))
-                                                                                             abdicate(e.tp)
-      }
-    })
-  }
-
-  def moveMessagesToTheirQueue(tp: TopicAndPartition, data: FetchResponsePartitionData) = {
-    val messageSet: MessageSet = data.messages
-    var nextOffset = nextOffsets(tp)
-
-    messageSink.setIsAtHighWatermark(tp, data.hw == 0 || data.hw == nextOffset)
-    require(messageSet != null)
-    for (message <- messageSet.iterator) {
-      messageSink.addMessage(tp, message, data.hw) // TODO: Verify this is correct
-
-      nextOffset = message.nextOffset
-
-      val bytesSize = message.message.payloadSize + message.message.keySize
-      metrics.reads.get(tp).inc
-      metrics.bytesRead.get(tp).inc(bytesSize)
-      metrics.brokerBytesRead.get((host, port)).inc(bytesSize)
-      metrics.offsets.get(tp).set(nextOffset)
-    }
-
-    nextOffsets.replace(tp, nextOffset) // use replace rather than put in case this tp was removed while we were fetching.
-
-    // Update high water mark
-    val hw = data.hw
-    if (hw >= 0) {
-      metrics.highWatermark.get(tp).set(hw)
-      metrics.lag.get(tp).set(hw - nextOffset)
-    } else {
-      debug("Got a high water mark less than 0 (%d) for %s, so skipping." format (hw, tp))
-    }
-  }
-  override def toString() = "BrokerProxy for %s:%d" format (host, port)
-
-  def start {
-    if (!thread.isAlive) {
-      info("Starting " + toString)
-      thread.setDaemon(true)
-      thread.setName("Samza BrokerProxy " + thread.getName)
-      thread.setUncaughtExceptionHandler(new UncaughtExceptionHandler {
-        override def uncaughtException(t: Thread, e: Throwable) = error("Uncaught exception in broker proxy:", e)
-      })
-      thread.start
-    } else {
-      debug("Tried to start an already started broker proxy (%s). Ignoring." format toString)
-    }
-  }
-
-  def stop {
-    info("Shutting down " + toString)
-
-    if (simpleConsumer != null) {
-      info("closing simple consumer...")
-      simpleConsumer.close
-    }
-
-    thread.interrupt
-    thread.join
-  }
-
-  private def refreshLatencyMetrics {
-    nextOffsets.foreach{
-      case (topicAndPartition, offset) => {
-        val latestOffset = simpleConsumer.earliestOrLatestOffset(topicAndPartition, -1, Request.OrdinaryConsumerId)
-        trace("latest offset of %s is %s" format (topicAndPartition, latestOffset))
-        if (latestOffset >= 0) {
-          // only update the registered topicAndPartitions
-          if(metrics.highWatermark.containsKey(topicAndPartition)) {
-            metrics.highWatermark.get(topicAndPartition).set(latestOffset)
-          }
-          if(metrics.lag.containsKey(topicAndPartition)) {
-            metrics.lag.get(topicAndPartition).set(latestOffset - offset)
-          }
-        }
-      }
-    }
-  }
-}
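
The deleted handleErrors logic above recovered from NotLeaderForPartition and
OffsetOutOfRange by abdicating topic-partitions or resetting offsets by hand.
With the new high-level consumer, equivalent recovery is configured on the
client instead. A minimal sketch, assuming a reachable broker at
localhost:9092 (the class name and group id below are illustrative, not part
of the committed code):

    import java.util.Properties;
    import org.apache.kafka.clients.consumer.ConsumerConfig;
    import org.apache.kafka.clients.consumer.KafkaConsumer;

    // Sketch only: the high-level client rides out leader movement itself,
    // and out-of-range offsets are resolved by auto.offset.reset rather
    // than by GetOffset/abdicate as in BrokerProxy.
    public class OffsetRecoverySketch {
      public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumed broker
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "sketch-group");            // illustrative
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
            "org.apache.kafka.common.serialization.ByteArrayDeserializer");
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
            "org.apache.kafka.common.serialization.ByteArrayDeserializer");
        // plays the role the old offsetGetter reset policy played above
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props)) {
          // consumer.poll(...) would transparently survive leader changes
        }
      }
    }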

http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 01b345a..e61e0ff 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -47,8 +47,8 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * Separate thread that reads messages from kafka and puts them int the BlockingEnvelopeMap
- * This class is not thread safe. There will be only one instance of this class per LiKafkaSystemConsumer object
+ * Separate thread that reads messages from kafka and puts them into the BlockingEnvelopeMap.
+ * This class is not thread safe. There will be only one instance of this class per LiKafkaSystemConsumer object.
  * We still need some synchronization around kafkaConsumer. See pollConsumer() method for details.
  */
 public class KafkaConsumerProxy<K, V> {
@@ -65,7 +65,7 @@ public class KafkaConsumerProxy<K, V> {
   private final String clientId;
   private final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
   private final Map<SystemStreamPartition, MetricName> ssp2MetricName = new HashMap<>();
-  // list of all the SSPs we poll from with their next offsets correspondingly.
+  // list of all the SSPs we poll from, with their next offsets correspondingly.
   private final Map<SystemStreamPartition, Long> nextOffsets = new ConcurrentHashMap<>();
   // lags behind the high water mark, as reported by the Kafka consumer.
   private final Map<SystemStreamPartition, Long> latestLags = new HashMap<>();

http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.scala
deleted file mode 100644
index fd84c4a..0000000
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.scala
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.samza.system.kafka
-
-import kafka.common.TopicAndPartition
-import org.apache.samza.util.Logging
-import kafka.message.Message
-import kafka.message.MessageAndOffset
-import org.apache.samza.Partition
-import org.apache.kafka.common.utils.Utils
-import org.apache.samza.util.Clock
-import kafka.serializer.DefaultDecoder
-import kafka.serializer.Decoder
-import org.apache.samza.util.BlockingEnvelopeMap
-import org.apache.samza.system.SystemStreamPartition
-import org.apache.samza.system.IncomingMessageEnvelope
-import kafka.consumer.ConsumerConfig
-import org.apache.samza.util.TopicMetadataStore
-import kafka.api.PartitionMetadata
-import kafka.api.TopicMetadata
-import org.apache.samza.util.ExponentialSleepStrategy
-import java.util.concurrent.ConcurrentHashMap
-import scala.collection.JavaConverters._
-import org.apache.samza.system.SystemAdmin
-
-object KafkaSystemConsumer {
-
-  // Approximate additional shallow heap overhead per message in addition to the raw bytes
-  // received from Kafka: 4 + 64 + 4 + 4 + 4 = 80 bytes of overhead.
-  // As this overhead is a moving target, and not very large
-  // compared to the message size, it is being ignored in the computation for now.
-  val MESSAGE_SIZE_OVERHEAD =  4 + 64 + 4 + 4 + 4;
-
-  def toTopicAndPartition(systemStreamPartition: SystemStreamPartition) = {
-    val topic = systemStreamPartition.getStream
-    val partitionId = systemStreamPartition.getPartition.getPartitionId
-    TopicAndPartition(topic, partitionId)
-  }
-}
-
-/**
- *  Maintain a cache of BrokerProxies, returning the appropriate one for the
- *  requested topic and partition.
- */
-private[kafka] class KafkaSystemConsumer(
-  systemName: String,
-  systemAdmin: SystemAdmin,
-  metrics: KafkaSystemConsumerMetrics,
-  metadataStore: TopicMetadataStore,
-  clientId: String,
-  timeout: Int = ConsumerConfig.ConsumerTimeoutMs,
-  bufferSize: Int = ConsumerConfig.SocketBufferSize,
-  fetchSize: StreamFetchSizes = new StreamFetchSizes,
-  consumerMinSize: Int = ConsumerConfig.MinFetchBytes,
-  consumerMaxWait: Int = ConsumerConfig.MaxFetchWaitMs,
-
-  /**
-   * Defines a low water mark for how many messages we buffer before we start
-   * executing fetch requests against brokers to get more messages. This value
-   * is divided equally among all registered SystemStreamPartitions. For
-   * example, if fetchThreshold is set to 50000, and there are 50
-   * SystemStreamPartitions registered, then the per-partition threshold is
-   * 1000. As soon as a SystemStreamPartition's buffered message count drops
-   * below 1000, a fetch request will be executed to get more data for it.
-   *
-   * Increasing this parameter will decrease the latency between when a queue
-   * is drained of messages and when new messages are enqueued, but also leads
-   * to an increase in memory usage since more messages will be held in memory.
-   */
-  fetchThreshold: Int = 50000,
-  /**
-   * Defines a low water mark for how many bytes we buffer before we start
-   * executing fetch requests against brokers to get more messages. This
-   * value is divided by 2 because the messages are buffered twice, once in
-   * KafkaConsumer and then in SystemConsumers. This value
-   * is divided equally among all registered SystemStreamPartitions.
-   * However this is a soft limit per partition, as the
-   * bytes are cached at the message boundaries, and the actual usage can be
-   * 1000 bytes + size of max message in the partition for a given stream.
-   * The bytes value is the size of the ByteBuffer in Message; hence, the
-   * object overhead is not taken into consideration. In this codebase
-   * it seems to be quite small: even for 500000 messages this is around 4MB x 2 = 8MB,
-   * which is negligible.
-   *
-   * For example,
-   * if fetchThresholdBytes is set to 100000 bytes, and there are 50
-   * SystemStreamPartitions registered, then the per-partition threshold is
-   * (100000 / 2) / 50 = 1000 bytes.
-   * As this is a soft limit, the actual usage can be 1000 bytes + size of max message.
-   * As soon as a SystemStreamPartition's buffered messages bytes drops
-   * below 1000, a fetch request will be executed to get more data for it.
-   *
-   * Increasing this parameter will decrease the latency between when a queue
-   * is drained of messages and when new messages are enqueued, but also leads
-   * to an increase in memory usage since more messages will be held in memory.
-   *
-   * The default value is -1, which means this is not used. When the value
-   * is > 0, then the fetchThreshold which is count based is ignored.
-   */
-  fetchThresholdBytes: Long = -1,
-  /**
-   * if(fetchThresholdBytes > 0) true else false
-   */
-  fetchLimitByBytesEnabled: Boolean = false,
-  offsetGetter: GetOffset = new GetOffset("fail"),
-  deserializer: Decoder[Object] = new DefaultDecoder().asInstanceOf[Decoder[Object]],
-  keyDeserializer: Decoder[Object] = new DefaultDecoder().asInstanceOf[Decoder[Object]],
-  retryBackoff: ExponentialSleepStrategy = new ExponentialSleepStrategy,
-  clock: () => Long = { System.currentTimeMillis }) extends BlockingEnvelopeMap(
-    metrics.registry,
-    new Clock {
-      def currentTimeMillis = clock()
-    },
-    classOf[KafkaSystemConsumerMetrics].getName) with Toss with Logging {
-
-  type HostPort = (String, Int)
-  val brokerProxies = scala.collection.mutable.Map[HostPort, BrokerProxy]()
-  val topicPartitionsAndOffsets: scala.collection.concurrent.Map[TopicAndPartition, String] = new ConcurrentHashMap[TopicAndPartition, String]().asScala
-  var perPartitionFetchThreshold = fetchThreshold
-  var perPartitionFetchThresholdBytes = 0L
-
-  def start() {
-    if (topicPartitionsAndOffsets.size > 0) {
-      perPartitionFetchThreshold = fetchThreshold / topicPartitionsAndOffsets.size
-      // messages get double buffered, hence divide by 2
-      if(fetchLimitByBytesEnabled) {
-        perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / topicPartitionsAndOffsets.size
-      }
-    }
-
-    systemAdmin.start()
-    refreshBrokers
-  }
-
-  override def register(systemStreamPartition: SystemStreamPartition, offset: String) {
-    super.register(systemStreamPartition, offset)
-
-    val topicAndPartition = KafkaSystemConsumer.toTopicAndPartition(systemStreamPartition)
-    val existingOffset = topicPartitionsAndOffsets.getOrElseUpdate(topicAndPartition, offset)
-    // register the older offset in the consumer
-    if (systemAdmin.offsetComparator(existingOffset, offset) >= 0) {
-      topicPartitionsAndOffsets.replace(topicAndPartition, offset)
-    }
-
-    metrics.registerTopicAndPartition(KafkaSystemConsumer.toTopicAndPartition(systemStreamPartition))
-  }
-
-  def stop() {
-    systemAdmin.stop()
-    brokerProxies.values.foreach(_.stop)
-  }
-
-  protected def createBrokerProxy(host: String, port: Int): BrokerProxy = {
-    info("Creating new broker proxy for host: %s and port: %s" format(host, port))
-    new BrokerProxy(host, port, systemName, clientId, metrics, sink, timeout, bufferSize, fetchSize, consumerMinSize, consumerMaxWait, offsetGetter)
-  }
-
-  protected def getPartitionMetadata(topicMetadata: TopicMetadata, partition: Int): Option[PartitionMetadata] = {
-    topicMetadata.partitionsMetadata.find(_.partitionId == partition)
-  }
-
-  protected def getLeaderHostPort(partitionMetadata: Option[PartitionMetadata]): Option[(String, Int)] = {
-    // Whatever we do, we can't say Broker, even though we're
-    // manipulating it here. Broker is a private type and Scala doesn't seem
-    // to care about that as long as you don't explicitly declare its type.
-    val brokerOption = partitionMetadata.flatMap(_.leader)
-
-    brokerOption match {
-      case Some(broker) => Some(broker.host, broker.port)
-      case _ => None
-    }
-  }
-
-  def refreshBrokers {
-    var tpToRefresh = topicPartitionsAndOffsets.keySet.toList
-    info("Refreshing brokers for: %s" format topicPartitionsAndOffsets)
-    retryBackoff.run(
-      loop => {
-        val topics = tpToRefresh.map(_.topic).toSet
-        val topicMetadata = TopicMetadataCache.getTopicMetadata(topics, systemName, (topics: Set[String]) => metadataStore.getTopicInfo(topics))
-
-        // addTopicPartition one at a time, leaving the to-be-done list intact in case of exceptions.
-        // This avoids trying to re-add the same topic partition repeatedly
-        def refresh() = {
-          val head = tpToRefresh.head
-          // refreshBrokers can be called from abdicate and refreshDropped,
-          // both of which are triggered from BrokerProxy threads. To prevent
-          // accidentally creating multiple objects for the same broker, or
-          // accidentally not updating the topicPartitionsAndOffsets variable,
-          // we need to lock.
-          this.synchronized {
-            // Check if we still need this TopicAndPartition inside the
-            // critical section. If we don't, then skip it.
-            topicPartitionsAndOffsets.get(head) match {
-              case Some(nextOffset) =>
-                val partitionMetadata = getPartitionMetadata(topicMetadata(head.topic), head.partition)
-                getLeaderHostPort(partitionMetadata) match {
-                  case Some((host, port)) =>
-                    debug("Got partition metadata for %s: %s" format(head, partitionMetadata.get))
-                    val brokerProxy = brokerProxies.getOrElseUpdate((host, port), createBrokerProxy(host, port))
-                    brokerProxy.addTopicPartition(head, Option(nextOffset))
-                    brokerProxy.start
-                    debug("Claimed topic-partition (%s) for (%s)".format(head, brokerProxy))
-                    topicPartitionsAndOffsets -= head
-                  case None => info("No metadata available for: %s. Will try to refresh and add to a consumer thread later." format head)
-                }
-              case _ => debug("Ignoring refresh for %s because we already added it from another thread." format head)
-            }
-          }
-          tpToRefresh.tail
-        }
-
-        while (!tpToRefresh.isEmpty) {
-          tpToRefresh = refresh()
-        }
-
-        loop.done
-      },
-
-      (exception, loop) => {
-        warn("While refreshing brokers for %s: %s. Retrying." format (tpToRefresh.head, exception))
-        debug("Exception detail:", exception)
-      })
-  }
-
-  val sink = new MessageSink {
-    var lastDroppedRefresh = clock()
-
-    def refreshDropped() {
-      if (topicPartitionsAndOffsets.size > 0 && clock() - lastDroppedRefresh > 10000) {
-        refreshBrokers
-        lastDroppedRefresh = clock()
-      }
-    }
-
-    def setIsAtHighWatermark(tp: TopicAndPartition, isAtHighWatermark: Boolean) {
-      setIsAtHead(toSystemStreamPartition(tp), isAtHighWatermark)
-    }
-
-    def needsMoreMessages(tp: TopicAndPartition) = {
-      if(fetchLimitByBytesEnabled) {
-        getMessagesSizeInQueue(toSystemStreamPartition(tp)) < perPartitionFetchThresholdBytes
-      } else {
-        getNumMessagesInQueue(toSystemStreamPartition(tp)) < perPartitionFetchThreshold
-      }
-    }
-
-    def getMessageSize(message: Message): Integer = {
-      message.size + KafkaSystemConsumer.MESSAGE_SIZE_OVERHEAD
-    }
-
-    def addMessage(tp: TopicAndPartition, msg: MessageAndOffset, highWatermark: Long) = {
-      trace("Incoming message %s: %s." format (tp, msg))
-
-      val systemStreamPartition = toSystemStreamPartition(tp)
-      val isAtHead = highWatermark == msg.offset
-      val offset = msg.offset.toString
-      val key = if (msg.message.key != null) {
-        keyDeserializer.fromBytes(Utils.readBytes(msg.message.key))
-      } else {
-        null
-      }
-      val message = if (!msg.message.isNull) {
-        deserializer.fromBytes(Utils.readBytes(msg.message.payload))
-      } else {
-        null
-      }
-
-      if(fetchLimitByBytesEnabled ) {
-        val ime = new IncomingMessageEnvelope(systemStreamPartition, offset, key, message, getMessageSize(msg.message))
-        ime.setTimestamp(if (!msg.message.isNull) msg.message.timestamp else 0L)
-        put(systemStreamPartition, ime)
-      } else {
-        val ime = new IncomingMessageEnvelope(systemStreamPartition, offset, key, message)
-        ime.setTimestamp(if (!msg.message.isNull) msg.message.timestamp else 0L)
-        put(systemStreamPartition, ime)
-      }
-
-      setIsAtHead(systemStreamPartition, isAtHead)
-    }
-
-    def abdicate(tp: TopicAndPartition, nextOffset: Long) {
-      info("Abdicating for %s" format (tp))
-      topicPartitionsAndOffsets += tp -> nextOffset.toString
-      refreshBrokers
-    }
-
-    private def toSystemStreamPartition(tp: TopicAndPartition) = {
-      new SystemStreamPartition(systemName, tp.topic, new Partition(tp.partition))
-    }
-  }
-}
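
For reference, a minimal sketch of the per-partition threshold arithmetic
documented in the deleted constructor above; the numbers mirror the Javadoc
examples, and the class name is illustrative:

    // Sketch only: fetchThreshold = 50000 messages, fetchThresholdBytes =
    // 100000 bytes, 50 registered SystemStreamPartitions.
    public class FetchThresholdSketch {
      public static void main(String[] args) {
        long fetchThreshold = 50000;        // messages, across all partitions
        long fetchThresholdBytes = 100000;  // bytes; <= 0 disables the byte check
        int registeredPartitions = 50;

        long perPartitionThreshold = fetchThreshold / registeredPartitions;
        // messages are buffered twice (KafkaConsumer + SystemConsumers), hence / 2
        long perPartitionThresholdBytes = (fetchThresholdBytes / 2) / registeredPartitions;

        System.out.println(perPartitionThreshold);      // 1000
        System.out.println(perPartitionThresholdBytes); // 1000
      }
    }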

http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
index 51545a0..1aa66dc 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
@@ -35,6 +35,7 @@ class KafkaSystemConsumerMetrics(val systemName: String = "unknown", val registr
   val highWatermark = new ConcurrentHashMap[TopicAndPartition, Gauge[Long]]
 
   /*
+  TODO Fix
    * (String, Int) = (host, port) of BrokerProxy.
    */
 

http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index dd7e584..b745628 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -66,14 +66,14 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
   private final KafkaSystemConsumerMetrics samzaConsumerMetrics;
   private final String clientId;
   private final String metricName;
-  private final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
+  /* package private */ final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
   private final AtomicBoolean stopped = new AtomicBoolean(false);
   private final AtomicBoolean started = new AtomicBoolean(false);
   private final Config config;
   private final boolean fetchThresholdBytesEnabled;
 
   // This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
-  private KafkaConsumerMessageSink messageSink;
+  /* package private */ KafkaConsumerMessageSink messageSink;
   // proxy is doing the actual reading
   private KafkaConsumerProxy proxy;
 
@@ -142,17 +142,6 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
 
     Map<String, String> injectProps = new HashMap<>();
 
-    // the consumer is fully typed, and deserialization can be too. But in case it is not provided we should
-    // default to byte[]
-    if ( !config.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
-      LOG.info("default key serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
-      injectProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
-    }
-    if ( !config.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
-      LOG.info("default value serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
-      injectProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
-    }
-
     // extract kafka consumer configs
     KafkaConsumerConfig consumerConfig =
         KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
@@ -203,7 +192,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
     }
   }
 
-  private void createConsumerProxy() {
+  void createConsumerProxy() {
     // create a sink for passing the messages between the proxy and the consumer
     messageSink = new KafkaConsumerMessageSink();
 
@@ -219,7 +208,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
    Add the TopicPartitions to the proxy.
    Start the proxy thread.
    */
-  private void startConsumer() {
+  void startConsumer() {
     //set the offset for each TopicPartition
     topicPartitions2Offset.forEach((tp, startingOffsetString) -> {
       long startingOffset = Long.valueOf(startingOffsetString);

http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala b/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
index 8544dbf..8d92f4d 100644
--- a/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
+++ b/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
@@ -92,8 +92,8 @@ class TestKafkaCheckpointManager extends KafkaServerTestHarness {
     assertNull(readCp)
 
     writeCheckpoint(checkpointTopic, taskName, checkpoint1)
-
     assertEquals(checkpoint1, readCheckpoint(checkpointTopic, taskName))
+
     // writing a second message and reading it returns a more recent checkpoint
     writeCheckpoint(checkpointTopic, taskName, checkpoint2)
     assertEquals(checkpoint2, readCheckpoint(checkpointTopic, taskName))
@@ -194,7 +194,6 @@ class TestKafkaCheckpointManager extends KafkaServerTestHarness {
     val systemFactory = Util.getObj(systemFactoryClassName, classOf[SystemFactory])
 
     val spec = new KafkaStreamSpec("id", cpTopic, checkpointSystemName, 1, 1, props)
-    System.out.println("CONFIG = " + config)
     new KafkaCheckpointManager(spec, systemFactory, failOnTopicValidation, config, new NoOpMetricsRegistry, serde)
   }
 

http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
index d510076..a3f76e7 100644
--- a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
+++ b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestBrokerProxy.scala
@@ -41,6 +41,7 @@ import org.mockito.{Matchers, Mockito}
 import scala.collection.JavaConverters._
 
 class TestBrokerProxy extends Logging {
+  /*
   val tp2 = new TopicAndPartition("Redbird", 2013)
   var fetchTp1 = true // control whether fetching tp1 messages or not
 
@@ -305,6 +306,7 @@ class TestBrokerProxy extends Logging {
   }
 
   /**
+    * TODO fix
    * Test that makes sure that BrokerProxy abdicates all TopicAndPartitions
    * that it owns when a consumer failure occurs.
    */
@@ -431,4 +433,5 @@ class TestBrokerProxy extends Logging {
     bp.stop
     verify(mockSimpleConsumer).close
   }
+  */
 }

http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.scala b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.scala
deleted file mode 100644
index 8656d10..0000000
--- a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.scala
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.samza.system.kafka
-
-import kafka.api.TopicMetadata
-import kafka.api.PartitionMetadata
-import kafka.cluster.Broker
-import kafka.common.TopicAndPartition
-import kafka.message.Message
-import kafka.message.MessageAndOffset
-import org.apache.kafka.common.protocol.Errors
-import org.apache.samza.system.IncomingMessageEnvelope
-import org.apache.samza.system.SystemStreamPartition
-import org.apache.samza.Partition
-import org.apache.samza.util.TopicMetadataStore
-import org.junit.Test
-import org.junit.Assert._
-import org.apache.samza.system.SystemAdmin
-import org.mockito.Mockito._
-import org.mockito.Matchers._
-
-class TestKafkaSystemConsumer {
-  val systemAdmin: SystemAdmin = mock(classOf[KafkaSystemAdmin])
-  private val SSP: SystemStreamPartition = new SystemStreamPartition("test", "test", new Partition(0))
-  private val envelope: IncomingMessageEnvelope = new IncomingMessageEnvelope(SSP, null, null, null)
-  private val envelopeWithSize: IncomingMessageEnvelope = new IncomingMessageEnvelope(SSP, null, null, null, 100)
-  private val clientId = "TestClientId"
-
-  @Test
-  def testFetchThresholdShouldDivideEvenlyAmongPartitions {
-    val metadataStore = new MockMetadataStore
-    val consumer = new KafkaSystemConsumer("", systemAdmin, new KafkaSystemConsumerMetrics, metadataStore, clientId, fetchThreshold = 50000) {
-      override def refreshBrokers {
-      }
-    }
-
-    for (i <- 0 until 50) {
-      consumer.register(new SystemStreamPartition("test-system", "test-stream", new Partition(i)), "0")
-    }
-
-    consumer.start
-
-    assertEquals(1000, consumer.perPartitionFetchThreshold)
-  }
-
-  @Test
-  def testBrokerCreationShouldTriggerStart {
-    val systemName = "test-system"
-    val streamName = "test-stream"
-    val metrics = new KafkaSystemConsumerMetrics
-    // Lie and tell the store that the partition metadata is empty. We can't
-    // use partition metadata because it has Broker in its constructor, which
-    // is package private to Kafka.
-    val metadataStore = new MockMetadataStore(Map(streamName -> TopicMetadata(streamName, Seq.empty, Errors.NONE)))
-    var hosts = List[String]()
-    var getHostPortCount = 0
-    val consumer = new KafkaSystemConsumer(systemName, systemAdmin, metrics, metadataStore, clientId) {
-      override def getLeaderHostPort(partitionMetadata: Option[PartitionMetadata]): Option[(String, Int)] = {
-        // Generate a unique host every time getHostPort is called.
-        getHostPortCount += 1
-        Some("localhost-%s" format getHostPortCount, 0)
-      }
-
-      override def createBrokerProxy(host: String, port: Int): BrokerProxy = {
-        new BrokerProxy(host, port, systemName, "", metrics, sink) {
-          override def addTopicPartition(tp: TopicAndPartition, nextOffset: Option[String]) = {
-            // Skip this since we normally do verification of offsets, which
-            // tries to connect to Kafka. Rather than mock that, just forget it.
-            nextOffsets.size
-          }
-
-          override def start {
-            hosts :+= host
-          }
-        }
-      }
-    }
-
-    consumer.register(new SystemStreamPartition(systemName, streamName, new Partition(0)), "1")
-    assertEquals(0, hosts.size)
-    consumer.start
-    assertEquals(List("localhost-1"), hosts)
-    // Should trigger a refresh with a new host.
-    consumer.sink.abdicate(new TopicAndPartition(streamName, 0), 2)
-    assertEquals(List("localhost-1", "localhost-2"), hosts)
-  }
-
-  @Test
-  def testConsumerRegisterOlderOffsetOfTheSamzaSSP {
-    when(systemAdmin.offsetComparator(anyString, anyString)).thenCallRealMethod()
-
-    val metadataStore = new MockMetadataStore
-    val consumer = new KafkaSystemConsumer("", systemAdmin, new KafkaSystemConsumerMetrics, metadataStore, clientId, fetchThreshold = 50000)
-    val ssp0 = new SystemStreamPartition("test-system", "test-stream", new Partition(0))
-    val ssp1 = new SystemStreamPartition("test-system", "test-stream", new Partition(1))
-    val ssp2 = new SystemStreamPartition("test-system", "test-stream", new Partition(2))
-
-    consumer.register(ssp0, "0")
-    consumer.register(ssp0, "5")
-    consumer.register(ssp1, "2")
-    consumer.register(ssp1, "3")
-    consumer.register(ssp2, "0")
-
-    assertEquals("0", consumer.topicPartitionsAndOffsets(KafkaSystemConsumer.toTopicAndPartition(ssp0)))
-    assertEquals("2", consumer.topicPartitionsAndOffsets(KafkaSystemConsumer.toTopicAndPartition(ssp1)))
-    assertEquals("0", consumer.topicPartitionsAndOffsets(KafkaSystemConsumer.toTopicAndPartition(ssp2)))
-  }
-
-  @Test
-  def testFetchThresholdBytesShouldDivideEvenlyAmongPartitions {
-    val metadataStore = new MockMetadataStore
-    val consumer = new KafkaSystemConsumer("", systemAdmin, new KafkaSystemConsumerMetrics, metadataStore, clientId,
-      fetchThreshold = 50000, fetchThresholdBytes = 60000L, fetchLimitByBytesEnabled = true) {
-      override def refreshBrokers {
-      }
-    }
-
-    for (i <- 0 until 10) {
-      consumer.register(new SystemStreamPartition("test-system", "test-stream", new Partition(i)), "0")
-    }
-
-    consumer.start
-
-    assertEquals(5000, consumer.perPartitionFetchThreshold)
-    assertEquals(3000, consumer.perPartitionFetchThresholdBytes)
-  }
-
-  @Test
-  def testFetchThresholdBytes {
-    val metadataStore = new MockMetadataStore
-    val consumer = new KafkaSystemConsumer("test-system", systemAdmin, new KafkaSystemConsumerMetrics, metadataStore, clientId,
-      fetchThreshold = 50000, fetchThresholdBytes = 60000L, fetchLimitByBytesEnabled = true) {
-      override def refreshBrokers {
-      }
-    }
-
-    for (i <- 0 until 10) {
-      consumer.register(new SystemStreamPartition("test-system", "test-stream", new Partition(i)), "0")
-    }
-
-    consumer.start
-
-    val msg = Array[Byte](5, 112, 9, 126)
-    val msgAndOffset: MessageAndOffset = MessageAndOffset(new Message(msg), 887654)
-    // 4 data + 18 Message overhead + 80 IncomingMessageEnvelope overhead
-    consumer.sink.addMessage(new TopicAndPartition("test-stream", 0),  msgAndOffset, 887354)
-
-    assertEquals(106, consumer.getMessagesSizeInQueue(new SystemStreamPartition("test-system", "test-stream", new Partition(0))))
-  }
-
-  @Test
-  def testFetchThresholdBytesDisabled {
-    val metadataStore = new MockMetadataStore
-    val consumer = new KafkaSystemConsumer("", systemAdmin, new KafkaSystemConsumerMetrics, metadataStore, clientId,
-      fetchThreshold = 50000, fetchThresholdBytes = 60000L) {
-      override def refreshBrokers {
-      }
-    }
-
-    for (i <- 0 until 10) {
-      consumer.register(new SystemStreamPartition("test-system", "test-stream", new Partition(i)), "0")
-    }
-
-    consumer.start
-
-    assertEquals(5000, consumer.perPartitionFetchThreshold)
-    assertEquals(0, consumer.perPartitionFetchThresholdBytes)
-    assertEquals(0, consumer.getMessagesSizeInQueue(new SystemStreamPartition("test-system", "test-stream", new Partition(0))))
-  }
-}
-
-class MockMetadataStore(var metadata: Map[String, TopicMetadata] = Map()) extends TopicMetadataStore {
-  def getTopicInfo(topics: Set[String]): Map[String, TopicMetadata] = metadata
-}

http://git-wip-us.apache.org/repos/asf/samza/blob/89f79829/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
new file mode 100644
index 0000000..f7f63f3
--- /dev/null
+++ b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
@@ -0,0 +1,203 @@
+package org.apache.samza.system.kafka;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
+import org.apache.kafka.common.serialization.ByteArraySerializer;
+import org.apache.samza.Partition;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.config.MapConfig;
+import org.apache.samza.system.IncomingMessageEnvelope;
+import org.apache.samza.system.SystemStreamPartition;
+import org.apache.samza.util.Clock;
+import org.apache.samza.util.NoOpMetricsRegistry;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+
+public class TestNewKafkaSystemConsumer {
+  public final String TEST_SYSTEM = "test-system";
+  public final String TEST_STREAM = "test-stream";
+  public final String TEST_CLIENT_ID = "testClientId";
+  public final String BOOTSTRAP_SERVER = "127.0.0.1:8888";
+  public final String FETCH_THRESHOLD_MSGS = "50000";
+  public final String FETCH_THRESHOLD_BYTES = "100000";
+
+  @Before
+  public void setUp() {
+
+  }
+
+  private NewKafkaSystemConsumer setupConsumer(String fetchMsg, String fetchBytes) {
+    final Map<String, String> map = new HashMap<>();
+
+    map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD(), TEST_SYSTEM), fetchMsg);
+    map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD_BYTES(), TEST_SYSTEM), fetchBytes);
+    map.put(String.format("systems.%s.consumer.%s", TEST_SYSTEM, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG),
+        BOOTSTRAP_SERVER);
+
+    Config config = new MapConfig(map);
+    KafkaConsumerConfig consumerConfig =
+        KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, TEST_SYSTEM, TEST_CLIENT_ID, Collections.emptyMap());
+    final KafkaConsumer<byte[], byte[]> kafkaConsumer = new MockKafkaConsumer(consumerConfig.originals());
+
+    MockNewKafkaSystemConsumer newKafkaSystemConsumer =
+        new MockNewKafkaSystemConsumer(kafkaConsumer, TEST_SYSTEM, config, TEST_CLIENT_ID,
+            new KafkaSystemConsumerMetrics(TEST_SYSTEM, new NoOpMetricsRegistry()), System::currentTimeMillis);
+
+    return newKafkaSystemConsumer;
+  }
+
+  @Test
+  public void testConfigValidations() {
+
+    final NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+    consumer.start();
+    // should be no failures
+  }
+
+  @Test
+  public void testFetchThresholdShouldDivideEvenlyAmongPartitions() {
+    final NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+    final int partitionsNum = 50;
+    for (int i = 0; i < partitionsNum; i++) {
+      consumer.register(new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(i)), "0");
+    }
+
+    consumer.start();
+
+    Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum, consumer.perPartitionFetchThreshold);
+    Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_BYTES) / 2 / partitionsNum,
+        consumer.perPartitionFetchThresholdBytes);
+  }
+
+  @Test
+  public void testConsumerRegisterOlderOffsetOfTheSamzaSSP() {
+
+    NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+    SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+    SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+    SystemStreamPartition ssp2 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(2));
+
+    consumer.register(ssp0, "0");
+    consumer.register(ssp0, "5");
+    consumer.register(ssp1, "2");
+    consumer.register(ssp1, "3");
+    consumer.register(ssp2, "0");
+
+    assertEquals("0", consumer.topicPartitions2Offset.get(NewKafkaSystemConsumer.toTopicPartition(ssp0)));
+    assertEquals("2", consumer.topicPartitions2Offset.get(NewKafkaSystemConsumer.toTopicPartition(ssp1)));
+    assertEquals("0", consumer.topicPartitions2Offset.get(NewKafkaSystemConsumer.toTopicPartition(ssp2)));
+  }
+
+  @Test
+  public void testFetchThresholdBytes() {
+
+    SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+    SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+    int partitionsNum = 2;
+    int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size
+    int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 1; // fake size
+    int ime11Size = 20;
+    ByteArraySerializer bytesSerde = new ByteArraySerializer();
+    IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
+        bytesSerde.serialize("", "value0".getBytes()), ime0Size);
+    IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
+        bytesSerde.serialize("", "value1".getBytes()), ime1Size);
+    IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
+        bytesSerde.serialize("", "value11".getBytes()), ime11Size);
+    NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+    consumer.register(ssp0, "0");
+    consumer.register(ssp1, "0");
+    consumer.start();
+    consumer.messageSink.addMessage(ssp0, ime0);
+    // queue for ssp0 should be full now, because we added a message of size FETCH_THRESHOLD_MSGS / partitionsNum, which equals the per-partition byte threshold
+    Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp0));
+    consumer.messageSink.addMessage(ssp1, ime1);
+    // queue for ssp1 should be less than full now, because we added a message of size (FETCH_THRESHOLD_MSGS / partitionsNum - 1)
+    Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
+    consumer.messageSink.addMessage(ssp1, ime11);
+    // queue for ssp1 should be full now, because we added a message of size 20 on top
+    Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
+
+    Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
+    Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
+    Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
+    Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
+  }
+
+  @Test
+  public void testFetchThresholdBytesDisabled() {
+    // Pass 0 as fetchThresholdBytes, which disables the size-based limit check
+
+    SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+    SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+    int partitionsNum = 2;
+    int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size, up to the limit
+    int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 100; // fake size, below the limit
+    int ime11Size = 20; // even with the second message, still below the size limit
+    ByteArraySerializer bytesSerde = new ByteArraySerializer();
+    IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
+        bytesSerde.serialize("", "value0".getBytes()), ime0Size);
+    IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
+        bytesSerde.serialize("", "value1".getBytes()), ime1Size);
+    IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
+        bytesSerde.serialize("", "value11".getBytes()), ime11Size);
+
+    // limit by number of messages 4/2 = 2 per partition
+    // limit by number of bytes - disabled
+    NewKafkaSystemConsumer consumer = setupConsumer("4", "0"); // "0" disables the byte-based check
+
+    consumer.register(ssp0, "0");
+    consumer.register(ssp1, "0");
+    consumer.start();
+    consumer.messageSink.addMessage(ssp0, ime0);
+    // would be full by size, but the byte check is disabled and it is not full by message count (1 of 2)
+    Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp0));
+    consumer.messageSink.addMessage(ssp1, ime1);
+    // not full by size or by message count (1 of 2)
+    Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
+    consumer.messageSink.addMessage(ssp1, ime11);
+    // not full by size, but now full by message count (2 of 2)
+    Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
+
+    Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
+    Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
+    Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
+    Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
+  }
+
+  // mock KafkaConsumer and SystemConsumer
+  static class MockKafkaConsumer extends KafkaConsumer {
+    public MockKafkaConsumer(Map<String, Object> configs) {
+      super(configs);
+    }
+  }
+
+  static class MockNewKafkaSystemConsumer extends NewKafkaSystemConsumer {
+    public MockNewKafkaSystemConsumer(Consumer kafkaConsumer, String systemName, Config config, String clientId,
+        KafkaSystemConsumerMetrics metrics, Clock clock) {
+      super(kafkaConsumer, systemName, config, clientId, metrics, clock);
+    }
+
+    @Override
+    void createConsumerProxy() {
+      this.messageSink = new KafkaConsumerMessageSink();
+    }
+
+    @Override
+    void startConsumer() {
+    }
+  }
+}


[20/47] samza git commit: added new samza kafka system consumer using new kafka consumer

Posted by bo...@apache.org.
added new samza kafka system consumer using new kafka consumer


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/c0ea25cb
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/c0ea25cb
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/c0ea25cb

Branch: refs/heads/NewKafkaSystemConsumer
Commit: c0ea25cbc674a1d67546f7f47a6f36f6ee58bdc6
Parents: 7254460
Author: Boris S <bo...@apache.org>
Authored: Wed Aug 29 10:52:30 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Aug 29 10:52:30 2018 -0700

----------------------------------------------------------------------
 .../clients/consumer/KafkaConsumerConfig.java   | 15 ++-
 .../samza/system/kafka/KafkaConsumerProxy.java  |  7 +-
 .../samza/system/kafka/KafkaSystemFactory.scala | 59 +-----------
 .../system/kafka/NewKafkaSystemConsumer.java    | 97 +++++++++++++-------
 4 files changed, 80 insertions(+), 98 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/c0ea25cb/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index 97360e2..b29a041 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -40,6 +40,7 @@ public class KafkaConsumerConfig extends ConsumerConfig {
   private static final String SAMZA_OFFSET_SMALLEST = "smallest";
   private static final String KAFKA_OFFSET_LATEST = "latest";
   private static final String KAFKA_OFFSET_EARLIEST = "earliest";
+  private static final String KAFKA_OFFSET_NONE = "none";
   /*
    * By default, KafkaConsumer will fetch ALL available messages for all the partitions.
    * This may cause memory issues. That's why we will limit the number of messages per partition we get on EACH poll().
@@ -64,16 +65,14 @@ public class KafkaConsumerConfig extends ConsumerConfig {
     consumerProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
     consumerProps.setProperty(ConsumerConfig.CLIENT_ID_CONFIG, clientId);
 
-    /********************************************
-     * Open-source Kafka Consumer configuration *
-     *******************************************/
+    // Open-source Kafka Consumer configuration
     consumerProps.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // Disable consumer auto-commit
 
     consumerProps.setProperty(
         ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
         getAutoOffsetResetValue(consumerProps));  // Translate samza config value to kafka config value
 
-    // makesure bootstrap configs are in ?? SHOULD WE FAIL IF THEY ARE NOT?
+    // make sure bootstrap servers are configured; TODO: should we fail if they are not?
     if (! subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
       // get it from the producer config
       String bootstrapServer = config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
@@ -139,6 +138,14 @@ public class KafkaConsumerConfig extends ConsumerConfig {
    */
   static String getAutoOffsetResetValue(Properties properties) {
     String autoOffsetReset = properties.getProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, KAFKA_OFFSET_LATEST);
+
+    // accept kafka values directly
+    if (autoOffsetReset.equals(KAFKA_OFFSET_EARLIEST) ||
+        autoOffsetReset.equals(KAFKA_OFFSET_LATEST) ||
+        autoOffsetReset.equals(KAFKA_OFFSET_NONE)) {
+      return autoOffsetReset;
+    }
+
     switch (autoOffsetReset) {
       case SAMZA_OFFSET_LARGEST:
         return KAFKA_OFFSET_LATEST;
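
A minimal sketch of the translation this hunk implements, mirroring the
constants above; the behavior for unrecognized values is assumed to fall back
to the Kafka default, latest:

    // Sketch only: mirrors KafkaConsumerConfig.getAutoOffsetResetValue().
    public class AutoOffsetResetSketch {
      static String translate(String autoOffsetReset) {
        switch (autoOffsetReset) {
          case "earliest": // kafka values now pass through unchanged
          case "latest":
          case "none":
            return autoOffsetReset;
          case "largest":  // samza "largest"  -> kafka "latest"
            return "latest";
          case "smallest": // samza "smallest" -> kafka "earliest"
            return "earliest";
          default:         // assumed fallback for unrecognized values
            return "latest";
        }
      }

      public static void main(String[] args) {
        System.out.println(translate("smallest")); // earliest
        System.out.println(translate("none"));     // none
      }
    }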

http://git-wip-us.apache.org/repos/asf/samza/blob/c0ea25cb/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 66971af..01b345a 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -69,7 +69,6 @@ public class KafkaConsumerProxy<K, V> {
   private final Map<SystemStreamPartition, Long> nextOffsets = new ConcurrentHashMap<>();
   // lags behind the high water mark, as reported by the Kafka consumer.
   private final Map<SystemStreamPartition, Long> latestLags = new HashMap<>();
-  private final NewKafkaSystemConsumer.ValueUnwrapper<V> valueUnwrapper;
 
   private volatile boolean isRunning = false;
   private volatile Throwable failureCause = null;
@@ -77,7 +76,7 @@ public class KafkaConsumerProxy<K, V> {
 
   public KafkaConsumerProxy(Consumer<K, V> kafkaConsumer, String systemName, String clientId,
       NewKafkaSystemConsumer.KafkaConsumerMessageSink messageSink, KafkaSystemConsumerMetrics samzaConsumerMetrics,
-      String metricName, NewKafkaSystemConsumer.ValueUnwrapper<V> valueUnwrapper) {
+      String metricName) {
 
     this.kafkaConsumer = kafkaConsumer;
     this.systemName = systemName;
@@ -85,7 +84,6 @@ public class KafkaConsumerProxy<K, V> {
     this.kafkaConsumerMetrics = samzaConsumerMetrics;
     this.metricName = metricName;
     this.clientId = clientId;
-    this.valueUnwrapper = valueUnwrapper;
 
     // TODO - see if we need new metrics (not host:port based)
     this.kafkaConsumerMetrics.registerBrokerProxy(metricName, 0);
@@ -257,8 +255,7 @@ public class KafkaConsumerProxy<K, V> {
       //}
 
       final K key = r.key();
-      final Object value =
-          valueUnwrapper == null ? r.value() : valueUnwrapper.unwrapValue(ssp.getSystemStream(), r.value());
+      final Object value = r.value();
       IncomingMessageEnvelope imEnvelope =
           new IncomingMessageEnvelope(ssp, String.valueOf(r.offset()), key, value, msgSize);
       listMsgs.add(imEnvelope);

http://git-wip-us.apache.org/repos/asf/samza/blob/c0ea25cb/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index c7f6aed..6a5eda9 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -55,64 +55,9 @@ class KafkaSystemFactory extends SystemFactory with Logging {
     val clientId = KafkaUtil.getClientId("samza-consumer", config)
     val metrics = new KafkaSystemConsumerMetrics(systemName, registry)
 
-    // Kind of goofy to need a producer config for consumers, but we need metadata.
-    val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId)
-    val bootstrapServers = producerConfig.bootsrapServers
-    //val consumerConfig = config.getKafkaSystemConsumerConfig(systemName, clientId)
-
-    //val kafkaConfig = new KafkaConfig(config)
-
-
-   // val timeout = consumerConfig.socketTimeoutMs
-    //val bufferSize = consumerConfig.socketReceiveBufferBytes
-    //val fetchSize = new StreamFetchSizes(consumerConfig.fetchMessageMaxBytes, config.getFetchMessageMaxBytesTopics(systemName))
-    //val consumerMinSize = consumerConfig.fetchMinBytes
-    //val consumerMaxWait = consumerConfig.fetchWaitMaxMs
-    //val autoOffsetResetDefault = consumerConfig.autoOffsetReset
-    val autoOffsetResetTopics = config.getAutoOffsetResetTopics(systemName)
-    val fetchThreshold = config.getConsumerFetchThreshold(systemName).getOrElse("50000").toInt
-    val fetchThresholdBytes = config.getConsumerFetchThresholdBytes(systemName).getOrElse("-1").toLong
-    //val offsetGetter = new GetOffset(autoOffsetResetDefault, autoOffsetResetTopics)
-    //val metadataStore = new ClientUtilTopicMetadataStore(bootstrapServers, clientId, timeout)
-
-
-    val kafkaConsumer: KafkaConsumer[Array[Byte], Array[Byte]] =
-      NewKafkaSystemConsumer.getKafkaConsumerImpl(systemName, clientId, config)
-
-    def valueUnwrapper: NewKafkaSystemConsumer.ValueUnwrapper[Array[Byte]] = null;// TODO add real unrapper from
-    val kc = new NewKafkaSystemConsumer (
-      kafkaConsumer, systemName, config, clientId,
-      metrics, new SystemClock, false, valueUnwrapper)
-
-    kc
-    /*
-      new KafkaSystemConsumer(
-      systemName = systemName,
-      systemAdmin = getAdmin(systemName, config),
-      metrics = metrics,
-      metadataStore = metadataStore,
-      clientId = clientId,
-      timeout = timeout,
-      bufferSize = bufferSize,
-      fetchSize = fetchSize,
-      consumerMinSize = consumerMinSize,
-      consumerMaxWait = consumerMaxWait,
-      fetchThreshold = fetchThreshold,
-      fetchThresholdBytes = fetchThresholdBytes,
-      fetchLimitByBytesEnabled = config.isConsumerFetchThresholdBytesEnabled(systemName),
-      offsetGetter = offsetGetter)
-      */
-  }
-
-  /*
-  def getKafkaConsumerImpl(systemName: String, config: KafkaConfig) = {
-    info("Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}", systemName, config)
-
-    val byteArrayDeserializer = new ByteArrayDeserializer
-    new KafkaConsumer[Array[Byte], Array[Byte]](config.configForVanillaConsumer(),
-      byteArrayDeserializer, byteArrayDeserializer)
+    NewKafkaSystemConsumer.getNewKafkaSystemConsumer(
+      systemName, config, clientId, metrics, new SystemClock)
   }
-  */
 
   def getProducer(systemName: String, config: Config, registry: MetricsRegistry): SystemProducer = {
     val clientId = KafkaUtil.getClientId("samza-producer", config)
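
Callers keep going through the SystemFactory interface; a minimal sketch of
obtaining the new consumer via the factory (the broker address is assumed,
and the sketch class is illustrative):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.samza.config.Config;
    import org.apache.samza.config.MapConfig;
    import org.apache.samza.system.SystemConsumer;
    import org.apache.samza.system.kafka.KafkaSystemFactory;
    import org.apache.samza.util.NoOpMetricsRegistry;

    // Sketch only: getConsumer now returns a NewKafkaSystemConsumer under the hood.
    public class FactoryUsageSketch {
      public static void main(String[] args) {
        Map<String, String> map = new HashMap<>();
        map.put("systems.kafka.consumer.bootstrap.servers", "localhost:9092"); // assumed broker
        Config config = new MapConfig(map);

        SystemConsumer consumer =
            new KafkaSystemFactory().getConsumer("kafka", config, new NoOpMetricsRegistry());
        consumer.start();
        consumer.stop();
      }
    }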

http://git-wip-us.apache.org/repos/asf/samza/blob/c0ea25cb/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index 26db610..dd7e584 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -21,6 +21,7 @@
 
 package org.apache.samza.system.kafka;
 
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -40,29 +41,24 @@ import org.apache.samza.Partition;
 import org.apache.samza.SamzaException;
 import org.apache.samza.config.Config;
 import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.config.StreamConfig;
 import org.apache.samza.system.IncomingMessageEnvelope;
 import org.apache.samza.system.SystemConsumer;
 import org.apache.samza.system.SystemStream;
 import org.apache.samza.system.SystemStreamPartition;
 import org.apache.samza.util.BlockingEnvelopeMap;
 import org.apache.samza.util.Clock;
+import org.apache.samza.util.KafkaUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import scala.Option;
+import scala.collection.JavaConversions;
 
 
 public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements SystemConsumer{
 
   private static final Logger LOG = LoggerFactory.getLogger(NewKafkaSystemConsumer.class);
 
-  /**
-   * Provides a way to unwrap the value further. It is used for intermediate stream messages.
-   * @param <T> value type
-   */
-  public interface ValueUnwrapper<T> {
-    Object unwrapValue(SystemStream systemStream, T value);
-  }
-
   private static final long FETCH_THRESHOLD = 50000;
   private static final long FETCH_THRESHOLD_BYTES = -1L;
   private final Consumer<K,V> kafkaConsumer;
@@ -75,7 +71,6 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
   private final AtomicBoolean started = new AtomicBoolean(false);
   private final Config config;
   private final boolean fetchThresholdBytesEnabled;
-  private final ValueUnwrapper<V> valueUnwrapper;
 
   // This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
   private KafkaConsumerMessageSink messageSink;
@@ -99,9 +94,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
       Config config,
       String clientId,
       KafkaSystemConsumerMetrics metrics,
-      Clock clock,
-      boolean fetchThresholdBytesEnabled,
-      ValueUnwrapper<V> valueUnwrapper) {
+      Clock clock) {
 
     super(metrics.registry(),clock, metrics.getClass().getName());
 
@@ -109,41 +102,64 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
     this.clientId = clientId;
     this.systemName = systemName;
     this.config = config;
-    this.fetchThresholdBytesEnabled = fetchThresholdBytesEnabled;
     this.metricName = systemName + " " + clientId;
 
     this.kafkaConsumer = kafkaConsumer;
-    this.valueUnwrapper = valueUnwrapper;
+
+    this.fetchThresholdBytesEnabled = new KafkaConfig(config).isConsumerFetchThresholdBytesEnabled(systemName);
 
     LOG.info(String.format(
         "Created SamzaLiKafkaSystemConsumer for system=%s, clientId=%s, metricName=%s with liKafkaConsumer=%s",
         systemName, clientId, metricName, this.kafkaConsumer.toString()));
   }
 
-  public static KafkaConsumer<byte[], byte[]> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
+  public static <K, V> NewKafkaSystemConsumer getNewKafkaSystemConsumer(
+      String systemName,
+      Config config,
+      String clientId,
+      KafkaSystemConsumerMetrics metrics,
+      Clock clock) {
+
+    // extract consumer configs and create kafka consumer
+    KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
+
+    return new NewKafkaSystemConsumer(kafkaConsumer,
+        systemName,
+        config,
+        clientId,
+        metrics,
+        clock);
+  }
+
+  /**
+   * create kafka consumer
+   * @param systemName
+   * @param clientId
+   * @param config
+   * @return kafka consumer
+   */
+  private static <K, V> KafkaConsumer<K, V> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
 
     Map<String, String> injectProps = new HashMap<>();
-    injectProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
-    injectProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
 
+    // the consumer is fully typed, and deserialization can be too. But in case it is not provided we should
+    // default to byte[]
+    if ( !config.containsKey(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)) {
+      LOG.info("default key serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
+      injectProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+    }
+    if ( !config.containsKey(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)) {
+      LOG.info("default value serialization for the consumer(for {}) to ByteArrayDeserializer", systemName);
+      injectProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+    }
+
+    // extract kafka consumer configs
     KafkaConsumerConfig consumerConfig =
         KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
 
     LOG.info("==============>Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}", systemName, consumerConfig.originals());
-    /*
-    Map<String, Object> kafkaConsumerConfig = consumerConfig.originals().entrySet().stream()
-        .collect(Collectors.toMap((kv)->kv.getKey(), (kv)->(Object)kv.getValue()));
-*/
-
-    return new KafkaConsumer<byte[], byte[]>(consumerConfig.originals());
-  }
 
-  /**
-   * return system name for this consumer
-   * @return system name
-   */
-  public String getSystemName() {
-    return systemName;
+    return new KafkaConsumer<>(consumerConfig.originals());
   }
 
   @Override
@@ -156,7 +172,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
       LOG.warn("attempting to start a stopped consumer");
       return;
     }
-LOG.info("==============>About to start consumer");
+    LOG.info("==============>About to start consumer");
     // initialize the subscriptions for all the registered TopicPartitions
     startSubscription();
     LOG.info("==============>subscription started");
@@ -193,7 +209,7 @@ LOG.info("==============>About to start consumer");
 
     // create the thread with the consumer
     proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink,
-        samzaConsumerMetrics, metricName, valueUnwrapper);
+        samzaConsumerMetrics, metricName);
 
     LOG.info("==============>Created consumer proxy: " + proxy);
   }
@@ -363,6 +379,23 @@ LOG.info("==============>About to start consumer");
     return new SystemStreamPartition(systemName, tp.topic(), new Partition(tp.partition()));
   }
 
+  /**
+   * return system name for this consumer
+   * @return system name
+   */
+  public String getSystemName() {
+    return systemName;
+  }
+
+  private static Set<SystemStream> getIntermediateStreams(Config config) {
+    StreamConfig streamConfig = new StreamConfig(config);
+    Collection<String> streamIds = JavaConversions.asJavaCollection(streamConfig.getStreamIds());
+    return streamIds.stream()
+        .filter(streamConfig::getIsIntermediateStream)
+        .map(id -> streamConfig.streamIdToSystemStream(id))
+        .collect(Collectors.toSet());
+  }
+
   ////////////////////////////////////
   // inner class for the message sink
   ////////////////////////////////////
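
For context, a minimal usage sketch of the new static factory above (a hedged caller-side illustration, not part of this commit; systemName, config, clientId, registry, ssp and startingOffset are assumed to be in scope):

    // Hypothetical caller, e.g. a SystemFactory.getConsumer() implementation.
    KafkaSystemConsumerMetrics metrics = new KafkaSystemConsumerMetrics(systemName, registry);
    NewKafkaSystemConsumer consumer =
        NewKafkaSystemConsumer.getNewKafkaSystemConsumer(systemName, config, clientId, metrics, new SystemClock());
    consumer.register(ssp, startingOffset); // SystemConsumer contract: register SSPs before start()
    consumer.start();

Since getNewKafkaSystemConsumer() defaults both deserializers to ByteArrayDeserializer when none are configured, byte[] keys and values are the safe assumption for such a caller.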


[41/47] samza git commit: make KafkaConsumerProxy constructor package private

Posted by bo...@apache.org.
make KafkaConsumerProxy constructor package private


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/053fe3bb
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/053fe3bb
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/053fe3bb

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 053fe3bb3b8914b1f8895abe2be2cf00943395c7
Parents: f81cf14
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 11 11:36:11 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 11 11:36:11 2018 -0700

----------------------------------------------------------------------
 .../scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/053fe3bb/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 92f9183..4b99fcc 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -74,7 +74,7 @@ public class KafkaConsumerProxy<K, V> {
   private volatile Throwable failureCause = null;
   private final CountDownLatch consumerPollThreadStartLatch = new CountDownLatch(1);
 
-  public KafkaConsumerProxy(Consumer<K, V> kafkaConsumer, String systemName, String clientId,
+  /* package private */KafkaConsumerProxy(Consumer<K, V> kafkaConsumer, String systemName, String clientId,
       KafkaSystemConsumer.KafkaConsumerMessageSink messageSink, KafkaSystemConsumerMetrics samzaConsumerMetrics,
       String metricName) {
 


[09/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/dd39d089
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/dd39d089
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/dd39d089

Branch: refs/heads/NewKafkaSystemConsumer
Commit: dd39d089437905abb93aa1074474b28a18292a54
Parents: 1ad58d4 7a2e192
Author: Boris S <bo...@apache.org>
Authored: Wed Nov 22 11:46:37 2017 -0800
Committer: Boris S <bo...@apache.org>
Committed: Wed Nov 22 11:46:37 2017 -0800

----------------------------------------------------------------------
 .gitignore                                      |   3 +-
 NOTICE                                          |   2 -
 build.gradle                                    |  35 +-
 .../versioned/jobs/configuration-table.html     |  14 +-
 gradle/dependency-versions.gradle               |   4 +-
 .../samza/checkpoint/CheckpointManager.java     |   6 +-
 .../samza/container/SamzaContainerContext.java  |   7 +-
 .../operators/functions/WatermarkFunction.java  |   7 +-
 .../org/apache/samza/sql/udfs/ScalarUdf.java    |  48 ++
 .../org/apache/samza/system/ControlMessage.java |  31 +
 .../apache/samza/system/WatermarkMessage.java   |  26 +
 .../azure/AzureCheckpointManager.java           | 236 +++++++
 .../azure/AzureCheckpointManagerFactory.java    |  33 +
 .../checkpoint/azure/TaskCheckpointEntity.java  |  43 ++
 .../org/apache/samza/config/AzureConfig.java    |   2 +-
 .../coordinator/AzureCoordinationUtils.java     |   2 +-
 .../samza/coordinator/AzureJobCoordinator.java  |   2 +-
 .../producer/EventHubSystemProducer.java        |   2 +-
 .../azure/ITestAzureCheckpointManager.java      | 181 +++++
 .../consumer/TestEventHubSystemConsumer.java    |  56 +-
 .../producer/SwapFirstLastByteInterceptor.java  |  36 +
 .../producer/TestEventHubSystemProducer.java    |  57 ++
 .../apache/samza/PartitionChangeException.java  |  31 +
 .../ClusterBasedJobCoordinator.java             | 108 ++-
 .../clustermanager/ContainerProcessManager.java |  26 +-
 .../clustermanager/SamzaApplicationState.java   |   7 +-
 .../org/apache/samza/config/TaskConfigJava.java |   2 +-
 .../StreamPartitionCountMonitor.java            | 116 ++--
 .../org/apache/samza/execution/StreamEdge.java  |   1 +
 .../apache/samza/operators/StreamGraphImpl.java |   7 +
 .../samza/operators/impl/OperatorImpl.java      |  68 +-
 .../operators/impl/PartitionByOperatorImpl.java |   7 +-
 .../operators/impl/WindowOperatorImpl.java      |  13 +
 .../samza/runtime/LocalApplicationRunner.java   |   2 +-
 .../apache/samza/storage/StorageRecovery.java   |   2 +-
 .../samza/checkpoint/CheckpointTool.scala       |  15 +-
 .../org/apache/samza/config/JobConfig.scala     |   2 -
 .../org/apache/samza/config/StorageConfig.scala |   8 +
 .../org/apache/samza/config/StreamConfig.scala  |  30 +-
 .../org/apache/samza/config/TaskConfig.scala    |   5 +
 .../apache/samza/container/SamzaContainer.scala |   2 +-
 .../samza/coordinator/JobModelManager.scala     |  49 +-
 .../samza/storage/TaskStorageManager.scala      |  54 +-
 .../main/scala/org/apache/samza/util/Util.scala |   1 -
 .../MockClusterResourceManagerFactory.java      |  32 +
 .../clustermanager/MockContainerListener.java   |   1 +
 .../TestClusterBasedJobCoordinator.java         | 108 +++
 .../clustermanager/TestContainerAllocator.java  |  12 +-
 .../TestContainerProcessManager.java            |  22 +-
 .../TestHostAwareContainerAllocator.java        |  16 +-
 .../apache/samza/config/TestTaskConfigJava.java |  20 +
 .../coordinator/JobModelManagerTestUtil.java    |   4 +-
 .../samza/coordinator/TestJobModelManager.java  |  24 +-
 .../apache/samza/execution/TestStreamEdge.java  |   1 +
 .../samza/operators/TestStreamGraphImpl.java    |  38 ++
 .../samza/operators/TestWindowOperator.java     | 565 ----------------
 .../operators/impl/TestOperatorImplGraph.java   |  10 +-
 .../operators/impl/TestWindowOperator.java      | 677 +++++++++++++++++++
 .../samza/storage/MockSystemConsumer.java       |  59 --
 .../apache/samza/storage/MockSystemFactory.java |  45 --
 .../samza/storage/TestStorageRecovery.java      |  37 +-
 .../apache/samza/system/MockSystemFactory.java  | 181 +++++
 .../zk/TestZkBarrierForVersionUpgrade.java      |  23 +-
 .../samza/checkpoint/TestCheckpointTool.scala   |  23 +-
 .../samza/container/TestSamzaContainer.scala    |  13 +-
 .../samza/container/TestTaskInstance.scala      |  10 +-
 .../samza/coordinator/TestJobCoordinator.scala  |  65 +-
 .../TestStreamPartitionCountMonitor.scala       |  82 ++-
 .../processor/StreamProcessorTestUtils.scala    |   3 +-
 .../samza/serializers/TestCheckpointSerde.scala |   8 +
 .../samza/storage/TestTaskStorageManager.scala  | 210 ++++--
 .../TestRangeSystemStreamPartitionMatcher.scala |   1 -
 .../TestRegexSystemStreamPartitionMatcher.scala |   1 -
 .../scala/org/apache/samza/util/TestUtil.scala  |  16 +
 .../checkpoint/kafka/KafkaCheckpointLogKey.java | 110 +++
 .../kafka/KafkaCheckpointLogKeySerde.java       |  68 ++
 .../samza/system/kafka/KafkaStreamSpec.java     |   4 +
 .../kafka/KafkaCheckpointLogKey.scala           | 171 -----
 .../kafka/KafkaCheckpointManager.scala          | 385 ++++++-----
 .../kafka/KafkaCheckpointManagerFactory.scala   |  81 +--
 .../system/kafka/KafkaSystemConsumer.scala      |   8 +-
 .../kafka/KafkaSystemConsumerMetrics.scala      |   2 -
 .../kafka/TestKafkaCheckpointLogKeySerde.java   |  53 ++
 .../kafka/TestKafkaCheckpointManagerJava.java   | 247 +++++++
 .../kafka/TeskKafkaCheckpointLogKey.scala       |  61 --
 .../kafka/TestKafkaCheckpointManager.scala      | 388 ++++-------
 .../system/kafka/TestKafkaSystemAdmin.scala     |   2 +-
 .../samza/storage/kv/RocksDbKeyValueReader.java |   3 +-
 .../apache/samza/monitor/LocalStoreMonitor.java |   1 +
 .../samza/monitor/LocalStoreMonitorMetrics.java |   4 +
 .../apache/samza/sql/avro/AvroRelConverter.java | 183 +++++
 .../samza/sql/avro/AvroRelConverterFactory.java |  44 ++
 .../samza/sql/avro/AvroRelSchemaProvider.java   |  28 +
 .../samza/sql/avro/AvroTypeFactoryImpl.java     | 132 ++++
 ...ConfigBasedAvroRelSchemaProviderFactory.java |  63 ++
 .../org/apache/samza/sql/data/Expression.java   |  38 ++
 .../samza/sql/data/RexToJavaCompiler.java       | 224 ++++++
 .../sql/data/SamzaSqlExecutionContext.java      |  61 ++
 .../samza/sql/data/SamzaSqlRelMessage.java      | 123 ++++
 .../org/apache/samza/sql/fn/FlattenUdf.java     |  36 +
 .../impl/ConfigBasedSourceResolverFactory.java  |  71 ++
 .../samza/sql/impl/ConfigBasedUdfResolver.java  |  97 +++
 .../samza/sql/interfaces/RelSchemaProvider.java |  36 +
 .../interfaces/RelSchemaProviderFactory.java    |  33 +
 .../samza/sql/interfaces/SamzaRelConverter.java |  46 ++
 .../interfaces/SamzaRelConverterFactory.java    |  39 ++
 .../samza/sql/interfaces/SourceResolver.java    |  34 +
 .../sql/interfaces/SourceResolverFactory.java   |  36 +
 .../sql/interfaces/SqlSystemStreamConfig.java   |  74 ++
 .../samza/sql/interfaces/UdfMetadata.java       |  61 ++
 .../samza/sql/interfaces/UdfResolver.java       |  35 +
 .../org/apache/samza/sql/planner/Checker.java   |  93 +++
 .../apache/samza/sql/planner/QueryPlanner.java  | 153 +++++
 .../sql/planner/SamzaSqlOperatorTable.java      | 101 +++
 .../sql/planner/SamzaSqlScalarFunctionImpl.java |  84 +++
 .../sql/planner/SamzaSqlUdfOperatorTable.java   |  62 ++
 .../samza/sql/runner/SamzaSqlApplication.java   |  56 ++
 .../sql/runner/SamzaSqlApplicationConfig.java   | 245 +++++++
 .../sql/runner/SamzaSqlApplicationRunner.java   | 133 ++++
 .../apache/samza/sql/testutil/ConfigUtil.java   |  62 ++
 .../org/apache/samza/sql/testutil/JsonUtil.java |  91 +++
 .../samza/sql/testutil/ReflectionUtils.java     |  62 ++
 .../samza/sql/testutil/SamzaSqlQueryParser.java | 188 +++++
 .../samza/sql/testutil/SqlFileParser.java       | 103 +++
 .../samza/sql/translator/FilterTranslator.java  |  62 ++
 .../samza/sql/translator/ProjectTranslator.java | 108 +++
 .../samza/sql/translator/QueryTranslator.java   |  96 +++
 .../samza/sql/translator/ScanTranslator.java    |  70 ++
 .../samza/sql/translator/TranslatorContext.java | 162 +++++
 .../apache/samza/sql/TestQueryTranslator.java   | 103 +++
 .../sql/TestSamzaSqlApplicationConfig.java      |  92 +++
 .../samza/sql/TestSamzaSqlFileParser.java       |  58 ++
 .../samza/sql/TestSamzaSqlQueryParser.java      |  70 ++
 .../samza/sql/TestSamzaSqlRelMessage.java       |  46 ++
 .../samza/sql/avro/TestAvroRelConversion.java   | 239 +++++++
 .../samza/sql/avro/schemas/ComplexRecord.avsc   | 143 ++++
 .../samza/sql/avro/schemas/ComplexRecord.java   |  92 +++
 .../apache/samza/sql/avro/schemas/MyFixed.java  |  29 +
 .../samza/sql/avro/schemas/SimpleRecord.avsc    |  39 ++
 .../samza/sql/avro/schemas/SimpleRecord.java    |  52 ++
 .../samza/sql/avro/schemas/SubRecord.java       |  53 ++
 .../samza/sql/avro/schemas/TestEnumType.java    |  31 +
 .../samza/sql/e2e/TestSamzaSqlEndToEnd.java     | 137 ++++
 .../sql/system/ConsoleLoggingSystemFactory.java |  83 +++
 .../samza/sql/system/SimpleSystemAdmin.java     |  61 ++
 .../samza/sql/system/TestAvroSystemFactory.java | 156 +++++
 .../samza/sql/testutil/MyTestArrayUdf.java      |  37 +
 .../apache/samza/sql/testutil/MyTestUdf.java    |  45 ++
 .../samza/sql/testutil/SamzaSqlTestConfig.java  | 103 +++
 samza-sql/src/test/resources/log4j.xml          |  43 ++
 .../performance/TestKeyValuePerformance.scala   |   2 +-
 .../samza/processor/TestZkStreamProcessor.java  |  11 +-
 .../processor/TestZkStreamProcessorBase.java    |   3 +-
 .../TestZkStreamProcessorFailures.java          |   8 +-
 .../processor/TestZkStreamProcessorSession.java |   5 +-
 .../operator/TestRepartitionJoinWindowApp.java  |  13 +-
 .../processor/TestZkLocalApplicationRunner.java |  15 +-
 settings.gradle                                 |   5 +-
 158 files changed, 8640 insertions(+), 1843 deletions(-)
----------------------------------------------------------------------



[33/47] samza git commit: cleanup names

Posted by bo...@apache.org.
cleanup names


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/f14d6081
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/f14d6081
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/f14d6081

Branch: refs/heads/NewKafkaSystemConsumer
Commit: f14d6081f25f1738d0a31c9d2798f8bdd52a7c75
Parents: ceb0f6a
Author: Boris S <bo...@apache.org>
Authored: Wed Sep 5 14:26:28 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Sep 5 14:26:28 2018 -0700

----------------------------------------------------------------------
 .../samza/system/kafka/KafkaConsumerProxy.java      | 16 ++++++++--------
 .../samza/system/kafka/NewKafkaSystemConsumer.java  |  6 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/f14d6081/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 7232a0a..5c79017 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -48,7 +48,7 @@ import org.slf4j.LoggerFactory;
 
 /**
  * Separate thread that reads messages from kafka and puts them into the BlockingEnvelopeMap.
- * This class is not thread safe. There will be only one instance of this class per LiKafkaSystemConsumer object.
+ * This class is not thread safe. There will be only one instance of this class per KafkaSystemConsumer object.
  * We still need some synchronization around kafkaConsumer. See pollConsumer() method for details.
  */
 public class KafkaConsumerProxy<K, V> {
@@ -108,7 +108,7 @@ public class KafkaConsumerProxy<K, V> {
         }
       }
     } else {
-      LOG.debug("Tried to start an already started LiKafkaConsumerProxy (%s). Ignoring.", this.toString());
+      LOG.debug("Tried to start an already started KafkaConsumerProxy (%s). Ignoring.", this.toString());
     }
   }
 
@@ -146,14 +146,14 @@ public class KafkaConsumerProxy<K, V> {
         }
         System.out.println("THREAD: finished " + consumerPollThread.getName());
       } catch (Throwable throwable) {
-        LOG.error(String.format("Error in LiKafkaConsumerProxy poll thread for system: %s.", systemName), throwable);
-        // SamzaLiKafkaSystemConsumer uses the failureCause to propagate the throwable to the container
+        LOG.error(String.format("Error in KafkaConsumerProxy poll thread for system: %s.", systemName), throwable);
+        // SamzaKafkaSystemConsumer uses the failureCause to propagate the throwable to the container
         failureCause = throwable;
         isRunning = false;
       }
 
       if (!isRunning) {
-        LOG.info("Stopping the LiKafkaConsumerProxy poll thread for system: {}.", systemName);
+        LOG.info("Stopping the KafkaConsumerProxy poll thread for system: {}.", systemName);
       }
     };
   }
@@ -318,7 +318,7 @@ public class KafkaConsumerProxy<K, V> {
   }
 
   /*
-    The only way to figure out lag for the LiKafkaConsumer is to look at the metrics after each poll() call.
+    The only way to figure out lag for the KafkaConsumer is to look at the metrics after each poll() call.
     One of the metrics (records-lag) shows how far behind the HighWatermark the consumer is.
     This method populates the lag information for each SSP into latestLags member variable.
    */
@@ -335,7 +335,7 @@ public class KafkaConsumerProxy<K, V> {
       MetricName mn = ssp2MetricName.get(ssp);
       Metric currentLagM = consumerMetrics.get(mn);
 
-      // In linkedin-kafka-client 5.*, high watermark is fixed to be the offset of last available message,
+      // High watermark is fixed to be the offset of last available message,
       // so the lag is now at least 0, which is the same as Samza's definition.
       // If the lag is not 0, then isAtHead is not true, and kafkaClient keeps polling.
       long currentLag = (currentLagM != null) ? (long) currentLagM.value() : -1L;
@@ -433,7 +433,7 @@ public class KafkaConsumerProxy<K, V> {
   }
 
   public void stop(long timeout) {
-    System.out.println("THREAD: Shutting down LiKafkaConsumerProxy poll thread:" + consumerPollThread.getName());
+    System.out.println("THREAD: Shutting down KafkaConsumerProxy poll thread:" + consumerPollThread.getName());
 
     isRunning = false;
     try {
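
As the comment in this diff notes, lag can only be learned from the consumer's per-partition "records-lag" metric after each poll(). A hedged sketch of that lookup, mirroring the populateCurrentLags() described here (ssp2MetricName is the proxy's map from SSP to the corresponding MetricName):

    Map<MetricName, ? extends Metric> consumerMetrics = kafkaConsumer.metrics();
    Metric lagMetric = consumerMetrics.get(ssp2MetricName.get(ssp)); // one "records-lag" MetricName per TopicPartition
    long currentLag = (lagMetric != null) ? (long) lagMetric.value() : -1L;
    // currentLag == 0 means the consumer is at the high watermark, i.e. Samza's "at head".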

http://git-wip-us.apache.org/repos/asf/samza/blob/f14d6081/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index 717b45d..afec8ad 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -194,7 +194,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
       try {
         synchronized (kafkaConsumer) {
           // TODO in the future we may need to add special handling here for BEGIN/END_OFFSET
-          // this will call liKafkaConsumer.seekToBegin/End()
+          // this will call KafkaConsumer.seekToBegin/End()
           kafkaConsumer.seek(tp, startingOffset); // this value should already be the 'upcoming' value
         }
       } catch (Exception e) {
@@ -274,7 +274,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
         kafkaConsumer.close();
       }
     } catch (Exception e) {
-      LOG.warn("failed to stop SamzaRawLiKafkaConsumer + " + this, e);
+      LOG.warn("failed to stop SamzaRawKafkaConsumer + " + this, e);
     }
   }
 
@@ -340,7 +340,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
         throw new SamzaException(message, proxy.getFailureCause());
       } else {
         LOG.warn("Failure cause is not populated for KafkaConsumerProxy");
-        throw new SamzaException("LiKafkaConsumerProxy has stopped");
+        throw new SamzaException("KafkaConsumerProxy has stopped");
       }
     }
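
The "synchronization around kafkaConsumer" called out in the proxy's javadoc earlier in this commit is why the seek above runs inside synchronized (kafkaConsumer): KafkaConsumer is not thread safe, and the proxy's poll thread locks on the same instance. A minimal sketch of the poll side, assuming the same convention (timeoutMs is a placeholder):

    ConsumerRecords<K, V> records;
    synchronized (kafkaConsumer) {
      records = kafkaConsumer.poll(timeoutMs); // same monitor as the seek() on the register/start path
    }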
 


[46/47] samza git commit: rename a variable

Posted by bo...@apache.org.
rename a variable


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/74b6cfab
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/74b6cfab
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/74b6cfab

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 74b6cfabdbb5112488965c2fc3629156e0ff8c4c
Parents: ed0648d
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 18 14:17:58 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 18 14:17:58 2018 -0700

----------------------------------------------------------------------
 .../apache/samza/system/kafka/KafkaConsumerProxy.java | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/74b6cfab/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 6fc6491..b67df0a 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -367,20 +367,20 @@ import org.slf4j.LoggerFactory;
     Using the consumer to poll the messages from the stream.
    */
   private void fetchMessages() {
-    Set<SystemStreamPartition> SSPsToFetch = new HashSet<>();
+    Set<SystemStreamPartition> sspsToFetch = new HashSet<>();
     for (SystemStreamPartition ssp : nextOffsets.keySet()) {
       if (sink.needsMoreMessages(ssp)) {
-        SSPsToFetch.add(ssp);
+        sspsToFetch.add(ssp);
       }
     }
-    LOG.debug("pollConsumer {}", SSPsToFetch.size());
-    if (!SSPsToFetch.isEmpty()) {
+    LOG.debug("pollConsumer {}", sspsToFetch.size());
+    if (!sspsToFetch.isEmpty()) {
       kafkaConsumerMetrics.incClientReads(metricName);
 
       Map<SystemStreamPartition, List<IncomingMessageEnvelope>> response;
-      LOG.debug("pollConsumer from following SSPs: {}; total#={}", SSPsToFetch, SSPsToFetch.size());
+      LOG.debug("pollConsumer from following SSPs: {}; total#={}", sspsToFetch, sspsToFetch.size());
 
-      response = pollConsumer(SSPsToFetch, 500); // TODO should be default value from ConsumerConfig
+      response = pollConsumer(sspsToFetch, 500); // TODO should be default value from ConsumerConfig
 
       // move the responses into the queue
       for (Map.Entry<SystemStreamPartition, List<IncomingMessageEnvelope>> e : response.entrySet()) {
@@ -390,7 +390,7 @@ import org.slf4j.LoggerFactory;
         }
       }
 
-      populateCurrentLags(SSPsToFetch); // find current lags for for each SSP
+      populateCurrentLags(sspsToFetch); // find current lags for for each SSP
     } else { // nothing to read
 
       LOG.debug("No topic/partitions need to be fetched for consumer {} right now. Sleeping {}ms.", kafkaConsumer,


[36/47] samza git commit: merge

Posted by bo...@apache.org.
merge


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/9217644e
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/9217644e
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/9217644e

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 9217644ec05cc9dfe0140b5ee488fcea2fed83b9
Parents: 0b6768f 728dc18
Author: Boris S <bo...@apache.org>
Authored: Fri Sep 7 16:00:02 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Sep 7 16:00:02 2018 -0700

----------------------------------------------------------------------
 .../application/ApplicationDescriptor.java      |  80 +++
 .../samza/application/SamzaApplication.java     |  40 ++
 .../samza/application/StreamApplication.java    |  75 +--
 .../StreamApplicationDescriptor.java            | 113 ++++
 .../samza/application/TaskApplication.java      |  86 +++
 .../application/TaskApplicationDescriptor.java  |  64 ++
 .../java/org/apache/samza/config/Config.java    |   3 +-
 .../samza/metrics/MetricsReporterFactory.java   |   5 +-
 .../apache/samza/operators/MessageStream.java   |   9 +-
 .../org/apache/samza/operators/StreamGraph.java | 120 ----
 .../operators/functions/ClosableFunction.java   |   7 +-
 .../operators/functions/InitableFunction.java   |   6 +-
 .../operators/functions/StreamExpander.java     |  16 +-
 .../apache/samza/runtime/ApplicationRunner.java |  92 +--
 .../samza/runtime/ApplicationRunners.java       |  82 +++
 .../apache/samza/runtime/ProcessorContext.java  |  31 +
 .../runtime/ProcessorLifecycleListener.java     |  55 ++
 .../ProcessorLifecycleListenerFactory.java      |  40 ++
 .../samza/task/AsyncStreamTaskFactory.java      |  10 +-
 .../apache/samza/task/StreamTaskFactory.java    |   6 +-
 .../java/org/apache/samza/task/TaskFactory.java |  38 ++
 .../samza/runtime/TestApplicationRunners.java   |  88 +++
 .../application/ApplicationDescriptorImpl.java  | 179 ++++++
 .../application/ApplicationDescriptorUtil.java  |  51 ++
 .../samza/application/ApplicationUtil.java      |  63 ++
 .../application/LegacyTaskApplication.java      |  37 ++
 .../StreamApplicationDescriptorImpl.java        | 381 ++++++++++++
 .../TaskApplicationDescriptorImpl.java          | 129 ++++
 .../samza/container/SamzaContainerListener.java |  22 +-
 .../samza/execution/ExecutionPlanner.java       |   7 +-
 .../org/apache/samza/execution/JobGraph.java    |   6 -
 .../org/apache/samza/execution/JobPlanner.java  | 188 ++++++
 .../apache/samza/execution/LocalJobPlanner.java | 134 +++++
 .../samza/execution/RemoteJobPlanner.java       |  96 +++
 .../samza/operators/MessageStreamImpl.java      |  57 +-
 .../samza/operators/OperatorSpecGraph.java      |  26 +-
 .../apache/samza/operators/StreamGraphSpec.java | 336 -----------
 .../samza/operators/spec/OperatorSpec.java      |   2 +-
 .../stream/IntermediateMessageStreamImpl.java   |   6 +-
 .../apache/samza/processor/StreamProcessor.java | 122 ++--
 .../StreamProcessorLifecycleListener.java       |  49 --
 .../runtime/AbstractApplicationRunner.java      | 135 -----
 .../samza/runtime/ApplicationRunnerMain.java    |  42 +-
 .../samza/runtime/LocalApplicationRunner.java   | 355 ++++-------
 .../samza/runtime/LocalContainerRunner.java     |  56 +-
 .../samza/runtime/RemoteApplicationRunner.java  | 123 ++--
 .../apache/samza/task/StreamOperatorTask.java   |   5 +-
 .../org/apache/samza/task/TaskFactoryUtil.java  | 137 ++---
 .../apache/samza/container/SamzaContainer.scala |  16 +-
 .../scala/org/apache/samza/job/JobRunner.scala  |   2 -
 .../samza/job/local/ThreadJobFactory.scala      |  49 +-
 .../application/MockStreamApplication.java      |  29 +
 .../samza/application/TestApplicationUtil.java  |  96 +++
 .../TestStreamApplicationDescriptorImpl.java    | 584 +++++++++++++++++++
 .../TestTaskApplicationDescriptorImpl.java      | 144 +++++
 .../samza/execution/TestExecutionPlanner.java   | 192 +++---
 .../execution/TestJobGraphJsonGenerator.java    | 120 ++--
 .../org/apache/samza/execution/TestJobNode.java |  53 +-
 .../samza/execution/TestLocalJobPlanner.java    | 211 +++++++
 .../samza/execution/TestRemoteJobPlanner.java   |  88 +++
 .../samza/operators/TestJoinOperator.java       | 103 ++--
 .../samza/operators/TestMessageStreamImpl.java  |  29 +-
 .../samza/operators/TestOperatorSpecGraph.java  |  19 +-
 .../samza/operators/TestStreamGraphSpec.java    | 506 ----------------
 .../operators/impl/TestOperatorImplGraph.java   | 190 +++---
 .../operators/impl/TestWindowOperator.java      | 147 ++---
 .../spec/TestPartitionByOperatorSpec.java       |  70 ++-
 .../samza/processor/TestStreamProcessor.java    | 139 +++--
 .../runtime/TestApplicationRunnerMain.java      |  47 +-
 .../runtime/TestLocalApplicationRunner.java     | 311 +++-------
 .../runtime/TestRemoteApplicationRunner.java    |  35 +-
 .../apache/samza/task/MockAsyncStreamTask.java  |  31 +
 .../org/apache/samza/task/MockStreamTask.java   |  31 +
 .../apache/samza/task/TestTaskFactoryUtil.java  | 215 ++-----
 .../samza/testUtils/TestAsyncStreamTask.java    |  35 --
 .../samza/testUtils/TestStreamApplication.java  |  33 --
 .../apache/samza/testUtils/TestStreamTask.java  |  34 --
 .../samza/container/TestSamzaContainer.scala    |  76 ++-
 .../samza/sql/runner/SamzaSqlApplication.java   |  13 +-
 .../sql/runner/SamzaSqlApplicationRunner.java   |  53 +-
 .../samza/sql/translator/JoinTranslator.java    |   2 +-
 .../samza/sql/translator/QueryTranslator.java   |  27 +-
 .../samza/sql/translator/ScanTranslator.java    |   8 +-
 .../samza/sql/translator/TranslatorContext.java |  19 +-
 .../apache/samza/sql/e2e/TestSamzaSqlTable.java |   8 +-
 .../runner/TestSamzaSqlApplicationRunner.java   |   2 -
 .../sql/translator/TestFilterTranslator.java    |   6 +-
 .../sql/translator/TestJoinTranslator.java      |  16 +-
 .../sql/translator/TestProjectTranslator.java   |  14 +-
 .../sql/translator/TestQueryTranslator.java     | 162 +++--
 .../example/AppWithGlobalConfigExample.java     |  25 +-
 .../apache/samza/example/BroadcastExample.java  |  22 +-
 .../samza/example/KeyValueStoreExample.java     |  19 +-
 .../org/apache/samza/example/MergeExample.java  |  18 +-
 .../samza/example/OrderShipmentJoinExample.java |  19 +-
 .../samza/example/PageViewCounterExample.java   |  15 +-
 .../samza/example/RepartitionExample.java       |  19 +-
 .../samza/example/TaskApplicationExample.java   |  77 +++
 .../org/apache/samza/example/WindowExample.java |  18 +-
 .../samza/system/mock/MockSystemConsumer.java   |   4 +-
 .../apache/samza/test/framework/TestRunner.java |  41 +-
 .../integration/LocalApplicationRunnerMain.java |  21 +-
 .../TestStandaloneIntegrationApplication.java   |   9 +-
 .../processor/TestZkStreamProcessorBase.java    |  20 +-
 .../EndOfStreamIntegrationTest.java             |  37 +-
 .../WatermarkIntegrationTest.java               |  62 +-
 .../test/framework/BroadcastAssertApp.java      |   7 +-
 .../StreamApplicationIntegrationTest.java       |   9 +-
 ...StreamApplicationIntegrationTestHarness.java |  42 +-
 .../samza/test/framework/TestTimerApp.java      |   7 +-
 .../apache/samza/test/framework/TimerTest.java  |  18 +-
 .../test/operator/RepartitionJoinWindowApp.java |  25 +-
 .../test/operator/RepartitionWindowApp.java     |  20 +-
 .../samza/test/operator/SessionWindowApp.java   |  17 +-
 .../operator/TestRepartitionJoinWindowApp.java  |  30 +-
 .../test/operator/TestRepartitionWindowApp.java |  10 +-
 .../samza/test/operator/TumblingWindowApp.java  |  16 +-
 .../test/processor/TestStreamApplication.java   |  82 +--
 .../test/processor/TestStreamProcessor.java     |  18 +-
 .../processor/TestZkLocalApplicationRunner.java | 317 +++++-----
 .../apache/samza/test/table/TestLocalTable.java |  39 +-
 .../table/TestLocalTableWithSideInputs.java     |  13 +-
 .../samza/test/table/TestRemoteTable.java       |  27 +-
 .../benchmark/SystemConsumerWithSamzaBench.java |  14 +-
 124 files changed, 5280 insertions(+), 3632 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/9217644e/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/samza/blob/9217644e/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
----------------------------------------------------------------------
diff --cc samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
index 0d71303,abd7f65..bec4ec0
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
@@@ -19,17 -19,19 +19,18 @@@
  
  package org.apache.samza.job.local
  
+ import org.apache.samza.application.{ApplicationDescriptorUtil, ApplicationUtil}
 -import org.apache.samza.config.{Config, TaskConfigJava}
  import org.apache.samza.config.JobConfig._
  import org.apache.samza.config.ShellCommandConfig._
 +import org.apache.samza.config.{Config, TaskConfigJava}
  import org.apache.samza.container.{SamzaContainer, SamzaContainerListener, TaskName}
  import org.apache.samza.coordinator.JobModelManager
  import org.apache.samza.coordinator.stream.CoordinatorStreamManager
  import org.apache.samza.job.{StreamJob, StreamJobFactory}
  import org.apache.samza.metrics.{JmxServer, MetricsRegistryMap, MetricsReporter}
- import org.apache.samza.operators.StreamGraphSpec
+ import org.apache.samza.runtime.ProcessorContext
  import org.apache.samza.storage.ChangelogStreamManager
 -import org.apache.samza.task.TaskFactory
--import org.apache.samza.task.TaskFactoryUtil
++import org.apache.samza.task.{TaskFactory, TaskFactoryUtil}
  import org.apache.samza.util.Logging
  
  import scala.collection.JavaConversions._
@@@ -72,32 -72,36 +73,36 @@@ class ThreadJobFactory extends StreamJo
  
      val containerId = "0"
      val jmxServer = new JmxServer
-     val streamApp = TaskFactoryUtil.createStreamApplication(config)
- 
-     val taskFactory = if (streamApp != null) {
-       val graphSpec = new StreamGraphSpec(config)
-       streamApp.init(graphSpec, config)
-       TaskFactoryUtil.createTaskFactory(graphSpec.getOperatorSpecGraph(), graphSpec.getContextManager)
-     } else {
-       TaskFactoryUtil.createTaskFactory(config)
-     }
+ 
+     val appDesc = ApplicationDescriptorUtil.getAppDescriptor(ApplicationUtil.fromConfig(config), config)
 -    val taskFactory : TaskFactory[_] = TaskFactoryUtil.getTaskFactory(appDesc)
++    val taskFactory: TaskFactory[_] = TaskFactoryUtil.getTaskFactory(appDesc)
  
      // Give developers a nice friendly warning if they've specified task.opts and are using a threaded job.
      config.getTaskOpts match {
-       case Some(taskOpts) => warn("%s was specified in config, but is not being used because job is being executed with ThreadJob. You probably want to run %s=%s." format(TASK_JVM_OPTS, STREAM_JOB_FACTORY_CLASS, classOf[ProcessJobFactory].getName))
+       case Some(taskOpts) => warn("%s was specified in config, but is not being used because job is being executed with ThreadJob. " +
 -        "You probably want to run %s=%s." format (TASK_JVM_OPTS, STREAM_JOB_FACTORY_CLASS, classOf[ProcessJobFactory].getName))
++        "You probably want to run %s=%s." format(TASK_JVM_OPTS, STREAM_JOB_FACTORY_CLASS, classOf[ProcessJobFactory].getName))
        case _ => None
      }
  
-     val containerListener = new SamzaContainerListener {
-       override def onContainerFailed(t: Throwable): Unit = {
-         error("Container failed.", t)
-         throw t
-       }
- 
-       override def onContainerStop(): Unit = {
-       }
- 
-       override def onContainerStart(): Unit = {
+     val containerListener = {
 -      val processorLifecycleListener = appDesc.getProcessorLifecycleListenerFactory().createInstance(new ProcessorContext() { }, config)
++      val processorLifecycleListener = appDesc.getProcessorLifecycleListenerFactory().createInstance(new ProcessorContext() {}, config)
+       new SamzaContainerListener {
+         override def afterFailure(t: Throwable): Unit = {
+           processorLifecycleListener.afterFailure(t)
+           throw t
+         }
+ 
+         override def afterStart(): Unit = {
+           processorLifecycleListener.afterStart()
+         }
+ 
+         override def afterStop(): Unit = {
+           processorLifecycleListener.afterStop()
+         }
+ 
+         override def beforeStart(): Unit = {
+           processorLifecycleListener.beforeStart()
+         }
  
        }
      }


[17/47] samza git commit: test

Posted by bo...@apache.org.
test


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/4801709f
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/4801709f
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/4801709f

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 4801709f3d8d2b50a059abb830de23ffcdaffda5
Parents: 57fca52
Author: Boris S <bo...@apache.org>
Authored: Thu Aug 16 10:38:26 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Thu Aug 16 10:38:26 2018 -0700

----------------------------------------------------------------------
 .../samza/checkpoint/kafka/TestKafkaCheckpointManager.scala   | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/4801709f/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala b/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
index 8d92f4d..065170c 100644
--- a/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
+++ b/samza-kafka/src/test/scala/org/apache/samza/checkpoint/kafka/TestKafkaCheckpointManager.scala
@@ -88,12 +88,12 @@ class TestKafkaCheckpointManager extends KafkaServerTestHarness {
     zkClient.close
 
     // read before topic exists should result in a null checkpoint
-    val readCp = readCheckpoint(checkpointTopic, taskName)
-    assertNull(readCp)
+    //val readCp = readCheckpoint(checkpointTopic, taskName)
+    //assertNull(readCp)
 
     writeCheckpoint(checkpointTopic, taskName, checkpoint1)
     assertEquals(checkpoint1, readCheckpoint(checkpointTopic, taskName))
-
+try {Thread.sleep(20000)} catch { case e:Exception =>() }
     // writing a second message and reading it returns a more recent checkpoint
     writeCheckpoint(checkpointTopic, taskName, checkpoint2)
     assertEquals(checkpoint2, readCheckpoint(checkpointTopic, taskName))
@@ -194,6 +194,7 @@ class TestKafkaCheckpointManager extends KafkaServerTestHarness {
     val systemFactory = Util.getObj(systemFactoryClassName, classOf[SystemFactory])
 
     val spec = new KafkaStreamSpec("id", cpTopic, checkpointSystemName, 1, 1, props)
+    System.out.println("CONFIG:" + config)
     new KafkaCheckpointManager(spec, systemFactory, failOnTopicValidation, config, new NoOpMetricsRegistry, serde)
   }
 


[45/47] samza git commit: addressed comments

Posted by bo...@apache.org.
addressed comments


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/ed0648dc
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/ed0648dc
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/ed0648dc

Branch: refs/heads/NewKafkaSystemConsumer
Commit: ed0648dca2b2a902875073861a433238d84ce68f
Parents: 5120740
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 18 13:12:14 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 18 13:12:14 2018 -0700

----------------------------------------------------------------------
 .../org/apache/samza/system/kafka/KafkaConsumerProxy.java      | 2 +-
 .../org/apache/samza/system/kafka/KafkaSystemFactory.scala     | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/ed0648dc/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 83e7a58..6fc6491 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -51,7 +51,7 @@ import org.slf4j.LoggerFactory;
  * This class is not thread safe. There will be only one instance of this class per KafkaSystemConsumer object.
  * We still need some synchronization around kafkaConsumer. See pollConsumer() method for details.
  */
-public class KafkaConsumerProxy<K, V> {
+/*package private */class KafkaConsumerProxy<K, V> {
   private static final Logger LOG = LoggerFactory.getLogger(KafkaConsumerProxy.class);
 
   private static final int SLEEP_MS_WHILE_NO_TOPIC_PARTITION = 100;

http://git-wip-us.apache.org/repos/asf/samza/blob/ed0648dc/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index 9f92583..5342b08 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -53,10 +53,10 @@ class KafkaSystemFactory extends SystemFactory with Logging {
     val kafkaConsumer = KafkaSystemConsumer.getKafkaConsumerImpl(systemName, clientId, config)
     info("Created kafka consumer for system %s, clientId %s: %s" format (systemName, clientId, kafkaConsumer))
 
-    val kc = new KafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, new SystemClock)
-    info("Created samza system consumer %s" format  (kc.toString))
+    val kafkaSystemConsumer = new KafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, new SystemClock)
+    info("Created samza system consumer %s" format  (kafkaSystemConsumer.toString))
 
-    kc
+    kafkaSystemConsumer
   }
 
   def getProducer(systemName: String, config: Config, registry: MetricsRegistry): SystemProducer = {


[02/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/410ce78b
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/410ce78b
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/410ce78b

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 410ce78ba1ff8dafa2587481473e62ac9cfa6f4f
Parents: a31a7aa 343712e
Author: Boris S <bo...@apache.org>
Authored: Mon Oct 16 18:20:04 2017 -0700
Committer: Boris S <bo...@apache.org>
Committed: Mon Oct 16 18:20:04 2017 -0700

----------------------------------------------------------------------
 .../versioned/jobs/configuration-table.html     |  8 +++++
 .../samza/storage/kv/RocksDbKeyValueStore.scala | 32 ++++++++++++-----
 .../storage/kv/TestRocksDbKeyValueStore.scala   | 38 ++++++++++++++++++--
 3 files changed, 66 insertions(+), 12 deletions(-)
----------------------------------------------------------------------



[19/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/7f7b5594
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/7f7b5594
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/7f7b5594

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 7f7b5594c21e9e3ebf25d243cbf078dbf6201a0f
Parents: 57fca52 a9ff093
Author: Boris S <bo...@apache.org>
Authored: Wed Aug 22 11:31:26 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Aug 22 11:31:26 2018 -0700

----------------------------------------------------------------------
 .../samza/execution/ExecutionPlanner.java       | 20 +++++---
 .../runtime/AbstractApplicationRunner.java      | 20 +++++---
 .../samza/runtime/LocalApplicationRunner.java   | 41 ++++++++--------
 .../samza/runtime/RemoteApplicationRunner.java  | 36 +++++++-------
 .../org/apache/samza/config/MetricsConfig.scala | 11 +++--
 .../diagnostics/DiagnosticsExceptionEvent.java  |  6 +--
 .../scala/org/apache/samza/job/JobRunner.scala  |  2 +-
 .../reporter/MetricsSnapshotReporter.scala      | 35 +++++++-------
 .../MetricsSnapshotReporterFactory.scala        |  6 +--
 .../runtime/TestLocalApplicationRunner.java     | 50 ++++++++++++--------
 10 files changed, 129 insertions(+), 98 deletions(-)
----------------------------------------------------------------------



[10/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/67e611ee
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/67e611ee
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/67e611ee

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 67e611ee47f0747b81c0de5d061f9888145b605a
Parents: dd39d08 b00ebd2
Author: Boris S <bo...@apache.org>
Authored: Wed Jan 10 09:50:40 2018 -0800
Committer: Boris S <bo...@apache.org>
Committed: Wed Jan 10 09:50:40 2018 -0800

----------------------------------------------------------------------
 RELEASE.md                                      |   4 +-
 bin/merge-pull-request.py                       |  28 +-
 build.gradle                                    |  96 +++++-
 docs/README.md                                  |   4 +-
 docs/_config.yml                                |   4 +-
 docs/_docs/replace-versioned.sh                 |   3 +
 docs/_layouts/default.html                      |   1 +
 docs/archive/index.html                         |   8 +
 .../documentation/hadoop/multi_stage_batch.png  | Bin 0 -> 65364 bytes
 .../hadoop/unified_batch_streaming.png          | Bin 0 -> 17371 bytes
 .../azure/eventhub_send_methods.png             | Bin 0 -> 52427 bytes
 .../documentation/versioned/aws/kinesis.md      | 104 +++++++
 .../documentation/versioned/azure/eventhubs.md  | 197 ++++++++++++
 .../versioned/container/metrics-table.html      |  12 +
 .../documentation/versioned/hadoop/consumer.md  | 110 +++++++
 .../documentation/versioned/hadoop/overview.md  |  46 +++
 .../documentation/versioned/hadoop/producer.md  |  68 ++++
 .../documentation/versioned/hdfs/consumer.md    | 110 -------
 .../documentation/versioned/hdfs/producer.md    |  70 -----
 docs/learn/documentation/versioned/index.html   |  27 +-
 .../versioned/jobs/configuration-table.html     | 128 ++++++++
 .../versioned/yarn/yarn-host-affinity.md        |   2 +-
 .../versioned/yarn/yarn-security.md             |   1 -
 .../versioned/deploy-samza-job-from-hdfs.md     |   2 +-
 .../tutorials/versioned/deploy-samza-to-CDH.md  |   4 +-
 .../versioned/hello-samza-high-level-code.md    |  87 +++---
 .../versioned/hello-samza-high-level-yarn.md    |   2 +-
 .../versioned/hello-samza-high-level-zk.md      |   2 +-
 docs/learn/tutorials/versioned/index.md         |   3 +
 .../versioned/remote-debugging-samza.md         |   2 +-
 .../versioned/run-in-multi-node-yarn.md         |   4 +-
 .../versioned/samza-rest-getting-started.md     |   2 +-
 docs/learn/tutorials/versioned/samza-sql.md     | 123 ++++++++
 docs/learn/tutorials/versioned/samza-tools.md   | 109 +++++++
 docs/startup/download/index.md                  |  24 +-
 docs/startup/hello-samza/versioned/index.md     |   2 +-
 docs/startup/preview/index.md                   | 135 ++++++--
 .../startup/releases/versioned/release-notes.md |  82 +++++
 gradle.properties                               |   2 +-
 gradle/dependency-versions.gradle               |   1 +
 .../java/org/apache/samza/operators/KV.java     |   2 +-
 .../apache/samza/operators/MessageStream.java   |  69 ++++-
 .../org/apache/samza/operators/StreamGraph.java |  48 ++-
 .../apache/samza/operators/TableDescriptor.java |  73 +++++
 .../functions/StreamTableJoinFunction.java      |  59 ++++
 .../org/apache/samza/serializers/KVSerde.java   |   8 +-
 .../table/LocalStoreBackedTableProvider.java    |  37 +++
 .../org/apache/samza/table/ReadWriteTable.java  |  75 +++++
 .../org/apache/samza/table/ReadableTable.java   |  61 ++++
 .../main/java/org/apache/samza/table/Table.java |  31 ++
 .../org/apache/samza/table/TableProvider.java   |  57 ++++
 .../samza/table/TableProviderFactory.java       |  35 +++
 .../java/org/apache/samza/table/TableSpec.java  | 125 ++++++++
 .../java/org/apache/samza/task/TaskContext.java |   6 +-
 .../kinesis/KinesisAWSCredentialsProvider.java  |  69 +++++
 .../samza/system/kinesis/KinesisConfig.java     | 287 +++++++++++++++++
 .../system/kinesis/KinesisSystemAdmin.java      | 124 ++++++++
 .../system/kinesis/KinesisSystemFactory.java    |  87 ++++++
 .../KinesisIncomingMessageEnvelope.java         |  62 ++++
 .../consumer/KinesisRecordProcessor.java        | 208 +++++++++++++
 .../KinesisRecordProcessorListener.java         |  51 +++
 .../kinesis/consumer/KinesisSystemConsumer.java | 256 +++++++++++++++
 .../consumer/KinesisSystemConsumerOffset.java   | 107 +++++++
 .../consumer/NoAvailablePartitionException.java |  38 +++
 .../system/kinesis/consumer/SSPAllocator.java   |  73 +++++
 .../metrics/KinesisSystemConsumerMetrics.java   | 106 +++++++
 .../system/kinesis/metrics/SamzaHistogram.java  |  63 ++++
 .../TestKinesisAWSCredentialsProvider.java      |  60 ++++
 .../samza/system/kinesis/TestKinesisConfig.java | 132 ++++++++
 .../kinesis/TestKinesisSystemFactory.java       | 115 +++++++
 .../consumer/TestKinesisRecordProcessor.java    | 301 ++++++++++++++++++
 .../consumer/TestKinesisSystemConsumer.java     | 270 ++++++++++++++++
 .../TestKinesisSystemConsumerOffset.java        |  48 +++
 .../kinesis/consumer/TestSSPAllocator.java      | 127 ++++++++
 .../azure/AzureCheckpointManager.java           |  31 +-
 .../azure/AzureCheckpointManagerFactory.java    |   4 +-
 .../samza/system/eventhub/EventHubConfig.java   |  71 ++++-
 .../eventhub/SamzaEventHubClientManager.java    |   4 +-
 .../eventhub/admin/EventHubSystemAdmin.java     |  17 +-
 .../consumer/EventHubSystemConsumer.java        |  77 +++--
 .../producer/EventHubSystemProducer.java        | 103 +++---
 .../azure/ITestAzureCheckpointManager.java      |   2 +-
 .../MockEventHubClientManagerFactory.java       |  18 ++
 .../eventhub/admin/TestEventHubSystemAdmin.java |  15 +-
 .../consumer/ITestEventHubSystemConsumer.java   |   5 +-
 .../consumer/TestEventHubSystemConsumer.java    |  19 +-
 .../producer/ITestEventHubSystemProducer.java   |   2 +
 .../producer/TestEventHubSystemProducer.java    |  34 +-
 .../AbstractContainerAllocator.java             |  17 +-
 .../clustermanager/ClusterResourceManager.java  |  26 +-
 .../clustermanager/ContainerProcessManager.java |  74 ++++-
 .../clustermanager/SamzaApplicationState.java   |   8 +-
 .../samza/clustermanager/SamzaResource.java     |   8 +
 .../apache/samza/config/JavaStorageConfig.java  |   5 +-
 .../apache/samza/config/JavaTableConfig.java    |  87 ++++++
 .../container/ContainerHeartbeatClient.java     |  20 +-
 .../container/ContainerHeartbeatMonitor.java    |  11 +-
 .../apache/samza/container/TaskContextImpl.java |  24 +-
 .../AllSspToSingleTaskGrouperFactory.java       |  50 ++-
 .../samza/execution/ExecutionPlanner.java       |   5 +
 .../org/apache/samza/execution/JobGraph.java    |  16 +
 .../samza/execution/JobGraphJsonGenerator.java  |  60 +++-
 .../org/apache/samza/execution/JobNode.java     |  54 +++-
 .../samza/operators/BaseTableDescriptor.java    |  94 ++++++
 .../samza/operators/MessageStreamImpl.java      |  35 ++-
 .../apache/samza/operators/StreamGraphImpl.java |  46 ++-
 .../org/apache/samza/operators/TableImpl.java   |  40 +++
 .../samza/operators/impl/InputOperatorImpl.java |   2 +-
 .../samza/operators/impl/OperatorImpl.java      |  16 +-
 .../samza/operators/impl/OperatorImplGraph.java |  44 ++-
 .../operators/impl/OutputOperatorImpl.java      |   2 +-
 .../operators/impl/PartitionByOperatorImpl.java |   7 +-
 .../operators/impl/SendToTableOperatorImpl.java |  71 +++++
 .../impl/StreamTableJoinOperatorImpl.java       |  82 +++++
 .../samza/operators/spec/InputOperatorSpec.java |  10 +-
 .../samza/operators/spec/OperatorSpec.java      |  12 +-
 .../samza/operators/spec/OperatorSpecs.java     |  44 ++-
 .../samza/operators/spec/OutputStreamImpl.java  |  10 +-
 .../operators/spec/SendToTableOperatorSpec.java |  65 ++++
 .../spec/StreamTableJoinOperatorSpec.java       |  67 ++++
 .../stream/IntermediateMessageStreamImpl.java   |  13 +
 .../samza/runtime/LocalContainerRunner.java     |   9 +-
 .../samza/runtime/RemoteApplicationRunner.java  |   7 +-
 .../standalone/PassthroughJobCoordinator.java   |   6 +-
 .../org/apache/samza/table/TableManager.java    | 153 +++++++++
 .../org/apache/samza/zk/ZkJobCoordinator.java   |  30 +-
 .../main/java/org/apache/samza/zk/ZkUtils.java  |   2 +-
 .../org/apache/samza/config/JobConfig.scala     |   1 +
 .../apache/samza/container/SamzaContainer.scala |  75 ++---
 .../apache/samza/container/TaskInstance.scala   |  40 ++-
 .../samza/coordinator/JobModelManager.scala     |  23 +-
 .../org/apache/samza/metrics/JvmMetrics.scala   |  28 +-
 .../MockClusterResourceManager.java             |  19 +-
 .../MockClusterResourceManagerCallback.java     |  10 +
 .../clustermanager/TestContainerAllocator.java  |  55 ----
 .../TestContainerProcessManager.java            | 101 +++---
 .../TestHostAwareContainerAllocator.java        |  56 ----
 .../samza/config/TestJavaStorageConfig.java     |  13 +
 .../samza/config/TestJavaTableConfig.java       |  58 ++++
 .../stream/TestAllSspToSingleTaskGrouper.java   | 125 ++++++++
 .../execution/TestJobGraphJsonGenerator.java    |  75 +++++
 .../samza/operators/TestMessageStreamImpl.java  |  70 ++++-
 .../samza/operators/TestStreamGraphImpl.java    |  25 +-
 .../samza/operators/impl/TestOperatorImpl.java  |   1 +
 .../impl/TestStreamTableJoinOperatorImpl.java   | 101 ++++++
 .../apache/samza/table/TestTableManager.java    | 176 +++++++++++
 .../org/apache/samza/task/TestAsyncRunLoop.java |  14 +-
 .../kafka/KafkaCheckpointManager.scala          |   2 +-
 .../system/kafka/KafkaSystemProducer.scala      | 135 ++++----
 .../kafka/TestKafkaCheckpointManager.scala      |   1 -
 .../system/kafka/TestKafkaSystemProducer.scala  |  39 ++-
 .../kv/inmemory/InMemoryTableDescriptor.java    |  59 ++++
 .../kv/inmemory/InMemoryTableProvider.java      |  65 ++++
 .../inmemory/InMemoryTableProviderFactory.java  |  33 ++
 .../inmemory/TestInMemoryTableDescriptor.java   |  48 +++
 .../kv/inmemory/TestInMemoryTableProvider.java  |  65 ++++
 .../storage/kv/RocksDbTableDescriptor.java      | 232 ++++++++++++++
 .../samza/storage/kv/RocksDbTableProvider.java  |  64 ++++
 .../storage/kv/RocksDbTableProviderFactory.java |  31 ++
 .../samza/storage/kv/RocksDbKeyValueStore.scala |  12 +-
 .../storage/kv/TestRocksDbTableDescriptor.java  |  87 ++++++
 .../storage/kv/TestRocksDbTableProvider.java    |  66 ++++
 .../kv/BaseLocalStoreBackedTableDescriptor.java |  56 ++++
 .../kv/BaseLocalStoreBackedTableProvider.java   |  92 ++++++
 .../kv/LocalStoreBackedReadWriteTable.java      |  68 ++++
 .../kv/LocalStoreBackedReadableTable.java       |  61 ++++
 .../samza/storage/kv/AccessLoggedStore.scala    |   4 +-
 .../storage/kv/KeyValueStorageEngine.scala      |  14 +-
 .../kv/KeyValueStorageEngineMetrics.scala       |   4 +-
 .../apache/samza/storage/kv/LoggedStore.scala   |   4 +-
 .../storage/kv/NullSafeKeyValueStore.scala      |   4 +-
 .../storage/kv/SerializedKeyValueStore.scala    |   4 +-
 .../TestLocalBaseStoreBackedTableProvider.java  |  77 +++++
 .../storage/kv/TestKeyValueStorageEngine.scala  |  16 +
 .../samza/logging/log4j/StreamAppender.java     | 110 ++++++-
 .../logging/log4j/StreamAppenderMetrics.java    |  43 +++
 .../samza/logging/log4j/MockSystemProducer.java |  12 +-
 .../samza/logging/log4j/TestStreamAppender.java | 144 +++++++--
 .../src/main/visualizer/js/planToDagre.js       |   6 +-
 .../apache/samza/sql/avro/AvroRelConverter.java |   6 +-
 samza-test/src/main/config/join/README          |   8 +-
 samza-test/src/main/python/configs/tests.json   |   2 +-
 .../test/processor/TestStreamProcessor.java     |   9 +-
 .../processor/TestZkLocalApplicationRunner.java | 152 +++++++--
 .../apache/samza/test/table/TestLocalTable.java | 304 ++++++++++++++++++
 .../apache/samza/test/table/TestTableData.java  | 200 ++++++++++++
 .../samza/test/util/ArraySystemConsumer.java    |   4 +-
 .../samza/test/util/SimpleSystemAdmin.java      |  26 +-
 samza-tools/config/eh-consumer-log4j.xml        |  35 +++
 .../config/generate-kafka-events-log4j.xml      |  35 +++
 samza-tools/config/samza-sql-console-log4j.xml  |  35 +++
 samza-tools/scripts/eh-consumer.sh              |  34 ++
 samza-tools/scripts/generate-kafka-events.sh    |  34 ++
 samza-tools/scripts/samza-sql-console.sh        |  34 ++
 .../apache/samza/tools/CommandLineHelper.java   |  42 +++
 .../tools/ConsoleLoggingSystemFactory.java      | 126 ++++++++
 .../samza/tools/EventHubConsoleConsumer.java    | 120 +++++++
 .../apache/samza/tools/GenerateKafkaEvents.java | 205 ++++++++++++
 .../samza/tools/RandomValueGenerator.java       |  87 ++++++
 .../org/apache/samza/tools/SamzaSqlConsole.java | 188 +++++++++++
 .../tools/avro/AvroSchemaGenRelConverter.java   |  94 ++++++
 .../avro/AvroSchemaGenRelConverterFactory.java  |  43 +++
 .../samza/tools/avro/AvroSerDeFactory.java      |  96 ++++++
 .../tools/json/JsonRelConverterFactory.java     |  93 ++++++
 .../samza/tools/schemas/PageViewEvent.avsc      |  51 +++
 .../samza/tools/schemas/PageViewEvent.java      |  60 ++++
 .../samza/tools/schemas/ProfileChangeEvent.avsc |  51 +++
 .../samza/tools/schemas/ProfileChangeEvent.java |  60 ++++
 .../apache/samza/tools/udf/RegexMatchUdf.java   |  40 +++
 samza-tools/src/main/resources/log4j.xml        |  43 +++
 .../org/apache/samza/job/yarn/YarnAppState.java |   4 +-
 .../job/yarn/YarnClusterResourceManager.java    | 310 +++++++++++++++++--
 .../samza/job/yarn/YarnContainerRunner.java     | 272 ----------------
 settings.gradle                                 |   6 +-
 214 files changed, 10944 insertions(+), 1302 deletions(-)
----------------------------------------------------------------------



[39/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/952dbbe2
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/952dbbe2
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/952dbbe2

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 952dbbe20a23fc318589b62a044ac7e2cc944fc0
Parents: 728dc18 6668351
Author: Boris S <bo...@apache.org>
Authored: Mon Sep 10 19:06:24 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Mon Sep 10 19:06:24 2018 -0700

----------------------------------------------------------------------
 samza-shell/src/main/bash/run-class.sh          | 12 ++-
 .../job/yarn/YarnClusterResourceManager.java    | 41 ++++++----
 .../yarn/TestYarnClusterResourceManager.java    | 81 ++++++++++++++++++++
 3 files changed, 116 insertions(+), 18 deletions(-)
----------------------------------------------------------------------



[28/47] samza git commit: added eventProcessed sync

Posted by bo...@apache.org.
added eventProcessed sync


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/b5ce9b38
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/b5ce9b38
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/b5ce9b38

Branch: refs/heads/NewKafkaSystemConsumer
Commit: b5ce9b38da88318a625f1dd7a6d35b9ed14ca04b
Parents: 19ba300
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 4 17:22:16 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 4 17:22:16 2018 -0700

----------------------------------------------------------------------
 .../apache/samza/checkpoint/OffsetManager.scala |  4 ++--
 .../apache/samza/container/SamzaContainer.scala |  2 +-
 .../org/apache/samza/job/local/ThreadJob.scala  |  5 +----
 .../samza/job/local/ThreadJobFactory.scala      |  6 +++++-
 .../apache/samza/job/local/TestThreadJob.scala  |  9 --------
 .../samza/system/kafka/KafkaConsumerProxy.java  | 22 ++++++++++++++------
 .../system/kafka/NewKafkaSystemConsumer.java    | 18 +++++++++-------
 .../test/integration/StreamTaskTestUtil.scala   | 17 +++++++++++++--
 .../integration/TestShutdownStatefulTask.scala  |  6 +-----
 9 files changed, 52 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala b/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
index 53d5e98..d2b6667 100644
--- a/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
+++ b/samza-core/src/main/scala/org/apache/samza/checkpoint/OffsetManager.scala
@@ -304,7 +304,7 @@ class OffsetManager(
    */
   private def loadOffsetsFromCheckpointManager {
     if (checkpointManager != null) {
-      debug("Loading offsets from checkpoint manager.")
+      info("Loading offsets from checkpoint manager.")
 
       checkpointManager.start
       val result = systemStreamPartitions
@@ -332,7 +332,7 @@ class OffsetManager(
    * Loads last processed offsets for a single taskName.
    */
   private def restoreOffsetsFromCheckpoint(taskName: TaskName): Map[TaskName, Map[SystemStreamPartition, String]] = {
-    debug("Loading checkpoints for taskName: %s." format taskName)
+    info("Loading checkpoints for taskName: %s." format taskName)
 
     val checkpoint = checkpointManager.readLastCheckpoint(taskName)
 

http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala b/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
index 0c889d2..d02660b 100644
--- a/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
+++ b/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
@@ -825,7 +825,7 @@ class SamzaContainer(
     }
 
     try {
-      info("Shutting down.")
+      info("Shutting down Samza.")
       removeShutdownHook
 
       jmxServer.stop

http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
index 33dde52..a61a297 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
@@ -19,12 +19,11 @@
 
 package org.apache.samza.job.local
 
-import org.apache.samza.coordinator.JobModelManager
 import org.apache.samza.job.ApplicationStatus.{New, Running, SuccessfulFinish, UnsuccessfulFinish}
 import org.apache.samza.job.{ApplicationStatus, StreamJob}
 import org.apache.samza.util.Logging
 
-class ThreadJob(runnable: Runnable, val jobModelManager: JobModelManager) extends StreamJob with Logging {
+class ThreadJob(runnable: Runnable) extends StreamJob with Logging {
   @volatile var jobStatus: Option[ApplicationStatus] = None
   var thread: Thread = null
 
@@ -44,8 +43,6 @@ class ThreadJob(runnable: Runnable, val jobModelManager: JobModelManager) extend
             jobStatus = Some(UnsuccessfulFinish)
             throw e
           }
-        } finally {
-          jobModelManager.stop
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
index 4b08721..34cc2a0 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
@@ -19,6 +19,8 @@
 
 package org.apache.samza.job.local
 
+import java.util.concurrent.{CountDownLatch, TimeUnit}
+
 import org.apache.samza.config.{Config, TaskConfigJava}
 import org.apache.samza.config.JobConfig._
 import org.apache.samza.config.ShellCommandConfig._
@@ -65,6 +67,7 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
     val checkpointManager = new TaskConfigJava(jobModel.getConfig).getCheckpointManager(metricsRegistry)
     if (checkpointManager != null) {
       checkpointManager.createResources()
+      checkpointManager.stop()
     }
     ChangelogStreamManager.createChangelogStreams(jobModel.getConfig, jobModel.maxChangeLogStreamPartitions)
 
@@ -110,10 +113,11 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
         taskFactory)
       container.setContainerListener(containerListener)
 
-      val threadJob = new ThreadJob(container, coordinator)
+      val threadJob = new ThreadJob(container)
       threadJob
     } finally {
       coordinator.stop
+      coordinatorStreamManager.stop
       jmxServer.stop
     }
   }
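
A short, hedged sketch of the create-then-stop pattern introduced above: the factory creates the checkpoint resources once at submission time and releases the manager immediately instead of holding it open. The try/finally wrapping is illustrative only; the diff simply calls createResources() and stop() back to back.

  import org.apache.samza.checkpoint.CheckpointManager;

  // Sketch only: create checkpoint resources eagerly, then release the
  // manager so the factory does not keep it open for the job's lifetime.
  static void prepareCheckpointResources(CheckpointManager checkpointManager) {
    if (checkpointManager != null) {
      try {
        checkpointManager.createResources(); // e.g. create the checkpoint topic
      } finally {
        checkpointManager.stop();
      }
    }
  }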

http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala b/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
index b1de215..4f3f511 100644
--- a/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
+++ b/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
@@ -19,7 +19,6 @@
 
 package org.apache.samza.job.local
 
-import org.apache.samza.coordinator.JobModelManager
 import org.junit.Assert._
 import org.junit.Test
 import org.apache.samza.job.ApplicationStatus
@@ -30,10 +29,6 @@ class TestThreadJob {
     val job = new ThreadJob(new Runnable {
       override def run {
       }
-    }, new JobModelManager(null) {
-      override def stop: Unit = {
-
-      }
     })
     job.submit
     job.waitForFinish(999999)
@@ -45,10 +40,6 @@ class TestThreadJob {
       override def run {
         Thread.sleep(999999)
       }
-    }, new JobModelManager(null) {
-      override def stop: Unit = {
-
-      }
     })
     job.submit
     job.waitForFinish(500)

http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index e61e0ff..cddfdfd 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -72,7 +72,7 @@ public class KafkaConsumerProxy<K, V> {
 
   private volatile boolean isRunning = false;
   private volatile Throwable failureCause = null;
-  private CountDownLatch consumerPollThreadStartLatch = new CountDownLatch(1);
+  private final CountDownLatch consumerPollThreadStartLatch = new CountDownLatch(1);
 
   public KafkaConsumerProxy(Consumer<K, V> kafkaConsumer, String systemName, String clientId,
       NewKafkaSystemConsumer.KafkaConsumerMessageSink messageSink, KafkaSystemConsumerMetrics samzaConsumerMetrics,
@@ -93,19 +93,26 @@ public class KafkaConsumerProxy<K, V> {
 
   public void start() {
     if (!consumerPollThread.isAlive()) {
-      LOG.info("Starting LiKafkaConsumerProxy polling thread for system " + systemName + " " + this.toString());
+      LOG.info("Starting KafkaConsumerProxy polling thread for system " + systemName + " " + this.toString());
       consumerPollThread.setDaemon(true);
       consumerPollThread.setName(
-          "Samza LiKafkaConsumerProxy Poll " + consumerPollThread.getName() + " - " + systemName);
+          "Samza KafkaConsumerProxy Poll " + consumerPollThread.getName() + " - " + systemName);
       consumerPollThread.start();
 
+      System.out.println("THREAD: starting" + consumerPollThread.getName());
+
+
       // we need to wait until the thread starts
       while (!isRunning) {
         try {
           consumerPollThreadStartLatch.await(3000, TimeUnit.MILLISECONDS);
         } catch (InterruptedException e) {
+          LOG.info("WTH");
         }
       }
+      new Exception().printStackTrace(System.out);
+      System.out.println("THREAD: started" + consumerPollThread.getName());
+
     } else {
       LOG.debug("Tried to start an already started LiKafkaConsumerProxy (%s). Ignoring.", this.toString());
     }
@@ -135,12 +142,15 @@ public class KafkaConsumerProxy<K, V> {
     return () -> {
       isRunning = true;
 
+
       try {
         consumerPollThreadStartLatch.countDown();
+        System.out.println("THREAD: runing " + consumerPollThread.getName());
         initializeLags();
         while (isRunning) {
           fetchMessages();
         }
+        System.out.println("THREAD: finished " + consumerPollThread.getName());
       } catch (Throwable throwable) {
         LOG.error(String.format("Error in LiKafkaConsumerProxy poll thread for system: %s.", systemName), throwable);
         // SamzaLiKafkaSystemConsumer uses the failureCause to propagate the throwable to the container
@@ -164,7 +174,7 @@ public class KafkaConsumerProxy<K, V> {
       // If the message we are about to consume is < end offset, we are starting with a lag.
       long initialLag = endOffsets.get(tp) - startingOffset;
 
-      LOG.info("Initial lag is {} for SSP {}", initialLag, ssp);
+      LOG.info("Initial lag for SSP {} is {} (end={}, startOffset={})", ssp, initialLag, endOffsets.get(tp), startingOffset);
       latestLags.put(ssp, initialLag);
       sink.setIsAtHighWatermark(ssp, initialLag == 0);
     });
@@ -446,13 +456,13 @@ public class KafkaConsumerProxy<K, V> {
   }
 
   public void stop(long timeout) {
-    LOG.info("Shutting down LiKafkaConsumerProxy poll thread:" + toString());
+    System.out.println("THREAD: Shutting down LiKafkaConsumerProxy poll thread:" + consumerPollThread.getName());
 
     isRunning = false;
     try {
       consumerPollThread.join(timeout);
     } catch (InterruptedException e) {
-      LOG.warn("Join in LiKafkaConsumerProxy has failed", e);
+      LOG.warn("Join in KafkaConsumerProxy has failed", e);
       consumerPollThread.interrupt();
     }
   }
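
The start() logic above blocks the caller on a CountDownLatch until the poll thread has actually begun running. A minimal Java sketch of that handshake, with hypothetical names (PollLoop, runLoop) standing in for the proxy internals:

  import java.util.concurrent.CountDownLatch;
  import java.util.concurrent.TimeUnit;

  // Sketch of the latch-based start handshake used by the poll thread above.
  public class PollLoop {
    private final CountDownLatch startLatch = new CountDownLatch(1);
    private volatile boolean isRunning = false;
    private final Thread pollThread = new Thread(this::runLoop, "poll-loop");

    public void start() throws InterruptedException {
      pollThread.setDaemon(true);
      pollThread.start();
      // Block until the loop signals it is running; re-check the flag in
      // case await() times out before the thread gets scheduled.
      while (!isRunning) {
        startLatch.await(3000, TimeUnit.MILLISECONDS);
      }
    }

    private void runLoop() {
      isRunning = true;
      startLatch.countDown(); // unblocks start()
      while (isRunning) {
        // fetch and process messages here
      }
    }

    public void stop(long timeoutMs) throws InterruptedException {
      isRunning = false;
      pollThread.join(timeoutMs);
    }
  }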

http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index aeeadce..b33db42 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -103,13 +103,16 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
   public static <K, V> NewKafkaSystemConsumer getNewKafkaSystemConsumer(String systemName, Config config,
       String clientId, KafkaSystemConsumerMetrics metrics, Clock clock) {
 
-    System.out.println("GETTING FOR " + systemName);
 
-    System.out.printf("RETURNING NEW ONE");
+
     // extract consumer configs and create kafka consumer
     KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
 
-    return new NewKafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
+
+    NewKafkaSystemConsumer kc = new NewKafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
+    System.out.println("kc=" + kc + "!!!!!!!!!!!!!!!!!GETTING FOR NKC for " + systemName);
+
+    return kc;
   }
 
   /**
@@ -254,7 +257,8 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
 
   @Override
   public void stop() {
-    System.out.println("##################### stopping " + this + "; kc=" + kafkaConsumer);
+    System.out.println("kc=" + this + "!!!!!!!!!!!!!!!!!!!!!! stopping "+ "; kc=" + kafkaConsumer);
+    System.out.println("kc=" + this + "!!!!!!!!!!!!!!!!!!!!!!TPs = " + topicPartitions2Offset);
 
     if (!stopped.compareAndSet(false, true)) {
       LOG.warn("attempting to stop stopped consumer.");
@@ -300,7 +304,7 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
 
     topicPartitions2SSP.put(tp, systemStreamPartition);
 
-    LOG.info("==============>registering ssp = " + systemStreamPartition + " with offset " + offset);
+    LOG.info("============>registering ssp = " + systemStreamPartition + " with offset " + offset + "; kc=" + this);
 
     String existingOffset = topicPartitions2Offset.get(tp);
     // register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
@@ -348,8 +352,8 @@ public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements
     }
 
     Map<SystemStreamPartition, List<IncomingMessageEnvelope>> res = super.poll(systemStreamPartitions, timeout);
-    LOG.info("=============================>. Res for " + systemStreamPartitions);
-    LOG.info("=============================>. Res:" + res.toString());
+    //LOG.info("=============================>. Res for " + systemStreamPartitions);
+    //LOG.info("=============================>. Res:" + res.toString());
     return res;
   }
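
The register path above keeps the older of two offsets when the same partition is registered twice, so nothing between them can be skipped. A hedged sketch of that merge rule (OffsetRegistry is an illustrative name, not a Samza class):

  import java.util.HashMap;
  import java.util.Map;

  // Sketch: on a duplicate registration, keep the smaller (older) starting
  // offset so the consumer never misses messages between the two offsets.
  class OffsetRegistry {
    private final Map<String, Long> startingOffsets = new HashMap<>();

    void register(String topicPartition, long offset) {
      startingOffsets.merge(topicPartition, offset, Math::min);
    }
  }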
 

http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
----------------------------------------------------------------------
diff --git a/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala b/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
index 864d2e5..2ea9a5f 100644
--- a/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
+++ b/samza-test/src/test/scala/org/apache/samza/test/integration/StreamTaskTestUtil.scala
@@ -37,7 +37,7 @@ import org.apache.kafka.common.protocol.SecurityProtocol
 import org.apache.kafka.common.security.JaasUtils
 import org.apache.samza.config._
 import org.apache.samza.container.TaskName
-import org.apache.samza.job.local.ThreadJobFactory
+import org.apache.samza.job.local.{ThreadJob, ThreadJobFactory}
 import org.apache.samza.job.model.{ContainerModel, JobModel}
 import org.apache.samza.job.{ApplicationStatus, JobRunner, StreamJob}
 import org.apache.samza.metrics.MetricsRegistryMap
@@ -223,9 +223,16 @@ class StreamTaskTestUtil {
    * interrupt, which is forwarded on to ThreadJob, and marked as a failure).
    */
   def stopJob(job: StreamJob) {
+    // make sure we don't kill the job before it was started
+    val tasks = TestTask.tasks
+    val task = tasks.values.toList.head
+    task.eventProcessed.await(60, TimeUnit.SECONDS)
+    System.out.println("THREAD: JOB KILL BEFORE")
     // Shutdown task.
     job.kill
+    System.out.println("THREAD: JOB KILL")
     val status = job.waitForFinish(60000)
+    System.out.println("THREAD: JOB KILL WAIT")
     assertEquals(ApplicationStatus.UnsuccessfulFinish, status)
   }
 
@@ -279,7 +286,10 @@ class StreamTaskTestUtil {
     val taskConfig = new TaskConfig(jobModel.getConfig)
     val checkpointManager = taskConfig.getCheckpointManager(new MetricsRegistryMap())
     checkpointManager match {
-      case Some(checkpointManager) => checkpointManager.createResources
+      case Some(checkpointManager) => {
+        checkpointManager.createResources
+        checkpointManager.stop
+      }
       case _ => assert(checkpointManager != null, "No checkpoint manager factory configured")
     }
 
@@ -323,6 +333,7 @@ object TestTask {
 abstract class TestTask extends StreamTask with InitableTask {
   var received = ArrayBuffer[String]()
   val initFinished = new CountDownLatch(1)
+  val eventProcessed = new CountDownLatch(1)
   @volatile var gotMessage = new CountDownLatch(1)
 
   def init(config: Config, context: TaskContext) {
@@ -334,6 +345,8 @@ abstract class TestTask extends StreamTask with InitableTask {
   def process(envelope: IncomingMessageEnvelope, collector: MessageCollector, coordinator: TaskCoordinator) {
     val msg = envelope.getMessage.asInstanceOf[String]
 
+    eventProcessed.countDown()
+
     System.err.println("TestTask.process(): %s" format msg)
 
     received += msg
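
The eventProcessed latch above keeps stopJob() from killing the job before it has processed at least one message. A minimal Java sketch of the same guard, assuming process() and the shutdown path run on different threads (FirstEventGuard is a hypothetical name):

  import java.util.concurrent.CountDownLatch;
  import java.util.concurrent.TimeUnit;

  // Sketch: delay shutdown until the first event has been processed.
  class FirstEventGuard {
    private final CountDownLatch eventProcessed = new CountDownLatch(1);

    void onEvent(Object envelope) {
      eventProcessed.countDown(); // released by the first processed event
    }

    void shutdownAfterFirstEvent(Runnable shutdown) throws InterruptedException {
      // Bounded wait, so a job that never processes anything cannot hang the test.
      eventProcessed.await(60, TimeUnit.SECONDS);
      shutdown.run();
    }
  }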

http://git-wip-us.apache.org/repos/asf/samza/blob/b5ce9b38/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
----------------------------------------------------------------------
diff --git a/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala b/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
index a42433c..ccb7cd4 100644
--- a/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
+++ b/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
@@ -77,20 +77,16 @@ class TestShutdownStatefulTask extends StreamTaskTestUtil {
     val (job, task) = startJob
 
     // Validate that restored is empty.
-    assertEquals(0, task.initFinished.getCount)
     assertEquals(0, task.asInstanceOf[ShutdownStateStoreTask].restored.size)
     assertEquals(0, task.received.size)
 
     // Send some messages to input stream.
-    System.out.println("************************BEFORE DONE sending")
     send(task, "1")
-    System.out.println("************************FIRST DONE sending")
     send(task, "2")
     send(task, "3")
     send(task, "2")
     send(task, "99")
     send(task, "99")
-    System.out.println("************************DONE sending")
     stopJob(job)
 
   }
@@ -122,7 +118,7 @@ class ShutdownStateStoreTask extends TestTask {
       .asInstanceOf[KeyValueStore[String, String]]
     val iter = store.all
     iter.asScala.foreach( p => restored += (p.getKey -> p.getValue))
-    System.err.println("ShutdownStateStoreTask.createStream(): %s" format restored)
+    System.out.println("ShutdownStateStoreTask.createStream(): %s" format restored)
     iter.close
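
The restore loop above scans the whole store via store.all and closes the iterator when done. A hedged Java sketch of that scan, assuming Samza's KeyValueStore/KeyValueIterator API (StoreScan is an illustrative wrapper):

  import java.util.HashMap;
  import java.util.Map;
  import org.apache.samza.storage.kv.Entry;
  import org.apache.samza.storage.kv.KeyValueIterator;
  import org.apache.samza.storage.kv.KeyValueStore;

  class StoreScan {
    // Read every entry into a map; always close the iterator, since it may
    // pin store resources (e.g. a RocksDB snapshot) until released.
    static Map<String, String> restoreAll(KeyValueStore<String, String> store) {
      Map<String, String> restored = new HashMap<>();
      KeyValueIterator<String, String> iter = store.all();
      try {
        while (iter.hasNext()) {
          Entry<String, String> entry = iter.next();
          restored.put(entry.getKey(), entry.getValue());
        }
      } finally {
        iter.close();
      }
      return restored;
    }
  }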
   }
 


[26/47] samza git commit: added apache license

Posted by bo...@apache.org.
added apache license


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/59b3dc1c
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/59b3dc1c
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/59b3dc1c

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 59b3dc1c2c1820c899b5d5b217b0268d119348af
Parents: 2203494
Author: Boris S <bo...@apache.org>
Authored: Fri Aug 31 15:11:39 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Aug 31 15:11:39 2018 -0700

----------------------------------------------------------------------
 .../kafka/TestNewKafkaSystemConsumer.java       | 21 ++++++++++++++++++++
 1 file changed, 21 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/59b3dc1c/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
index f7f63f3..fb7533b 100644
--- a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
+++ b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
@@ -1,3 +1,24 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
 package org.apache.samza.system.kafka;
 
 import java.util.Collections;


[30/47] samza git commit: Merge branch 'master' into NewConsumer

Posted by bo...@apache.org.
Merge branch 'master' into NewConsumer


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/bab5bdd5
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/bab5bdd5
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/bab5bdd5

Branch: refs/heads/NewKafkaSystemConsumer
Commit: bab5bdd5a8d12ae0efcd6d3b5c5601d476470373
Parents: b5ce9b3 add733b
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 4 17:23:51 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 4 17:23:51 2018 -0700

----------------------------------------------------------------------
 .../org/apache/samza/execution/JobNode.java     |  5 +++++
 .../runtime/AbstractApplicationRunner.java      | 17 ++++++++-------
 .../sql/runner/SamzaSqlApplicationConfig.java   |  6 ++++--
 .../runner/TestSamzaSqlApplicationConfig.java   | 22 +++++++++++++++++++-
 .../sql/testutil/TestSamzaSqlFileParser.java    |  1 -
 .../table/TestLocalTableWithSideInputs.java     |  3 ++-
 6 files changed, 41 insertions(+), 13 deletions(-)
----------------------------------------------------------------------



[15/47] samza git commit: Merge branch 'master' of https://github.com/sborya/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/sborya/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/afb34d91
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/afb34d91
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/afb34d91

Branch: refs/heads/NewKafkaSystemConsumer
Commit: afb34d916b435c1c08fe80009cc7d47d8287e27f
Parents: 7887d88 78ad578
Author: Boris S <bo...@apache.org>
Authored: Sun Aug 12 23:48:54 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Sun Aug 12 23:48:54 2018 -0700

----------------------------------------------------------------------

----------------------------------------------------------------------



[37/47] samza git commit: addressed some review comments

Posted by bo...@apache.org.
addressed some review comments


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/26552213
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/26552213
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/26552213

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 2655221348304507e1a91e6fa93ef2dc79a4620d
Parents: 9217644
Author: Boris S <bo...@apache.org>
Authored: Mon Sep 10 11:17:18 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Mon Sep 10 11:17:18 2018 -0700

----------------------------------------------------------------------
 .../apache/samza/container/SamzaContainer.scala |   2 +-
 .../samza/coordinator/JobModelManager.scala     |   3 +-
 .../clients/consumer/KafkaConsumerConfig.java   |  43 +-
 .../samza/system/kafka/KafkaConsumerProxy.java  |  50 +--
 .../samza/system/kafka/KafkaSystemConsumer.java | 406 ++++++++++++++++++
 .../samza/system/kafka/KafkaSystemFactory.scala |   4 +-
 .../system/kafka/NewKafkaSystemConsumer.java    | 412 -------------------
 .../system/kafka/TestKafkaSystemConsumer.java   | 224 ++++++++++
 .../kafka/TestNewKafkaSystemConsumer.java       | 224 ----------
 9 files changed, 687 insertions(+), 681 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala b/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
index b17788f..5ee9206 100644
--- a/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
+++ b/samza-core/src/main/scala/org/apache/samza/container/SamzaContainer.scala
@@ -829,7 +829,7 @@ class SamzaContainer(
     }
 
     try {
-      info("Shutting down Samza.")
+      info("Shutting down SamzaContaier.")
       removeShutdownHook
 
       jmxServer.stop

http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala b/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
index f95a521..e626d9a 100644
--- a/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
+++ b/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
@@ -35,7 +35,6 @@ import org.apache.samza.container.LocalityManager
 import org.apache.samza.container.TaskName
 import org.apache.samza.coordinator.server.HttpServer
 import org.apache.samza.coordinator.server.JobServlet
-import org.apache.samza.coordinator.stream.CoordinatorStreamManager
 import org.apache.samza.job.model.JobModel
 import org.apache.samza.job.model.TaskModel
 import org.apache.samza.metrics.MetricsRegistryMap
@@ -64,7 +63,7 @@ object JobModelManager extends Logging {
    * a) Reads the jobModel from coordinator stream using the job's configuration.
    * b) Recomputes changelog partition mapping based on jobModel and job's configuration.
    * c) Builds JobModelManager using the jobModel read from coordinator stream.
-   * @param config Coordinator stream manager config.
+   * @param config Config from the coordinator stream.
    * @param changelogPartitionMapping The changelog partition-to-task mapping.
    * @return JobModelManager
    */

http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index 98792ab..8ca5b93 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -54,21 +54,28 @@ public class KafkaConsumerConfig extends ConsumerConfig {
    * By default, KafkaConsumer will fetch ALL available messages for all the partitions.
    * This may cause memory issues. That's why we will limit the number of messages per partition we get on EACH poll().
    */
-  private static final String KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT = "100";
+  private static final String DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS = "100";
 
-
-  public KafkaConsumerConfig(Properties props) {
+  private KafkaConsumerConfig(Properties props) {
     super(props);
   }
 
+  /**
+   * Create kafka consumer configs, based on the subset of global configs.
+   * @param config
+   * @param systemName
+   * @param clientId
+   * @param injectProps
+   * @return KafkaConsumerConfig
+   */
   public static KafkaConsumerConfig getKafkaSystemConsumerConfig(Config config, String systemName, String clientId,
       Map<String, String> injectProps) {
 
-    Config subConf = config.subset(String.format("systems.%s.consumer.", systemName), true);
+    final Config subConf = config.subset(String.format("systems.%s.consumer.", systemName), true);
 
-    String groupId = getConsumerGroupId(config);
+    final String groupId = getConsumerGroupId(config);
 
-    Properties consumerProps = new Properties();
+    final Properties consumerProps = new Properties();
     consumerProps.putAll(subConf);
 
     consumerProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
@@ -109,8 +116,8 @@ public class KafkaConsumerConfig extends ConsumerConfig {
     }
 
     // NOT SURE THIS IS NEEDED TODO
-    String maxPollRecords =
-        subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, KAFKA_CONSUMER_MAX_POLL_RECORDS_DEFAULT);
+    final String maxPollRecords =
+        subConf.get(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, DEFAULT_KAFKA_CONSUMER_MAX_POLL_RECORDS);
     consumerProps.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
 
     // put overrides
@@ -120,7 +127,7 @@ public class KafkaConsumerConfig extends ConsumerConfig {
   }
 
   // group id should be unique per job
-  static String getConsumerGroupId(Config config) {
+  private static String getConsumerGroupId(Config config) {
     JobConfig jobConfig = new JobConfig(config);
     Option<String> jobIdOption = jobConfig.getJobId();
     Option<String> jobNameOption = jobConfig.getName();
@@ -151,11 +158,12 @@ public class KafkaConsumerConfig extends ConsumerConfig {
   }
 
   /**
-   * Settings for auto.reset in samza are different from settings in Kafka (auto.offset.reset) - need to convert
+   * If settings for auto.reset in samza are different from settings in Kafka (auto.offset.reset),
+   * then need to convert them (see kafka.apache.org/documentation):
    * "largest" -> "latest"
    * "smallest" -> "earliest"
-   * "none" -> "none"
-   * "none" - will fail the kafka consumer, if offset is out of range
+   *
+   * If no setting specified we return "latest" (same as Kafka).
    * @param properties All consumer related {@link Properties} parsed from samza config
    * @return String representing the config value for "auto.offset.reset" property
    */
@@ -168,13 +176,18 @@ public class KafkaConsumerConfig extends ConsumerConfig {
       return autoOffsetReset;
     }
 
+    String newAutoOffsetReset;
     switch (autoOffsetReset) {
       case SAMZA_OFFSET_LARGEST:
-        return KAFKA_OFFSET_LATEST;
+        newAutoOffsetReset =  KAFKA_OFFSET_LATEST;
+        break;
       case SAMZA_OFFSET_SMALLEST:
-        return KAFKA_OFFSET_EARLIEST;
+        newAutoOffsetReset =  KAFKA_OFFSET_EARLIEST;
+        break;
       default:
-        return KAFKA_OFFSET_LATEST;
+        newAutoOffsetReset =  KAFKA_OFFSET_LATEST;
     }
+    LOG.info("AutoOffsetReset value converted from {} to {}", autoOffsetReset,  newAutoOffsetReset);
+    return newAutoOffsetReset;
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index ae80d50..0825c90 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -22,7 +22,6 @@
 package org.apache.samza.system.kafka;
 
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -40,6 +39,7 @@ import org.apache.kafka.clients.consumer.InvalidOffsetException;
 import org.apache.kafka.common.Metric;
 import org.apache.kafka.common.MetricName;
 import org.apache.kafka.common.TopicPartition;
+import org.apache.samza.Partition;
 import org.apache.samza.SamzaException;
 import org.apache.samza.system.IncomingMessageEnvelope;
 import org.apache.samza.system.SystemStreamPartition;
@@ -58,13 +58,13 @@ public class KafkaConsumerProxy<K, V> {
 
   /* package private */ final Thread consumerPollThread;
   private final Consumer<K, V> kafkaConsumer;
-  private final NewKafkaSystemConsumer.KafkaConsumerMessageSink sink;
+  private final KafkaSystemConsumer.KafkaConsumerMessageSink sink;
   private final KafkaSystemConsumerMetrics kafkaConsumerMetrics;
   private final String metricName;
   private final String systemName;
   private final String clientId;
   private final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
-  private final Map<SystemStreamPartition, MetricName> ssp2MetricName = new HashMap<>();
+  private final Map<SystemStreamPartition, MetricName> perPartitionMetrics = new HashMap<>();
   // list of all the SSPs we poll from, with their next offsets correspondingly.
   private final Map<SystemStreamPartition, Long> nextOffsets = new ConcurrentHashMap<>();
   // lags behind the high water mark, as reported by the Kafka consumer.
@@ -75,7 +75,7 @@ public class KafkaConsumerProxy<K, V> {
   private final CountDownLatch consumerPollThreadStartLatch = new CountDownLatch(1);
 
   public KafkaConsumerProxy(Consumer<K, V> kafkaConsumer, String systemName, String clientId,
-      NewKafkaSystemConsumer.KafkaConsumerMessageSink messageSink, KafkaSystemConsumerMetrics samzaConsumerMetrics,
+      KafkaSystemConsumer.KafkaConsumerMessageSink messageSink, KafkaSystemConsumerMetrics samzaConsumerMetrics,
       String metricName) {
 
     this.kafkaConsumer = kafkaConsumer;
@@ -88,14 +88,15 @@ public class KafkaConsumerProxy<K, V> {
     this.kafkaConsumerMetrics.registerClientProxy(metricName);
 
     consumerPollThread = new Thread(createProxyThreadRunnable());
+    consumerPollThread.setDaemon(true);
+    consumerPollThread.setName(
+        "Samza KafkaConsumerProxy Poll " + consumerPollThread.getName() + " - " + systemName);
   }
 
   public void start() {
     if (!consumerPollThread.isAlive()) {
       LOG.info("Starting KafkaConsumerProxy polling thread for system " + systemName + " " + this.toString());
-      consumerPollThread.setDaemon(true);
-      consumerPollThread.setName(
-          "Samza KafkaConsumerProxy Poll " + consumerPollThread.getName() + " - " + systemName);
+
       consumerPollThread.start();
 
       // we need to wait until the thread starts
@@ -116,7 +117,7 @@ public class KafkaConsumerProxy<K, V> {
   public void addTopicPartition(SystemStreamPartition ssp, long nextOffset) {
     LOG.info(String.format("Adding new topic and partition %s, offset = %s to queue for consumer %s", ssp, nextOffset,
         this));
-    topicPartitions2SSP.put(NewKafkaSystemConsumer.toTopicPartition(ssp), ssp); //registered SSPs
+    topicPartitions2SSP.put(KafkaSystemConsumer.toTopicPartition(ssp), ssp); //registered SSPs
 
     // this is already vetted offset so there is no need to validate it
     LOG.info(String.format("Got offset %s for new topic and partition %s.", nextOffset, ssp));
@@ -135,7 +136,6 @@ public class KafkaConsumerProxy<K, V> {
     Runnable runnable=  () -> {
       isRunning = true;
 
-
       try {
         consumerPollThreadStartLatch.countDown();
         LOG.info("Starting runnable " + consumerPollThread.getName());
@@ -230,19 +230,19 @@ public class KafkaConsumerProxy<K, V> {
 
   private Map<SystemStreamPartition, List<IncomingMessageEnvelope>> processResults(ConsumerRecords<K, V> records) {
     if (records == null) {
-      return Collections.emptyMap();
+      throw new SamzaException("processResults is called with null object for records");
     }
 
     int capacity = (int) (records.count() / 0.75 + 1); // to avoid rehash, allocate more then 75% of expected capacity.
     Map<SystemStreamPartition, List<IncomingMessageEnvelope>> results = new HashMap<>(capacity);
     // Parse the returned records and convert them into the IncomingMessageEnvelope.
     // Note. They have been already de-serialized by the consumer.
-    for (ConsumerRecord<K, V> r : records) {
-      int partition = r.partition();
-      String topic = r.topic();
+    for (ConsumerRecord<K, V> record : records) {
+      int partition = record.partition();
+      String topic = record.topic();
       TopicPartition tp = new TopicPartition(topic, partition);
 
-      updateMetrics(r, tp);
+      updateMetrics(record, tp);
 
       SystemStreamPartition ssp = topicPartitions2SSP.get(tp);
       List<IncomingMessageEnvelope> listMsgs = results.get(ssp);
@@ -251,10 +251,10 @@ public class KafkaConsumerProxy<K, V> {
         results.put(ssp, listMsgs);
       }
 
-      final K key = r.key();
-      final Object value = r.value();
-      IncomingMessageEnvelope imEnvelope =
-          new IncomingMessageEnvelope(ssp, String.valueOf(r.offset()), key, value, getRecordSize(r));
+      final K key = record.key();
+      final Object value = record.value();
+      final IncomingMessageEnvelope imEnvelope =
+          new IncomingMessageEnvelope(ssp, String.valueOf(record.offset()), key, value, getRecordSize(record));
       listMsgs.add(imEnvelope);
     }
     if (LOG.isDebugEnabled()) {
@@ -274,8 +274,8 @@ public class KafkaConsumerProxy<K, V> {
   }
 
   private void updateMetrics(ConsumerRecord<K, V> r, TopicPartition tp) {
-    TopicAndPartition tap = NewKafkaSystemConsumer.toTopicAndPartition(tp);
-    SystemStreamPartition ssp = NewKafkaSystemConsumer.toSystemStreamPartition(systemName, tap);
+    TopicAndPartition tap = KafkaSystemConsumer.toTopicAndPartition(tp);
+    SystemStreamPartition ssp = new SystemStreamPartition(systemName, tp.topic(), new Partition(tp.partition()));
     long currentSSPLag = getLatestLag(ssp); // lag between the current offset and the highwatermark
     if (currentSSPLag < 0) {
       return;
@@ -312,8 +312,8 @@ public class KafkaConsumerProxy<K, V> {
     tags.put("client-id", clientId);// this is required by the KafkaConsumer to get the metrics
 
     for (SystemStreamPartition ssp : ssps) {
-      TopicPartition tp = NewKafkaSystemConsumer.toTopicPartition(ssp);
-      ssp2MetricName.put(ssp, new MetricName(tp + ".records-lag", "consumer-fetch-manager-metrics", "", tags));
+      TopicPartition tp = KafkaSystemConsumer.toTopicPartition(ssp);
+      perPartitionMetrics.put(ssp, new MetricName(tp + ".records-lag", "consumer-fetch-manager-metrics", "", tags));
     }
   }
 
@@ -327,12 +327,12 @@ public class KafkaConsumerProxy<K, V> {
     Map<MetricName, ? extends Metric> consumerMetrics = kafkaConsumer.metrics();
 
     // populate the MetricNames first time
-    if (ssp2MetricName.isEmpty()) {
+    if (perPartitionMetrics.isEmpty()) {
       populateMetricNames(ssps);
     }
 
     for (SystemStreamPartition ssp : ssps) {
-      MetricName mn = ssp2MetricName.get(ssp);
+      MetricName mn = perPartitionMetrics.get(ssp);
       Metric currentLagM = consumerMetrics.get(mn);
 
       // High watermark is fixed to be the offset of last available message,
@@ -412,7 +412,7 @@ public class KafkaConsumerProxy<K, V> {
     for (Map.Entry<SystemStreamPartition, Long> e : nextOffsets.entrySet()) {
       SystemStreamPartition ssp = e.getKey();
       Long offset = e.getValue();
-      TopicAndPartition tp = NewKafkaSystemConsumer.toTopicAndPartition(ssp);
+      TopicAndPartition tp = new TopicAndPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
       Long lag = latestLags.get(ssp);
       LOG.trace("Latest offset of {} is  {}; lag = {}", ssp, offset, lag);
       if (lag != null && offset != null && lag >= 0) {
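
processResults() above presizes its HashMap at records.count() / 0.75 + 1 so the default load factor (0.75) never forces a rehash mid-poll. A small worked example of the same arithmetic:

  import java.util.HashMap;
  import java.util.Map;

  // A HashMap resizes once size > capacity * loadFactor (0.75 by default),
  // so for n expected entries we ask for at least n / 0.75 + 1 slots up front.
  public class PresizeExample {
    public static void main(String[] args) {
      int n = 100;                          // e.g. the max.poll.records default
      int capacity = (int) (n / 0.75 + 1);  // 134 for n = 100
      Map<String, Integer> results = new HashMap<>(capacity);
      for (int i = 0; i < n; i++) {
        results.put("record-" + i, i);      // inserts without triggering a rehash
      }
      System.out.println(results.size());   // 100
    }
  }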

http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
new file mode 100644
index 0000000..196fb85
--- /dev/null
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
@@ -0,0 +1,406 @@
+
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.samza.system.kafka;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import kafka.common.TopicAndPartition;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.samza.Partition;
+import org.apache.samza.SamzaException;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.system.IncomingMessageEnvelope;
+import org.apache.samza.system.SystemConsumer;
+import org.apache.samza.system.SystemStreamPartition;
+import org.apache.samza.util.BlockingEnvelopeMap;
+import org.apache.samza.util.Clock;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.Option;
+
+
+public class KafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements SystemConsumer {
+
+  private static final Logger LOG = LoggerFactory.getLogger(KafkaSystemConsumer.class);
+
+  private static final long FETCH_THRESHOLD = 50000;
+  private static final long FETCH_THRESHOLD_BYTES = -1L;
+
+  private final Consumer<K, V> kafkaConsumer;
+  private final String systemName;
+  private final KafkaSystemConsumerMetrics samzaConsumerMetrics;
+  private final String clientId;
+  private final String metricName;
+  private final AtomicBoolean stopped = new AtomicBoolean(false);
+  private final AtomicBoolean started = new AtomicBoolean(false);
+  private final Config config;
+  private final boolean fetchThresholdBytesEnabled;
+
+  // This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
+  /* package private */final KafkaConsumerMessageSink messageSink;
+
+  // proxy is doing the actual reading
+  final private KafkaConsumerProxy proxy;
+
+  /* package private */final Map<TopicPartition, String> topicPartitions2Offset = new HashMap<>();
+  /* package private */final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
+
+  /* package private */ long perPartitionFetchThreshold;
+  /* package private */ long perPartitionFetchThresholdBytes;
+
+  /**
+   * Constructor
+   * @param systemName system name for which we create the consumer
+   * @param config config
+   * @param metrics metrics
+   * @param clock - system clock
+   */
+  public KafkaSystemConsumer(Consumer<K, V> kafkaConsumer, String systemName, Config config, String clientId,
+      KafkaSystemConsumerMetrics metrics, Clock clock) {
+
+    super(metrics.registry(), clock, metrics.getClass().getName());
+
+    this.kafkaConsumer = kafkaConsumer;
+    this.samzaConsumerMetrics = metrics;
+    this.clientId = clientId;
+    this.systemName = systemName;
+    this.config = config;
+    this.metricName = String.format("%s %s", systemName, clientId);
+
+    this.fetchThresholdBytesEnabled = new KafkaConfig(config).isConsumerFetchThresholdBytesEnabled(systemName);
+
+    // create a sink for passing the messages between the proxy and the consumer
+    messageSink = new KafkaConsumerMessageSink();
+
+    // Create the proxy to do the actual message reading. It is a separate thread that reads the messages from the stream
+    // and puts them into the sink.
+    proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink, samzaConsumerMetrics, metricName);
+    LOG.info("Created consumer proxy: " + proxy);
+
+    LOG.info("Created SamzaKafkaSystemConsumer for system={}, clientId={}, metricName={}, KafkaConsumer={}", systemName,
+        clientId, metricName, this.kafkaConsumer.toString());
+  }
+
+  public static <K, V> KafkaSystemConsumer getNewKafkaSystemConsumer(String systemName, Config config,
+      String clientId, KafkaSystemConsumerMetrics metrics, Clock clock) {
+
+    // extract consumer configs and create kafka consumer
+    KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
+    LOG.info("Created kafka consumer for system {}, clientId {}: {}", systemName, clientId, kafkaConsumer);
+
+    KafkaSystemConsumer kc = new KafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
+    LOG.info("Created samza system consumer {}", kc.toString());
+
+    return kc;
+  }
+
+  /**
+   * create kafka consumer
+   * @param systemName system name for which we create the consumer
+   * @param clientId client id to use int the kafka client
+   * @param config config
+   * @return kafka consumer
+   */
+  public static <K, V> KafkaConsumer<K, V> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
+
+    Map<String, String> injectProps = new HashMap<>();
+
+    // extract kafka client configs
+    KafkaConsumerConfig consumerConfig =
+        KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
+
+    LOG.info("KafkaClient properties for systemName {}: {}", systemName, consumerConfig.originals());
+
+    return new KafkaConsumer<>(consumerConfig.originals());
+  }
+
+  @Override
+  public void start() {
+    if (!started.compareAndSet(false, true)) {
+      LOG.warn("attempting to start the consumer for the second (or more) time.");
+      return;
+    }
+    if (stopped.get()) {
+      LOG.warn("attempting to start a stopped consumer");
+      return;
+    }
+    // initialize the subscriptions for all the registered TopicPartitions
+    startSubscription();
+    // needs to be called after all the registrations are completed
+    setFetchThresholds();
+
+    startConsumer();
+    LOG.info("consumer {} started", this);
+  }
+
+  private void startSubscription() {
+    //subscribe to all the registered TopicPartitions
+    LOG.info("consumer {}, subscribes to {} ", this, topicPartitions2SSP.keySet());
+    try {
+      synchronized (kafkaConsumer) {
+        // we are using assign (and not subscribe), so we need to specify both topic and partition
+        kafkaConsumer.assign(topicPartitions2SSP.keySet());
+      }
+    } catch (Exception e) {
+      LOG.warn("startSubscription failed.", e);
+      throw new SamzaException(e);
+    }
+  }
+
+  /*
+   Set the offsets to start from.
+   Add the TopicPartitions to the proxy.
+   Start the proxy thread.
+   */
+  void startConsumer() {
+    //set the offset for each TopicPartition
+    if (topicPartitions2Offset.size() <= 0) {
+      LOG.warn("Consumer {} is not subscribed to any SSPs", this);
+    }
+
+    topicPartitions2Offset.forEach((tp, startingOffsetString) -> {
+      long startingOffset = Long.valueOf(startingOffsetString);
+
+      try {
+        synchronized (kafkaConsumer) {
+          // TODO in the future we may need to add special handling here for BEGIN/END_OFFSET
+          // this will call KafkaConsumer.seekToBegin/End()
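+          // ('upcoming' = the offset of the next message to read, which is exactly
+          // what KafkaConsumer.seek() expects)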
+          kafkaConsumer.seek(tp, startingOffset); // this value should already be the 'upcoming' value
+        }
+      } catch (Exception e) {
+        // any exception here is non-recoverable
+        LOG.error("Got Exception while seeking to " + startingOffsetString + " for " + tp, e);
+        throw new SamzaException(e);
+      }
+
+      LOG.info("Changing consumer's starting offset for tp = " + tp + " to " + startingOffsetString);
+
+      // add the partition to the proxy
+      proxy.addTopicPartition(topicPartitions2SSP.get(tp), startingOffset);
+    });
+
+    // start the proxy thread
+    if (proxy != null && !proxy.isRunning()) {
+      LOG.info("Starting proxy: " + proxy);
+      proxy.start();
+    }
+  }
+
+  private void setFetchThresholds() {
+    // get the thresholds, and set defaults if not defined.
+    KafkaConfig kafkaConfig = new KafkaConfig(config);
+
+    Option<String> fetchThresholdOption = kafkaConfig.getConsumerFetchThreshold(systemName);
+    long fetchThreshold = FETCH_THRESHOLD;
+    if (fetchThresholdOption.isDefined()) {
+      fetchThreshold = Long.valueOf(fetchThresholdOption.get());
+      LOG.info("fetchThresholdOption is configured. fetchThreshold=" + fetchThreshold);
+    }
+
+    Option<String> fetchThresholdBytesOption = kafkaConfig.getConsumerFetchThresholdBytes(systemName);
+    long fetchThresholdBytes = FETCH_THRESHOLD_BYTES;
+    if (fetchThresholdBytesOption.isDefined()) {
+      fetchThresholdBytes = Long.valueOf(fetchThresholdBytesOption.get());
+      LOG.info("fetchThresholdBytesOption is configured. fetchThresholdBytes=" + fetchThresholdBytes);
+    }
+
+    int numTPs = topicPartitions2SSP.size();
+    assert (numTPs == topicPartitions2Offset.size());
+
+    LOG.info("fetchThresholdBytes = " + fetchThresholdBytes + "; fetchThreshold=" + fetchThreshold);
+    LOG.info("number of topicPartitions " + numTPs);
+
+    if (numTPs > 0) {
+      perPartitionFetchThreshold = fetchThreshold / numTPs;
+      LOG.info("perPartitionFetchThreshold=" + perPartitionFetchThreshold);
+      if (fetchThresholdBytesEnabled) {
+        // currently this feature cannot be enabled, because we do not have the size of the messages available.
+        // messages get double buffered, hence divide by 2
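+        // e.g. fetchThresholdBytes = 100000 across 2 partitions gives (100000 / 2) / 2 = 25000
+        // bytes per partition queue (illustrative numbers, matching TestKafkaSystemConsumer)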
+        perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / numTPs;
+        LOG.info("perPartitionFetchThresholdBytes is enabled. perPartitionFetchThresholdBytes="
+            + perPartitionFetchThresholdBytes);
+      }
+    }
+  }
+
+  @Override
+  public void stop() {
+    LOG.info("Stopping Samza kafkaConsumer " + this);
+
+    if (!stopped.compareAndSet(false, true)) {
+      LOG.warn("attempting to stop stopped consumer.");
+      return;
+    }
+
+    // stop the proxy (with a 5-minute timeout)
+    if (proxy != null) {
+      LOG.info("Stopping proxy " + proxy);
+      proxy.stop(TimeUnit.MINUTES.toMillis(5));
+    }
+
+    try {
+      synchronized (kafkaConsumer) {
+        LOG.info("Closing kafka consumer " + kafkaConsumer);
+        kafkaConsumer.close();
+      }
+    } catch (Exception e) {
+      LOG.warn("failed to stop SamzaRawKafkaConsumer + " + this, e);
+    }
+  }
+
+  /*
+   Record the SSP and its offset, but do not submit it to the consumer yet.
+   */
+  @Override
+  public void register(SystemStreamPartition systemStreamPartition, String offset) {
+    if (started.get()) {
+      String msg =
+          String.format("Trying to register partition after consumer has been started. sn=%s, ssp=%s", systemName,
+              systemStreamPartition);
+      LOG.error(msg);
+      throw new SamzaException(msg);
+    }
+
+    if (!systemStreamPartition.getSystem().equals(systemName)) {
+      LOG.warn("ignoring SSP " + systemStreamPartition + ", because this consumer's system is " + systemName);
+      return;
+    }
+    super.register(systemStreamPartition, offset);
+
+    TopicPartition tp = toTopicPartition(systemStreamPartition);
+
+    topicPartitions2SSP.put(tp, systemStreamPartition);
+
+    LOG.info("Registering ssp = " + systemStreamPartition + " with offset " + offset);
+
+    String existingOffset = topicPartitions2Offset.get(tp);
+    // register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
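+    // (e.g. registering offsets "5" and then "0" for the same partition keeps "0")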
+    if (existingOffset == null || compareOffsets(existingOffset, offset) > 0) {
+      topicPartitions2Offset.put(tp, offset);
+    }
+
+    samzaConsumerMetrics.registerTopicAndPartition(toTopicAndPartition(tp));
+  }
+
+  /**
+   * Compare two String offsets.
+   * Note: KafkaSystemAdmin has a method for this, but using it would require instantiating a SystemAdmin for each consumer.
+   * @return see {@link Long#compareTo(Long)}
+   */
+  public static int compareOffsets(String offset1, String offset2) {
+    return Long.valueOf(offset1).compareTo(Long.valueOf(offset2));
+  }
+
+  @Override
+  public String toString() {
+    return systemName + "/" + clientId + "/" + super.toString();
+  }
+
+  @Override
+  public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
+      Set<SystemStreamPartition> systemStreamPartitions, long timeout) throws InterruptedException {
+
+    // check if the proxy is running
+    if (!proxy.isRunning()) {
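+      // proxy thread has died: shut this consumer down and surface the proxy's
+      // failure cause to the container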
+      stop();
+      String message = "KafkaConsumerProxy has stopped";
+      if (proxy.getFailureCause() != null) {
+        throw new SamzaException(message, proxy.getFailureCause());
+      } else {
+        LOG.warn("Failure cause is not populated for KafkaConsumerProxy");
+        throw new SamzaException(message);
+      }
+    }
+
+    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> res = super.poll(systemStreamPartitions, timeout);
+    return res;
+  }
+
+  /**
+   * Convert a TopicPartition to a TopicAndPartition.
+   */
+  public static TopicAndPartition toTopicAndPartition(TopicPartition tp) {
+    return new TopicAndPartition(tp.topic(), tp.partition());
+  }
+
+  /**
+   * Convert a SystemStreamPartition to a TopicPartition.
+   */
+  public static TopicPartition toTopicPartition(SystemStreamPartition ssp) {
+    return new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
+  }
+
+  /**
+   * return system name for this consumer
+   * @return system name
+   */
+  public String getSystemName() {
+    return systemName;
+  }
+
+  ////////////////////////////////////
+  // inner class for the message sink
+  ////////////////////////////////////
+  public class KafkaConsumerMessageSink {
+
+    public void setIsAtHighWatermark(SystemStreamPartition ssp, boolean isAtHighWatermark) {
+      setIsAtHead(ssp, isAtHighWatermark);
+    }
+
+    boolean needsMoreMessages(SystemStreamPartition ssp) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("needsMoreMessages from following SSP: {}. fetchLimitByBytes enabled={}; messagesSizeInQueue={};"
+                + "(limit={}); messagesNumInQueue={}(limit={};", ssp, fetchThresholdBytesEnabled,
+            getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes, getNumMessagesInQueue(ssp),
+            perPartitionFetchThreshold);
+      }
+
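+      // when the byte-based threshold is enabled, limit by the accumulated message size
+      // in the queue; otherwise limit by the number of buffered messages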
+      if (fetchThresholdBytesEnabled) {
+        return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes;
+      } else {
+        return getNumMessagesInQueue(ssp) < perPartitionFetchThreshold;
+      }
+    }
+
+    void addMessage(SystemStreamPartition ssp, IncomingMessageEnvelope envelope) {
+      LOG.trace("Incoming message ssp = {}: envelope = {}.", ssp, envelope);
+
+      try {
+        put(ssp, envelope);
+      } catch (InterruptedException e) {
+        throw new SamzaException(
+            String.format("Interrupted while trying to add message with offset %s for ssp %s", envelope.getOffset(),
+                ssp), e);
+      }
+    }
+  }  // end of KafkaMessageSink class
+  ///////////////////////////////////////////////////////////////////////////
+}

http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index 6f58bed..e0e85be 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -50,7 +50,7 @@ class KafkaSystemFactory extends SystemFactory with Logging {
     val clientId = KafkaConsumerConfig.getConsumerClientId( config)
     val metrics = new KafkaSystemConsumerMetrics(systemName, registry)
 
-    NewKafkaSystemConsumer.getNewKafkaSystemConsumer(
+    KafkaSystemConsumer.getNewKafkaSystemConsumer(
       systemName, config, clientId, metrics, new SystemClock)
   }
 
@@ -76,7 +76,7 @@ class KafkaSystemFactory extends SystemFactory with Logging {
   }
 
   def getAdmin(systemName: String, config: Config): SystemAdmin = {
-    val clientId = KafkaConsumerConfig.getConsumerClientId(config)
+    val clientId = KafkaConsumerConfig.getAdminClientId(config)
     val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId)
     val bootstrapServers = producerConfig.bootsrapServers
     val consumerConfig = config.getKafkaSystemConsumerConfig(systemName, clientId)

http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
deleted file mode 100644
index afec8ad..0000000
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ /dev/null
@@ -1,412 +0,0 @@
-
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- */
-
-package org.apache.samza.system.kafka;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import kafka.common.TopicAndPartition;
-import org.apache.kafka.clients.consumer.Consumer;
-import org.apache.kafka.clients.consumer.KafkaConsumer;
-import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
-import org.apache.kafka.common.TopicPartition;
-import org.apache.samza.Partition;
-import org.apache.samza.SamzaException;
-import org.apache.samza.config.Config;
-import org.apache.samza.config.KafkaConfig;
-import org.apache.samza.system.IncomingMessageEnvelope;
-import org.apache.samza.system.SystemConsumer;
-import org.apache.samza.system.SystemStreamPartition;
-import org.apache.samza.util.BlockingEnvelopeMap;
-import org.apache.samza.util.Clock;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import scala.Option;
-
-
-public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements SystemConsumer {
-
-  private static final Logger LOG = LoggerFactory.getLogger(NewKafkaSystemConsumer.class);
-
-  private static final long FETCH_THRESHOLD = 50000;
-  private static final long FETCH_THRESHOLD_BYTES = -1L;
-
-  private final Consumer<K, V> kafkaConsumer;
-  private final String systemName;
-  private final KafkaSystemConsumerMetrics samzaConsumerMetrics;
-  private final String clientId;
-  private final String metricName;
-  private final AtomicBoolean stopped = new AtomicBoolean(false);
-  private final AtomicBoolean started = new AtomicBoolean(false);
-  private final Config config;
-  private final boolean fetchThresholdBytesEnabled;
-
-  // This sink is used to transfer the messages from the proxy/consumer to the BlockingEnvelopeMap.
-  /* package private */ KafkaConsumerMessageSink messageSink;
-
-  // proxy is doing the actual reading
-  private KafkaConsumerProxy proxy;
-
-  /* package private */final Map<TopicPartition, String> topicPartitions2Offset = new HashMap<>();
-  /* package private */final Map<TopicPartition, SystemStreamPartition> topicPartitions2SSP = new HashMap<>();
-
-  /* package private */ long perPartitionFetchThreshold;
-  /* package private */ long perPartitionFetchThresholdBytes;
-
-  /**
-   * @param systemName
-   * @param config
-   * @param metrics
-   */
-  protected NewKafkaSystemConsumer(Consumer<K, V> kafkaConsumer, String systemName, Config config, String clientId,
-      KafkaSystemConsumerMetrics metrics, Clock clock) {
-
-    super(metrics.registry(), clock, metrics.getClass().getName());
-
-    this.kafkaConsumer = kafkaConsumer;
-    this.samzaConsumerMetrics = metrics;
-    this.clientId = clientId;
-    this.systemName = systemName;
-    this.config = config;
-    this.metricName = systemName + " " + clientId;
-
-    this.fetchThresholdBytesEnabled = new KafkaConfig(config).isConsumerFetchThresholdBytesEnabled(systemName);
-
-    LOG.info("Created SamzaKafkaSystemConsumer for system={}, clientId={}, metricName={}, KafkaConsumer={}", systemName,
-        clientId, metricName, this.kafkaConsumer.toString());
-  }
-
-  public static <K, V> NewKafkaSystemConsumer getNewKafkaSystemConsumer(String systemName, Config config,
-      String clientId, KafkaSystemConsumerMetrics metrics, Clock clock) {
-
-    // extract consumer configs and create kafka consumer
-    KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
-    LOG.info("Created kafka consumer for system {}, clientId {}: {}", systemName, clientId, kafkaConsumer);
-
-    NewKafkaSystemConsumer kc = new NewKafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
-    LOG.info("Created samza system consumer {}", kc.toString());
-
-    return kc;
-  }
-
-  /**
-   * create kafka consumer
-   * @param systemName
-   * @param clientId
-   * @param config
-   * @return kafka consumer
-   */
-  private static <K, V> KafkaConsumer<K, V> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
-
-    Map<String, String> injectProps = new HashMap<>();
-
-    // extract kafka client configs
-    KafkaConsumerConfig consumerConfig =
-        KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
-
-    LOG.info("KafkaClient properties for systemName {}: {}", systemName, consumerConfig.originals());
-
-    return new KafkaConsumer<>(consumerConfig.originals());
-  }
-
-  @Override
-  public void start() {
-    if (!started.compareAndSet(false, true)) {
-      LOG.warn("attempting to start the consumer for the second (or more) time.");
-      return;
-    }
-    if (stopped.get()) {
-      LOG.warn("attempting to start a stopped consumer");
-      return;
-    }
-    // initialize the subscriptions for all the registered TopicPartitions
-    startSubscription();
-    // needs to be called after all the registrations are completed
-    setFetchThresholds();
-    // Create the proxy to do the actual message reading. It is a separate thread that reads the messages from the stream
-    // and puts them into the sink.
-    createConsumerProxy();
-    startConsumer();
-    LOG.info("consumer {} started", this);
-  }
-
-  private void startSubscription() {
-    //subscribe to all the registered TopicPartitions
-    LOG.info("consumer {}, subscribes to {} ", this, topicPartitions2SSP.keySet());
-    try {
-      synchronized (kafkaConsumer) {
-        // we are using assign (and not subscribe), so we need to specify both topic and partition
-        kafkaConsumer.assign(topicPartitions2SSP.keySet());
-      }
-    } catch (Exception e) {
-      LOG.warn("startSubscription failed.", e);
-      throw new SamzaException(e);
-    }
-  }
-
-  void createConsumerProxy() {
-    // create a sink for passing the messages between the proxy and the consumer
-    messageSink = new KafkaConsumerMessageSink();
-
-    // create the thread with the consumer
-    proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink, samzaConsumerMetrics, metricName);
-
-    LOG.info("Created consumer proxy: " + proxy);
-  }
-
-  /*
-   Set the offsets to start from.
-   Add the TopicPartitions to the proxy.
-   Start the proxy thread.
-   */
-  void startConsumer() {
-    //set the offset for each TopicPartition
-    if (topicPartitions2Offset.size() <= 0) {
-      LOG.warn("Consumer {} is not subscribed to any SSPs", this);
-    }
-
-    topicPartitions2Offset.forEach((tp, startingOffsetString) -> {
-      long startingOffset = Long.valueOf(startingOffsetString);
-
-      try {
-        synchronized (kafkaConsumer) {
-          // TODO in the future we may need to add special handling here for BEGIN/END_OFFSET
-          // this will call KafkaConsumer.seekToBegin/End()
-          kafkaConsumer.seek(tp, startingOffset); // this value should already be the 'upcoming' value
-        }
-      } catch (Exception e) {
-        // all other exceptions - non recoverable
-        LOG.error("Got Exception while seeking to " + startingOffsetString + " for " + tp, e);
-        throw new SamzaException(e);
-      }
-
-      LOG.info("Changing consumer's starting offset for tp = " + tp + " to " + startingOffsetString);
-
-      // add the partition to the proxy
-      proxy.addTopicPartition(topicPartitions2SSP.get(tp), startingOffset);
-    });
-
-    // start the proxy thread
-    if (proxy != null && !proxy.isRunning()) {
-      LOG.info("Starting proxy: " + proxy);
-      proxy.start();
-    }
-  }
-
-  private void setFetchThresholds() {
-    // get the thresholds, and set defaults if not defined.
-    KafkaConfig kafkaConfig = new KafkaConfig(config);
-
-    Option<String> fetchThresholdOption = kafkaConfig.getConsumerFetchThreshold(systemName);
-    long fetchThreshold = FETCH_THRESHOLD;
-    if (fetchThresholdOption.isDefined()) {
-      fetchThreshold = Long.valueOf(fetchThresholdOption.get());
-      LOG.info("fetchThresholdOption is configured. fetchThreshold=" + fetchThreshold);
-    }
-
-    Option<String> fetchThresholdBytesOption = kafkaConfig.getConsumerFetchThresholdBytes(systemName);
-    long fetchThresholdBytes = FETCH_THRESHOLD_BYTES;
-    if (fetchThresholdBytesOption.isDefined()) {
-      fetchThresholdBytes = Long.valueOf(fetchThresholdBytesOption.get());
-      LOG.info("fetchThresholdBytesOption is configured. fetchThresholdBytes=" + fetchThresholdBytes);
-    }
-
-    int numTPs = topicPartitions2SSP.size();
-    assert (numTPs == topicPartitions2Offset.size());
-
-    LOG.info("fetchThresholdBytes = " + fetchThresholdBytes + "; fetchThreshold=" + fetchThreshold);
-    LOG.info("number of topicPartitions " + numTPs);
-
-    if (numTPs > 0) {
-      perPartitionFetchThreshold = fetchThreshold / numTPs;
-      LOG.info("perPartitionFetchThreshold=" + perPartitionFetchThreshold);
-      if (fetchThresholdBytesEnabled) {
-        // currently this feature cannot be enabled, because we do not have the size of the messages available.
-        // messages get double buffered, hence divide by 2
-        perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / numTPs;
-        LOG.info("perPartitionFetchThresholdBytes is enabled. perPartitionFetchThresholdBytes="
-            + perPartitionFetchThresholdBytes);
-      }
-    }
-  }
-
-  @Override
-  public void stop() {
-    LOG.info("Stopping Samza kafkaConsumer " + this);
-
-    if (!stopped.compareAndSet(false, true)) {
-      LOG.warn("attempting to stop stopped consumer.");
-      return;
-    }
-
-    // stop the proxy (with 5 minutes timeout)
-    if (proxy != null) {
-      LOG.info("Stopping proxy " + proxy);
-      proxy.stop(TimeUnit.MINUTES.toMillis(5));
-    }
-
-    try {
-      synchronized (kafkaConsumer) {
-        LOG.info("Closing kafka consumer " + kafkaConsumer);
-        kafkaConsumer.close();
-      }
-    } catch (Exception e) {
-      LOG.warn("failed to stop SamzaRawKafkaConsumer + " + this, e);
-    }
-  }
-
-  /*
-   record the ssp and the offset. Do not submit it to the consumer yet.
-   */
-  @Override
-  public void register(SystemStreamPartition systemStreamPartition, String offset) {
-    if (started.get()) {
-      String msg =
-          String.format("Trying to register partition after consumer has been started. sn=%s, ssp=%s", systemName,
-              systemStreamPartition);
-      LOG.error(msg);
-      throw new SamzaException(msg);
-    }
-
-    if (!systemStreamPartition.getSystem().equals(systemName)) {
-      LOG.warn("ignoring SSP " + systemStreamPartition + ", because this consumer's system is " + systemName);
-      return;
-    }
-    super.register(systemStreamPartition, offset);
-
-    TopicPartition tp = toTopicPartition(systemStreamPartition);
-
-    topicPartitions2SSP.put(tp, systemStreamPartition);
-
-    LOG.info("Registering ssp = " + systemStreamPartition + " with offset " + offset);
-
-    String existingOffset = topicPartitions2Offset.get(tp);
-    // register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
-    if (existingOffset == null || compareOffsets(existingOffset, offset) > 0) {
-      topicPartitions2Offset.put(tp, offset);
-    }
-
-    samzaConsumerMetrics.registerTopicAndPartition(toTopicAndPartition(tp));
-  }
-
-  /**
-   * Compare two String offsets.
-   * Note. There is a method in KafkaAdmin that does that, but that would require instantiation of systemadmin for each consumer.
-   * @param off1
-   * @param off2
-   * @return see {@link Long#compareTo(Long)}
-   */
-  public static int compareOffsets(String off1, String off2) {
-    return Long.valueOf(off1).compareTo(Long.valueOf(off2));
-  }
-
-  @Override
-  public String toString() {
-    return systemName + "/" + clientId + "/" + super.toString();
-  }
-
-  @Override
-  public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
-      Set<SystemStreamPartition> systemStreamPartitions, long timeout) throws InterruptedException {
-
-    // check if the proxy is running
-    if (!proxy.isRunning()) {
-      stop();
-      if (proxy.getFailureCause() != null) {
-        String message = "KafkaConsumerProxy has stopped";
-        throw new SamzaException(message, proxy.getFailureCause());
-      } else {
-        LOG.warn("Failure cause is not populated for KafkaConsumerProxy");
-        throw new SamzaException("KafkaConsumerProxy has stopped");
-      }
-    }
-
-    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> res = super.poll(systemStreamPartitions, timeout);
-    return res;
-  }
-
-  public static TopicAndPartition toTopicAndPartition(TopicPartition tp) {
-    return new TopicAndPartition(tp.topic(), tp.partition());
-  }
-
-  public static TopicAndPartition toTopicAndPartition(SystemStreamPartition ssp) {
-    return new TopicAndPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
-  }
-
-  public static TopicPartition toTopicPartition(SystemStreamPartition ssp) {
-    return new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
-  }
-
-  public static SystemStreamPartition toSystemStreamPartition(String systemName, TopicAndPartition tp) {
-    return new SystemStreamPartition(systemName, tp.topic(), new Partition(tp.partition()));
-  }
-
-  /**
-   * return system name for this consumer
-   * @return system name
-   */
-  public String getSystemName() {
-    return systemName;
-  }
-
-  ////////////////////////////////////
-  // inner class for the message sink
-  ////////////////////////////////////
-  public class KafkaConsumerMessageSink {
-
-    public void setIsAtHighWatermark(SystemStreamPartition ssp, boolean isAtHighWatermark) {
-      setIsAtHead(ssp, isAtHighWatermark);
-    }
-
-    boolean needsMoreMessages(SystemStreamPartition ssp) {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("needsMoreMessages from following SSP: {}. fetchLimitByBytes enabled={}; messagesSizeInQueue={};"
-                + "(limit={}); messagesNumInQueue={}(limit={};", ssp, fetchThresholdBytesEnabled,
-            getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes, getNumMessagesInQueue(ssp),
-            perPartitionFetchThreshold);
-      }
-
-      if (fetchThresholdBytesEnabled) {
-        return getMessagesSizeInQueue(ssp) < perPartitionFetchThresholdBytes;
-      } else {
-        return getNumMessagesInQueue(ssp) < perPartitionFetchThreshold;
-      }
-    }
-
-    void addMessage(SystemStreamPartition ssp, IncomingMessageEnvelope envelope) {
-      LOG.trace("Incoming message ssp = {}: envelope = {}.", ssp, envelope);
-
-      try {
-        put(ssp, envelope);
-      } catch (InterruptedException e) {
-        throw new SamzaException(
-            String.format("Interrupted while trying to add message with offset %s for ssp %s", envelope.getOffset(),
-                ssp));
-      }
-    }
-  }  // end of KafkaMessageSink class
-  ///////////////////////////////////////////////////////////////////////////
-}

http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java
new file mode 100644
index 0000000..d90bc35
--- /dev/null
+++ b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestKafkaSystemConsumer.java
@@ -0,0 +1,224 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.samza.system.kafka;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
+import org.apache.kafka.common.serialization.ByteArraySerializer;
+import org.apache.samza.Partition;
+import org.apache.samza.config.Config;
+import org.apache.samza.config.KafkaConfig;
+import org.apache.samza.config.MapConfig;
+import org.apache.samza.system.IncomingMessageEnvelope;
+import org.apache.samza.system.SystemStreamPartition;
+import org.apache.samza.util.Clock;
+import org.apache.samza.util.NoOpMetricsRegistry;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+
+public class TestKafkaSystemConsumer {
+  public final String TEST_SYSTEM = "test-system";
+  public final String TEST_STREAM = "test-stream";
+  public final String TEST_CLIENT_ID = "testClientId";
+  public final String BOOTSTRAP_SERVER = "127.0.0.1:8888";
+  public final String FETCH_THRESHOLD_MSGS = "50000";
+  public final String FETCH_THRESHOLD_BYTES = "100000";
+
+  @Before
+  public void setUp() {
+
+  }
+
+  private KafkaSystemConsumer setupConsumer(String fetchMsg, String fetchBytes) {
+    final Map<String, String> map = new HashMap<>();
+
+    map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD(), TEST_SYSTEM), fetchMsg);
+    map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD_BYTES(), TEST_SYSTEM), fetchBytes);
+    map.put(String.format("systems.%s.consumer.%s", TEST_SYSTEM, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG),
+        BOOTSTRAP_SERVER);
+
+    Config config = new MapConfig(map);
+    KafkaConsumerConfig consumerConfig =
+        KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, TEST_SYSTEM, TEST_CLIENT_ID, Collections.emptyMap());
+    final KafkaConsumer<byte[], byte[]> kafkaConsumer = new MockKafkaConsumer(consumerConfig.originals());
+
+    MockKafkaSystemConsumer newKafkaSystemConsumer =
+        new MockKafkaSystemConsumer(kafkaConsumer, TEST_SYSTEM, config, TEST_CLIENT_ID,
+            new KafkaSystemConsumerMetrics(TEST_SYSTEM, new NoOpMetricsRegistry()), System::currentTimeMillis);
+
+    return newKafkaSystemConsumer;
+  }
+
+  @Test
+  public void testConfigValidations() {
+
+    final KafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+    consumer.start();
+    // should be no failures
+  }
+
+  @Test
+  public void testFetchThresholdShouldDivideEvenlyAmongPartitions() {
+    final KafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+    final int partitionsNum = 50;
+    for (int i = 0; i < partitionsNum; i++) {
+      consumer.register(new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(i)), "0");
+    }
+
+    consumer.start();
+
+    Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum, consumer.perPartitionFetchThreshold);
+    Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_BYTES) / 2 / partitionsNum,
+        consumer.perPartitionFetchThresholdBytes);
+  }
+
+  @Test
+  public void testConsumerRegisterOlderOffsetOfTheSamzaSSP() {
+
+    KafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+    SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+    SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+    SystemStreamPartition ssp2 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(2));
+
+    consumer.register(ssp0, "0");
+    consumer.register(ssp0, "5");
+    consumer.register(ssp1, "2");
+    consumer.register(ssp1, "3");
+    consumer.register(ssp2, "0");
+
+    assertEquals("0", consumer.topicPartitions2Offset.get(KafkaSystemConsumer.toTopicPartition(ssp0)));
+    assertEquals("2", consumer.topicPartitions2Offset.get(KafkaSystemConsumer.toTopicPartition(ssp1)));
+    assertEquals("0", consumer.topicPartitions2Offset.get(KafkaSystemConsumer.toTopicPartition(ssp2)));
+  }
+
+  @Test
+  public void testFetchThresholdBytes() {
+
+    SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+    SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+    int partitionsNum = 2;
+    int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size
+    int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 1; // fake size
+    int ime11Size = 20;
+    ByteArraySerializer bytesSerde = new ByteArraySerializer();
+    IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
+        bytesSerde.serialize("", "value0".getBytes()), ime0Size);
+    IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
+        bytesSerde.serialize("", "value1".getBytes()), ime1Size);
+    IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
+        bytesSerde.serialize("", "value11".getBytes()), ime11Size);
+    KafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
+
+    consumer.register(ssp0, "0");
+    consumer.register(ssp1, "0");
+    consumer.start();
+    consumer.messageSink.addMessage(ssp0, ime0);
+    // queue for ssp0 should be full now, because we added a message of size FETCH_THRESHOLD_MSGS/partitionsNum
+    Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp0));
+    consumer.messageSink.addMessage(ssp1, ime1);
+    // queue for ssp1 should be less than full now, because we added a message of size (FETCH_THRESHOLD_MSGS/partitionsNum - 1)
+    Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
+    consumer.messageSink.addMessage(ssp1, ime11);
+    // queue for ssp1 should be full now, because we added a message of size 20 on top
+    Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
+
+    Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
+    Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
+    Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
+    Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
+  }
+
+  @Test
+  public void testFetchThresholdBytesDisabled() {
+    // Pass 0 as fetchThresholdBytes, which disables the size-based limit check
+
+    SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
+    SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
+    int partitionsNum = 2;
+    int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size, up to the limit
+    int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 100; // fake size, below the limit
+    int ime11Size = 20; // even with the second message, still below the size limit
+    ByteArraySerializer bytesSerde = new ByteArraySerializer();
+    IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
+        bytesSerde.serialize("", "value0".getBytes()), ime0Size);
+    IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
+        bytesSerde.serialize("", "value1".getBytes()), ime1Size);
+    IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
+        bytesSerde.serialize("", "value11".getBytes()), ime11Size);
+
+    // limit by number of messages 4/2 = 2 per partition
+    // limit by number of bytes - disabled
+    KafkaSystemConsumer consumer = setupConsumer("4", "0"); // should disable
+
+    consumer.register(ssp0, "0");
+    consumer.register(ssp1, "0");
+    consumer.start();
+    consumer.messageSink.addMessage(ssp0, ime0);
+    // would be full by size, but the size check is disabled; not full by message count (1 of 2)
+    Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp0));
+    consumer.messageSink.addMessage(ssp1, ime1);
+    // not full by size or by message count
+    Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
+    consumer.messageSink.addMessage(ssp1, ime11);
+    // not full by size, but full by message count (2 of 2)
+    Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
+
+    Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
+    Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
+    Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
+    Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
+  }
+
+  // mock kafkaConsumer and SystemConsumer
+  static class MockKafkaConsumer extends KafkaConsumer {
+    public MockKafkaConsumer(Map<String, Object> configs) {
+      super(configs);
+    }
+  }
+
+  static class MockKafkaSystemConsumer extends KafkaSystemConsumer {
+    public MockKafkaSystemConsumer(Consumer kafkaConsumer, String systemName, Config config, String clientId,
+        KafkaSystemConsumerMetrics metrics, Clock clock) {
+      super(kafkaConsumer, systemName, config, clientId, metrics, clock);
+    }
+
+    @Override
+    void startConsumer() {
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/samza/blob/26552213/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java b/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
deleted file mode 100644
index fb7533b..0000000
--- a/samza-kafka/src/test/scala/org/apache/samza/system/kafka/TestNewKafkaSystemConsumer.java
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- */
-
-package org.apache.samza.system.kafka;
-
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.kafka.clients.consumer.Consumer;
-import org.apache.kafka.clients.consumer.ConsumerConfig;
-import org.apache.kafka.clients.consumer.KafkaConsumer;
-import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
-import org.apache.kafka.common.serialization.ByteArraySerializer;
-import org.apache.samza.Partition;
-import org.apache.samza.config.Config;
-import org.apache.samza.config.KafkaConfig;
-import org.apache.samza.config.MapConfig;
-import org.apache.samza.system.IncomingMessageEnvelope;
-import org.apache.samza.system.SystemStreamPartition;
-import org.apache.samza.util.Clock;
-import org.apache.samza.util.NoOpMetricsRegistry;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-import static org.junit.Assert.*;
-
-
-public class TestNewKafkaSystemConsumer {
-  public final String TEST_SYSTEM = "test-system";
-  public final String TEST_STREAM = "test-stream";
-  public final String TEST_CLIENT_ID = "testClientId";
-  public final String BOOTSTRAP_SERVER = "127.0.0.1:8888";
-  public final String FETCH_THRESHOLD_MSGS = "50000";
-  public final String FETCH_THRESHOLD_BYTES = "100000";
-
-  @Before
-  public void setUp() {
-
-  }
-
-  private NewKafkaSystemConsumer setupConsumer(String fetchMsg, String fetchBytes) {
-    final Map<String, String> map = new HashMap<>();
-
-    map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD(), TEST_SYSTEM), fetchMsg);
-    map.put(String.format(KafkaConfig.CONSUMER_FETCH_THRESHOLD_BYTES(), TEST_SYSTEM), fetchBytes);
-    map.put(String.format("systems.%s.consumer.%s", TEST_SYSTEM, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG),
-        BOOTSTRAP_SERVER);
-
-    Config config = new MapConfig(map);
-    KafkaConsumerConfig consumerConfig =
-        KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, TEST_SYSTEM, TEST_CLIENT_ID, Collections.emptyMap());
-    final KafkaConsumer<byte[], byte[]> kafkaConsumer = new MockKafkaConsumer(consumerConfig.originals());
-
-    MockNewKafkaSystmeCosumer newKafkaSystemConsumer =
-        new MockNewKafkaSystmeCosumer(kafkaConsumer, TEST_SYSTEM, config, TEST_CLIENT_ID,
-            new KafkaSystemConsumerMetrics(TEST_SYSTEM, new NoOpMetricsRegistry()), System::currentTimeMillis);
-
-    return newKafkaSystemConsumer;
-  }
-
-  @Test
-  public void testConfigValidations() {
-
-    final NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
-
-    consumer.start();
-    // should be no failures
-  }
-
-  @Test
-  public void testFetchThresholdShouldDivideEvenlyAmongPartitions() {
-    final NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
-    final int partitionsNum = 50;
-    for (int i = 0; i < partitionsNum; i++) {
-      consumer.register(new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(i)), "0");
-    }
-
-    consumer.start();
-
-    Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum, consumer.perPartitionFetchThreshold);
-    Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_BYTES) / 2 / partitionsNum,
-        consumer.perPartitionFetchThresholdBytes);
-  }
-
-  @Test
-  public void testConsumerRegisterOlderOffsetOfTheSamzaSSP() {
-
-    NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
-
-    SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
-    SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
-    SystemStreamPartition ssp2 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(2));
-
-    consumer.register(ssp0, "0");
-    consumer.register(ssp0, "5");
-    consumer.register(ssp1, "2");
-    consumer.register(ssp1, "3");
-    consumer.register(ssp2, "0");
-
-    assertEquals("0", consumer.topicPartitions2Offset.get(NewKafkaSystemConsumer.toTopicPartition(ssp0)));
-    assertEquals("2", consumer.topicPartitions2Offset.get(NewKafkaSystemConsumer.toTopicPartition(ssp1)));
-    assertEquals("0", consumer.topicPartitions2Offset.get(NewKafkaSystemConsumer.toTopicPartition(ssp2)));
-  }
-
-  @Test
-  public void testFetchThresholdBytes() {
-
-    SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
-    SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
-    int partitionsNum = 2;
-    int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size
-    int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 1; // fake size
-    int ime11Size = 20;
-    ByteArraySerializer bytesSerde = new ByteArraySerializer();
-    IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
-        bytesSerde.serialize("", "value0".getBytes()), ime0Size);
-    IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
-        bytesSerde.serialize("", "value1".getBytes()), ime1Size);
-    IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
-        bytesSerde.serialize("", "value11".getBytes()), ime11Size);
-    NewKafkaSystemConsumer consumer = setupConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
-
-    consumer.register(ssp0, "0");
-    consumer.register(ssp1, "0");
-    consumer.start();
-    consumer.messageSink.addMessage(ssp0, ime0);
-    // queue for ssp0 should be full now, because we added message of size FETCH_THRESHOLD_MSGS/partitionsNum
-    Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp0));
-    consumer.messageSink.addMessage(ssp1, ime1);
-    // queue for ssp1 should be less then full now, because we added message of size (FETCH_THRESHOLD_MSGS/partitionsNum - 1)
-    Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
-    consumer.messageSink.addMessage(ssp1, ime11);
-    // queue for ssp1 should full now, because we added message of size 20 on top
-    Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
-
-    Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
-    Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
-    Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
-    Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
-  }
-
-  @Test
-  public void testFetchThresholdBytesDiabled() {
-    // Pass 0 as fetchThresholdByBytes, which disables checking for limit by size
-
-    SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
-    SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
-    int partitionsNum = 2;
-    int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size, upto the limit
-    int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 100; // fake size, below the limit
-    int ime11Size = 20;// event with the second message still below the size limit
-    ByteArraySerializer bytesSerde = new ByteArraySerializer();
-    IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
-        bytesSerde.serialize("", "value0".getBytes()), ime0Size);
-    IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
-        bytesSerde.serialize("", "value1".getBytes()), ime1Size);
-    IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
-        bytesSerde.serialize("", "value11".getBytes()), ime11Size);
-
-    // limit by number of messages 4/2 = 2 per partition
-    // limit by number of bytes - disabled
-    NewKafkaSystemConsumer consumer = setupConsumer("4", "0"); // should disable
-
-    consumer.register(ssp0, "0");
-    consumer.register(ssp1, "0");
-    consumer.start();
-    consumer.messageSink.addMessage(ssp0, ime0);
-    // should be full by size, but not full by number of messages (1 of 2)
-    Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp0));
-    consumer.messageSink.addMessage(ssp1, ime1);
-    // not full neither by size nor by messages
-    Assert.assertEquals(true, consumer.messageSink.needsMoreMessages(ssp1));
-    consumer.messageSink.addMessage(ssp1, ime11);
-    // not full by size, but should be full by messages
-    Assert.assertEquals(false, consumer.messageSink.needsMoreMessages(ssp1));
-
-    Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
-    Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
-    Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
-    Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
-  }
-
-  // mock kafkaConsumer and SystemConsumer
-  static class MockKafkaConsumer extends KafkaConsumer {
-    public MockKafkaConsumer(Map<String, Object> configs) {
-      super(configs);
-    }
-  }
-
-  static class MockNewKafkaSystmeCosumer extends NewKafkaSystemConsumer {
-    public MockNewKafkaSystmeCosumer(Consumer kafkaConsumer, String systemName, Config config, String clientId,
-        KafkaSystemConsumerMetrics metrics, Clock clock) {
-      super(kafkaConsumer, systemName, config, clientId, metrics, clock);
-    }
-
-    @Override
-    void createConsumerProxy() {
-      this.messageSink = new KafkaConsumerMessageSink();
-    }
-
-    @Override
-    void startConsumer() {
-    }
-  }
-}


[40/47] samza git commit: Merge branch 'master' into NewConsumer2

Posted by bo...@apache.org.
Merge branch 'master' into NewConsumer2


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/f81cf148
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/f81cf148
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/f81cf148

Branch: refs/heads/NewKafkaSystemConsumer
Commit: f81cf1489c35ba52ef215d414ecf860689bd86a8
Parents: ddada94 952dbbe
Author: Boris S <bo...@apache.org>
Authored: Mon Sep 10 19:06:52 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Mon Sep 10 19:06:52 2018 -0700

----------------------------------------------------------------------
 samza-shell/src/main/bash/run-class.sh          | 12 ++-
 .../job/yarn/YarnClusterResourceManager.java    | 41 ++++++----
 .../yarn/TestYarnClusterResourceManager.java    | 81 ++++++++++++++++++++
 3 files changed, 116 insertions(+), 18 deletions(-)
----------------------------------------------------------------------



[47/47] samza git commit: Merge branch 'NewConsumer2' of https://github.com/sborya/samza into NewKafkaSystemConsumer

Posted by bo...@apache.org.
Merge branch 'NewConsumer2' of https://github.com/sborya/samza into NewKafkaSystemConsumer


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/36159631
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/36159631
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/36159631

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 361596317a990a047282669ab93c1a1eb7810ae5
Parents: 40f7430 74b6cfa
Author: Boris S <bs...@linkedin.com>
Authored: Tue Sep 18 15:56:52 2018 -0700
Committer: Boris S <bs...@linkedin.com>
Committed: Tue Sep 18 15:56:52 2018 -0700

----------------------------------------------------------------------
 .../samza/system/IncomingMessageEnvelope.java   |   3 +-
 .../ClusterBasedJobCoordinator.java             |   2 +-
 .../apache/samza/storage/StorageRecovery.java   |   2 +-
 .../samza/checkpoint/CheckpointTool.scala       |   2 +-
 .../apache/samza/container/SamzaContainer.scala |   2 +-
 .../samza/coordinator/JobModelManager.scala     |   6 +-
 .../samza/job/local/ProcessJobFactory.scala     |   3 +-
 .../samza/job/local/ThreadJobFactory.scala      |  20 +-
 .../samza/coordinator/TestJobCoordinator.scala  |   4 +-
 .../clients/consumer/KafkaConsumerConfig.java   | 194 ++++++++
 .../org/apache/samza/config/KafkaConfig.scala   |   5 +-
 .../apache/samza/system/kafka/BrokerProxy.scala | 332 --------------
 .../samza/system/kafka/KafkaConsumerProxy.java  | 456 +++++++++++++++++++
 .../samza/system/kafka/KafkaSystemConsumer.java | 391 ++++++++++++++++
 .../system/kafka/KafkaSystemConsumer.scala      | 309 -------------
 .../kafka/KafkaSystemConsumerMetrics.scala      |  68 ++-
 .../samza/system/kafka/KafkaSystemFactory.scala |  81 ++--
 .../consumer/TestKafkaConsumerConfig.java       | 137 ++++++
 .../samza/system/kafka/TestBrokerProxy.scala    | 434 ------------------
 .../system/kafka/TestKafkaSystemConsumer.java   | 224 +++++++++
 .../system/kafka/TestKafkaSystemConsumer.scala  | 191 --------
 .../test/integration/StreamTaskTestUtil.scala   |  17 +-
 .../integration/TestShutdownStatefulTask.scala  |   4 +-
 .../samza/validation/YarnJobValidationTool.java |   2 +-
 .../yarn/TestSamzaYarnAppMasterService.scala    |   4 +-
 25 files changed, 1511 insertions(+), 1382 deletions(-)
----------------------------------------------------------------------



[05/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/010fa168
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/010fa168
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/010fa168

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 010fa168ee2a290b93f5a3b0908709b2c19044ec
Parents: bbffb79 e6049b7
Author: Boris S <bo...@apache.org>
Authored: Tue Oct 24 18:33:03 2017 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Oct 24 18:33:03 2017 -0700

----------------------------------------------------------------------
 .../samza/system/kafka/KafkaStreamSpec.java     |  9 +++
 .../kafka/KafkaCheckpointManagerFactory.scala   | 21 ++-----
 .../org/apache/samza/config/KafkaConfig.scala   | 37 +++++++++++-
 .../samza/system/kafka/KafkaSystemAdmin.scala   |  9 ++-
 .../samza/system/kafka/KafkaSystemFactory.scala | 22 ++++++-
 .../TestKafkaCheckpointManagerFactory.java      | 51 +++++++++++++++++
 .../kafka/TestKafkaSystemFactoryJava.java       | 60 ++++++++++++++++++++
 .../kafka/TestKafkaCheckpointManager.scala      |  6 +-
 .../apache/samza/config/TestKafkaConfig.scala   | 13 +++++
 9 files changed, 204 insertions(+), 24 deletions(-)
----------------------------------------------------------------------



[34/47] samza git commit: cleanup

Posted by bo...@apache.org.
cleanup


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/0b6768f8
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/0b6768f8
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/0b6768f8

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 0b6768f803db12bf433d96b832c95fa228f6e7ca
Parents: f14d608
Author: Boris S <bo...@apache.org>
Authored: Wed Sep 5 14:39:08 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Sep 5 14:39:08 2018 -0700

----------------------------------------------------------------------
 .../org/apache/samza/coordinator/JobModelManager.scala    |  2 +-
 .../kafka/clients/consumer/KafkaConsumerConfig.java       | 10 +++++-----
 .../org/apache/samza/system/kafka/KafkaConsumerProxy.java | 10 +++++-----
 .../apache/samza/system/kafka/KafkaSystemFactory.scala    |  4 ++--
 4 files changed, 13 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/0b6768f8/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala b/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
index f7ffd4e..f95a521 100644
--- a/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
+++ b/samza-core/src/main/scala/org/apache/samza/coordinator/JobModelManager.scala
@@ -64,7 +64,7 @@ object JobModelManager extends Logging {
    * a) Reads the jobModel from coordinator stream using the job's configuration.
    * b) Recomputes changelog partition mapping based on jobModel and job's configuration.
    * c) Builds JobModelManager using the jobModel read from coordinator stream.
-   * @param config Coordinator stream manager config
+   * @param config Coordinator stream manager config.
    * @param changelogPartitionMapping The changelog partition-to-task mapping.
    * @return JobModelManager
    */

http://git-wip-us.apache.org/repos/asf/samza/blob/0b6768f8/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index 843e03d..98792ab 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -129,17 +129,17 @@ public class KafkaConsumerConfig extends ConsumerConfig {
   }
 
   // client id should be unique per job
-  public static String getClientId(Config config) {
-    return getClientId(CONSUMER_CLIENT_ID_PREFIX, config);
+  public static String getConsumerClientId(Config config) {
+    return getConsumerClientId(CONSUMER_CLIENT_ID_PREFIX, config);
   }
   public static String getProducerClientId(Config config) {
-    return getClientId(PRODUCER_CLIENT_ID_PREFIX, config);
+    return getConsumerClientId(PRODUCER_CLIENT_ID_PREFIX, config);
   }
   public static String getAdminClientId(Config config) {
-    return getClientId(ADMIN_CLIENT_ID_PREFIX, config);
+    return getConsumerClientId(ADMIN_CLIENT_ID_PREFIX, config);
   }
 
-  private static String getClientId(String id, Config config) {
+  private static String getConsumerClientId(String id, Config config) {
     if (config.get(JobConfig.JOB_NAME()) == null) {
       throw new ConfigException("Missing job name");
     }
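
A minimal, self-contained sketch of the id-building pattern these methods share: one
private helper, one role prefix per public getter (note the shared private helper,
renamed here to getConsumerClientId, also builds the producer and admin ids; only the
prefix differs). The class name and the sanitization rule below are assumptions for
illustration, not the Samza implementation:

    import java.util.Objects;

    public final class ClientIdSketch {
      private ClientIdSketch() { }

      // Builds "<prefix>-<jobName>-<jobId>"; callers pass a role prefix such as
      // "consumer", "producer", or "admin", mirroring the three public getters above.
      public static String clientId(String prefix, String jobName, String jobId) {
        Objects.requireNonNull(jobName, "Missing job name");
        String id = (jobId == null || jobId.isEmpty()) ? "1" : jobId;
        return (prefix + "-" + jobName + "-" + id).replaceAll("[^A-Za-z0-9-]", "_");
      }

      public static void main(String[] args) {
        System.out.println(clientId("consumer", "wikipedia-feed", "1")); // consumer-wikipedia-feed-1
      }
    }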

http://git-wip-us.apache.org/repos/asf/samza/blob/0b6768f8/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index 5c79017..ae80d50 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -85,7 +85,6 @@ public class KafkaConsumerProxy<K, V> {
     this.metricName = metricName;
     this.clientId = clientId;
 
-    // TODO - see if we need new metrics (not host:port based)
     this.kafkaConsumerMetrics.registerClientProxy(metricName);
 
     consumerPollThread = new Thread(createProxyThreadRunnable());
@@ -133,18 +132,17 @@ public class KafkaConsumerProxy<K, V> {
    * creates a separate thread for pulling messages
    */
   private Runnable createProxyThreadRunnable() {
-    return () -> {
+    Runnable runnable=  () -> {
       isRunning = true;
 
 
       try {
         consumerPollThreadStartLatch.countDown();
-        System.out.println("THREAD: runing " + consumerPollThread.getName());
+        LOG.info("Starting runnable " + consumerPollThread.getName());
         initializeLags();
         while (isRunning) {
           fetchMessages();
         }
-        System.out.println("THREAD: finished " + consumerPollThread.getName());
       } catch (Throwable throwable) {
         LOG.error(String.format("Error in KafkaConsumerProxy poll thread for system: %s.", systemName), throwable);
         // SamzaKafkaSystemConsumer uses the failureCause to propagate the throwable to the container
@@ -156,6 +154,8 @@ public class KafkaConsumerProxy<K, V> {
         LOG.info("Stopping the KafkaConsumerProxy poll thread for system: {}.", systemName);
       }
     };
+
+    return runnable;
   }
 
   private void initializeLags() {
@@ -433,7 +433,7 @@ public class KafkaConsumerProxy<K, V> {
   }
 
   public void stop(long timeout) {
-    System.out.println("THREAD: Shutting down KafkaConsumerProxy poll thread:" + consumerPollThread.getName());
+    LOG.info("Shutting down KafkaConsumerProxy poll thread:" + consumerPollThread.getName());
 
     isRunning = false;
     try {
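
A minimal sketch (not the Samza code) of the start-latch pattern the proxy above uses:
the poll thread counts down a latch so the starter can block until the loop is live, and
a volatile flag ends the loop on stop(). All names here are assumptions for illustration:

    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.TimeUnit;

    class PollLoopSketch implements Runnable {
      private final CountDownLatch started = new CountDownLatch(1);
      private volatile boolean running;

      @Override public void run() {
        running = true;
        started.countDown();            // let the starter observe that the loop is live
        while (running) {
          fetchOnce();                  // placeholder for one consumer.poll(...) cycle
        }
      }

      boolean awaitStart(long timeoutMs) throws InterruptedException {
        return started.await(timeoutMs, TimeUnit.MILLISECONDS);
      }

      void stop() { running = false; }

      private void fetchOnce() { /* poll and route messages */ }

      public static void main(String[] args) throws Exception {
        PollLoopSketch loop = new PollLoopSketch();
        Thread t = new Thread(loop, "poll-thread-sketch");
        t.start();
        loop.awaitStart(3000);
        loop.stop();
        t.join();
      }
    }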

http://git-wip-us.apache.org/repos/asf/samza/blob/0b6768f8/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index 892d400..6f58bed 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -47,7 +47,7 @@ object KafkaSystemFactory extends Logging {
 class KafkaSystemFactory extends SystemFactory with Logging {
 
   def getConsumer(systemName: String, config: Config, registry: MetricsRegistry): SystemConsumer = {
-    val clientId = KafkaConsumerConfig.getClientId( config)
+    val clientId = KafkaConsumerConfig.getConsumerClientId( config)
     val metrics = new KafkaSystemConsumerMetrics(systemName, registry)
 
     NewKafkaSystemConsumer.getNewKafkaSystemConsumer(
@@ -76,7 +76,7 @@ class KafkaSystemFactory extends SystemFactory with Logging {
   }
 
   def getAdmin(systemName: String, config: Config): SystemAdmin = {
-    val clientId = KafkaConsumerConfig.getClientId(config)
+    val clientId = KafkaConsumerConfig.getConsumerClientId(config)
     val producerConfig = config.getKafkaSystemProducerConfig(systemName, clientId)
     val bootstrapServers = producerConfig.bootsrapServers
     val consumerConfig = config.getKafkaSystemConsumerConfig(systemName, clientId)


[12/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/88f85595
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/88f85595
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/88f85595

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 88f855954e4d98aa3bdd3a3a778eb699a13ff659
Parents: 0edf343 8ce1bd5
Author: Boris S <bo...@apache.org>
Authored: Thu Aug 2 13:29:58 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Thu Aug 2 13:29:58 2018 -0700

----------------------------------------------------------------------
 .travis.yml                                     |   6 +-
 README.md                                       |   8 +-
 RELEASE.md                                      |  20 +
 bin/integration-tests.sh                        |  17 +-
 build.gradle                                    |   2 +-
 docs/community/committers.md                    |   5 +
 .../versioned/jobs/configuration-table.html     |  20 +
 .../org/apache/samza/metrics/ListGauge.java     | 143 +++++++
 .../apache/samza/metrics/MetricsRegistry.java   |   9 +
 .../apache/samza/metrics/MetricsVisitor.java    |   7 +-
 .../ReadableMetricsRegistryListener.java        |   2 +
 .../samza/operators/functions/MapFunction.java  |   2 +-
 .../apache/samza/runtime/ApplicationRunner.java |  22 --
 .../org/apache/samza/sql/SamzaSqlRelRecord.java | 113 ++++++
 .../samza/storage/SideInputsProcessor.java      |  46 +++
 .../storage/SideInputsProcessorFactory.java     |  45 +++
 .../samza/system/ExtendedSystemAdmin.java       |   6 +-
 .../samza/system/IncomingMessageEnvelope.java   |   9 +
 .../org/apache/samza/system/StreamSpec.java     |  56 +--
 .../org/apache/samza/system/SystemAdmin.java    |  31 ++
 .../samza/table/TableDescriptorsProvider.java   | 100 +++++
 .../org/apache/samza/table/TableProvider.java   |   3 +-
 .../java/org/apache/samza/table/TableSpec.java  |  44 ++-
 .../apache/samza/util/NoOpMetricsRegistry.java  |   7 +
 .../org/apache/samza/util/TimestampedValue.java |  61 +++
 .../org/apache/samza/metrics/TestListGauge.java |  99 +++++
 .../org/apache/samza/metrics/TestTimer.java     |   5 +-
 .../apache/samza/sql/TestSamzaSqlRelRecord.java |  43 ++
 .../apache/samza/system/TestSystemAdmin.java    | 116 ++++++
 .../samza/system/eventhub/EventHubConfig.java   |  40 ++
 .../consumer/EventHubSystemConsumer.java        | 129 ++++--
 .../MockEventHubClientManagerFactory.java       |   4 +
 .../system/eventhub/TestMetricsRegistry.java    |  16 +-
 .../consumer/TestEventHubSystemConsumer.java    |  94 +++++
 .../AbstractContainerAllocator.java             |   5 +
 .../HostAwareContainerAllocator.java            |  10 +
 .../clustermanager/SamzaApplicationState.java   |   8 +
 .../samza/config/InMemorySystemConfig.java      |  52 +++
 .../apache/samza/config/JavaStorageConfig.java  |  47 +++
 .../org/apache/samza/config/TaskConfigJava.java |   3 +-
 .../samza/container/SamzaContainerListener.java |   9 +-
 .../apache/samza/container/TaskContextImpl.java |  15 +-
 .../samza/execution/ExecutionPlanner.java       |  22 +-
 .../org/apache/samza/execution/JobGraph.java    |  18 +-
 .../samza/execution/JobGraphJsonGenerator.java  |  12 +-
 .../org/apache/samza/execution/JobNode.java     |  67 ++--
 .../org/apache/samza/execution/StreamEdge.java  |  34 +-
 .../apache/samza/execution/StreamManager.java   |  19 +-
 .../executors/KeyBasedExecutorService.java      | 174 +++++++++
 .../org/apache/samza/metrics/MetricGroup.java   |   4 +
 .../samza/operators/OperatorSpecGraph.java      |  15 +-
 .../apache/samza/operators/StreamGraphSpec.java |  79 ++--
 .../functions/PartialJoinFunction.java          |   2 +-
 .../operators/impl/BroadcastOperatorImpl.java   |   4 +-
 .../samza/operators/impl/OperatorImplGraph.java |  64 +--
 .../operators/impl/OutputOperatorImpl.java      |   4 +-
 .../operators/impl/PartialJoinOperatorImpl.java |   2 +-
 .../operators/impl/PartitionByOperatorImpl.java |  11 +-
 .../operators/impl/WindowOperatorImpl.java      |   2 +-
 .../operators/impl/store/TimeSeriesStore.java   |   2 +
 .../impl/store/TimeSeriesStoreImpl.java         |   1 +
 .../operators/impl/store/TimestampedValue.java  |  61 ---
 .../impl/store/TimestampedValueSerde.java       |   1 +
 .../samza/operators/spec/InputOperatorSpec.java |  12 +-
 .../samza/operators/spec/JoinOperatorSpec.java  |   2 +-
 .../samza/operators/spec/OperatorSpecs.java     |   7 +-
 .../samza/operators/spec/OutputStreamImpl.java  |  18 +-
 .../stream/IntermediateMessageStreamImpl.java   |   7 +-
 .../apache/samza/processor/StreamProcessor.java | 281 ++++++++-----
 .../runtime/AbstractApplicationRunner.java      | 111 ++----
 .../samza/runtime/ApplicationRunnerMain.java    |  10 -
 .../samza/runtime/LocalApplicationRunner.java   |  36 +-
 .../samza/runtime/LocalContainerRunner.java     | 150 +++----
 .../samza/runtime/RemoteApplicationRunner.java  |  34 +-
 .../standalone/PassthroughJobCoordinator.java   |   4 +
 .../samza/storage/ChangelogStreamManager.java   |   4 +-
 .../samza/storage/StorageManagerUtil.java       | 142 +++++++
 .../apache/samza/storage/StorageRecovery.java   |  14 +-
 .../storage/TaskSideInputStorageManager.java    | 375 ++++++++++++++++++
 .../system/inmemory/InMemorySystemFactory.java  |  15 +-
 .../samza/table/TableConfigGenerator.java       | 143 +++++++
 .../samza/table/caching/CachingTable.java       |  39 +-
 .../table/caching/CachingTableProvider.java     |   4 +-
 .../table/caching/guava/GuavaCacheTable.java    |  12 +-
 .../caching/guava/GuavaCacheTableProvider.java  |   1 +
 .../table/remote/RemoteReadWriteTable.java      |  39 +-
 .../samza/table/remote/RemoteReadableTable.java |  20 +-
 .../table/utils/DefaultTableReadMetrics.java    |  55 +++
 .../table/utils/DefaultTableWriteMetrics.java   |  63 +++
 .../samza/table/utils/TableMetricsUtil.java     | 101 +++++
 .../apache/samza/task/SystemTimerScheduler.java |   1 +
 .../org/apache/samza/testUtils/TestClock.java   |  45 +++
 .../java/org/apache/samza/util/StreamUtil.java  |  90 +++++
 .../samza/zk/ZkBarrierForVersionUpgrade.java    |  19 +-
 .../java/org/apache/samza/zk/ZkController.java  |  39 --
 .../org/apache/samza/zk/ZkControllerImpl.java   | 163 --------
 .../apache/samza/zk/ZkControllerListener.java   |  37 --
 .../org/apache/samza/zk/ZkJobCoordinator.java   | 172 +++++---
 .../org/apache/samza/zk/ZkLeaderElector.java    |  17 +-
 .../main/java/org/apache/samza/zk/ZkUtils.java  |  72 ++--
 .../apache/samza/checkpoint/OffsetManager.scala |   6 +-
 .../org/apache/samza/config/JobConfig.scala     |  13 +
 .../org/apache/samza/config/MetricsConfig.scala |   3 +
 .../org/apache/samza/config/StorageConfig.scala |  17 +-
 .../org/apache/samza/config/StreamConfig.scala  |   2 +-
 .../org/apache/samza/config/TaskConfig.scala    |   8 +-
 .../org/apache/samza/container/RunLoop.scala    |   5 +-
 .../apache/samza/container/SamzaContainer.scala | 142 +++++--
 .../samza/container/SamzaContainerMetrics.scala |   3 +
 .../apache/samza/container/TaskInstance.scala   | 140 +++++--
 .../diagnostics/DiagnosticsExceptionEvent.java  |  90 +++++
 .../samza/job/local/ThreadJobFactory.scala      |   6 +-
 .../ContainerProcessManagerMetrics.scala        |  22 +-
 .../apache/samza/metrics/MetricsHelper.scala    |   6 +-
 .../samza/metrics/MetricsRegistryMap.scala      |  15 +
 .../samza/metrics/reporter/JmxReporter.scala    |  29 +-
 .../apache/samza/metrics/reporter/Metrics.scala |   9 +-
 .../samza/metrics/reporter/MetricsHeader.scala  |   3 +
 .../reporter/MetricsSnapshotReporter.scala      |  67 +++-
 .../MetricsSnapshotReporterFactory.scala        |  11 +-
 .../serializers/MetricsSnapshotSerdeV2.java     |  75 ++++
 .../MetricsSnapshotSerdeV2Factory.java          |  31 ++
 .../samza/storage/TaskStorageManager.scala      | 117 +-----
 .../apache/samza/system/SSPMetadataCache.java   | 126 ++++++
 .../system/chooser/BootstrappingChooser.scala   |  35 +-
 .../scala/org/apache/samza/util/FileUtil.scala  |  21 +-
 .../org/apache/samza/util/ScalaJavaUtil.scala   |  12 +
 .../main/scala/org/apache/samza/util/Util.scala |  22 --
 .../TestHostAwareContainerAllocator.java        |  10 +-
 .../MockCoordinatorStreamSystemFactory.java     |   8 +-
 .../samza/execution/TestExecutionPlanner.java   |  42 +-
 .../apache/samza/execution/TestJobGraph.java    |  28 +-
 .../execution/TestJobGraphJsonGenerator.java    |  45 +--
 .../org/apache/samza/execution/TestJobNode.java |  17 +-
 .../apache/samza/execution/TestStreamEdge.java  |  16 +-
 .../executors/TestKeyBasedExecutorService.java  |  84 ++++
 .../metrics/TestMetricsSnapshotReporter.java    | 115 ++++++
 .../samza/operators/TestJoinOperator.java       |  11 +-
 .../samza/operators/TestOperatorSpecGraph.java  |  23 +-
 .../samza/operators/TestStreamGraphSpec.java    | 336 ++++++----------
 .../operators/impl/TestOperatorImplGraph.java   | 274 +++++++------
 .../operators/impl/TestWindowOperator.java      |  16 +-
 .../impl/store/TestTimeSeriesStoreImpl.java     |   1 +
 .../impl/store/TestTimestampedValueSerde.java   |   1 +
 .../operators/spec/OperatorSpecTestUtils.java   |  14 +-
 .../samza/operators/spec/TestOperatorSpec.java  |  18 +-
 .../spec/TestPartitionByOperatorSpec.java       |  14 +-
 .../samza/processor/TestStreamProcessor.java    | 162 +++++++-
 .../runtime/TestAbstractApplicationRunner.java  | 391 -------------------
 .../runtime/TestApplicationRunnerMain.java      |  11 +-
 .../runtime/TestLocalApplicationRunner.java     |  22 +-
 .../serializers/TestMetricsSnapshotSerdeV2.java |  69 ++++
 .../samza/system/TestSSPMetadataCache.java      | 319 +++++++++++++++
 .../samza/table/caching/TestCachingTable.java   |  18 +-
 .../org/apache/samza/task/TestAsyncRunLoop.java |   2 +-
 .../apache/samza/task/TestTaskFactoryUtil.java  |   5 +-
 .../apache/samza/testUtils/StreamTestUtils.java |  39 ++
 .../org/apache/samza/testUtils/TestClock.java   |  45 ---
 .../org/apache/samza/util/TestStreamUtil.java   | 337 ++++++++++++++++
 .../apache/samza/zk/TestZkJobCoordinator.java   |  19 +-
 .../java/org/apache/samza/zk/TestZkUtils.java   |  21 +-
 .../factories/TestPropertiesConfigFactory.scala |   3 +-
 .../samza/container/TestSamzaContainer.scala    |  44 ++-
 .../samza/container/TestTaskInstance.scala      |   5 +-
 .../org/apache/samza/job/TestJobRunner.scala    |  11 +-
 .../serializers/TestMetricsSnapshotSerde.scala  |   3 +-
 .../samza/storage/TestTaskStorageManager.scala  | 154 ++++----
 .../org/apache/samza/util/TestFileUtil.scala    |  22 ++
 .../hdfs/TestHdfsSystemProducerTestSuite.scala  |   5 +-
 .../samza/system/kafka/KafkaStreamSpec.java     |  15 +-
 .../org/apache/samza/config/KafkaConfig.scala   |   4 +-
 .../samza/config/RegExTopicGenerator.scala      |   8 +-
 .../apache/samza/system/kafka/BrokerProxy.scala |   8 +-
 .../samza/system/kafka/KafkaSystemAdmin.scala   |   4 +-
 .../system/kafka/KafkaSystemConsumer.scala      |  25 +-
 .../kafka/KafkaSystemConsumerMetrics.scala      |   6 +-
 .../util/ClientUtilTopicMetadataStore.scala     |   3 +
 .../scala/org/apache/samza/util/KafkaUtil.scala |   6 +-
 .../samza/system/kafka/TestKafkaStreamSpec.java |   6 +-
 .../kafka/TestKafkaCheckpointManager.scala      |   4 +-
 .../system/kafka/TestKafkaSystemAdmin.scala     |   2 +-
 .../system/kafka/TestKafkaSystemConsumer.scala  |   2 +-
 .../kv/inmemory/InMemoryTableDescriptor.java    |   3 +-
 .../storage/kv/RocksDbTableDescriptor.java      |   3 +-
 .../samza/storage/kv/RocksDbKeyValueStore.scala |   9 +-
 .../kv/BaseLocalStoreBackedTableDescriptor.java |  21 +
 .../kv/BaseLocalStoreBackedTableProvider.java   |  12 +
 .../kv/LocalStoreBackedReadWriteTable.java      |  33 +-
 .../kv/LocalStoreBackedReadableTable.java       |  33 +-
 .../TestLocalBaseStoreBackedTableProvider.java  |   6 +-
 .../log4j/SimpleDiagnosticsAppender.java        | 101 +++++
 .../apache/samza/rest/SamzaRestApplication.java |   1 -
 .../org/apache/samza/rest/SamzaRestService.java |  15 +-
 .../apache/samza/sql/avro/AvroRelConverter.java |  17 +-
 .../samza/sql/data/SamzaSqlRelMessage.java      |  85 +---
 .../sql/runner/SamzaSqlApplicationRunner.java   |   2 -
 .../SamzaSqlRelMessageSerdeFactory.java         |   2 +-
 .../SamzaSqlRelRecordSerdeFactory.java          |  18 +-
 .../samza/sql/TestSamzaSqlRelMessageSerde.java  | 102 -----
 .../samza/sql/TestSamzaSqlRelRecordSerde.java   |  86 ----
 .../samza/sql/avro/TestAvroRelConversion.java   |  17 +-
 .../samza/sql/avro/schemas/ComplexRecord.java   |   2 +-
 .../apache/samza/sql/avro/schemas/MyFixed.java  |   5 +-
 .../samza/sql/data/TestSamzaSqlRelMessage.java  |  18 +
 .../TestSamzaSqlRelMessageSerde.java            | 102 +++++
 .../serializers/TestSamzaSqlRelRecordSerde.java |  85 ++++
 .../samza/sql/system/TestAvroSystemFactory.java |   9 +
 .../sql/translator/TestQueryTranslator.java     | 235 ++++++-----
 .../config/standalone.failure.test.properties   |  45 +++
 .../test/framework/MessageStreamAssert.java     | 192 +++++++++
 .../samza/test/framework/StreamAssert.java      | 220 ++++-------
 .../apache/samza/test/framework/TestRunner.java | 367 +++++++++++++++++
 .../test/framework/stream/CollectionStream.java | 204 ++++++++++
 .../system/CollectionStreamSystemSpec.java      |  79 ++++
 .../integration/LocalApplicationRunnerMain.java |  63 +++
 .../test/integration/NegateNumberTask.java      |   4 +-
 .../TestStandaloneIntegrationApplication.java   |  42 ++
 samza-test/src/main/python/configs/kafka.json   |   2 +-
 samza-test/src/main/python/deployment.py        |   4 +-
 samza-test/src/main/python/requirements.txt     |   1 +
 .../src/main/python/standalone_deployment.py    | 123 ++++++
 .../main/python/standalone_integration_tests.py |  29 ++
 samza-test/src/main/python/stream_processor.py  | 121 ++++++
 .../python/tests/standalone_failure_tests.py    | 311 +++++++++++++++
 samza-test/src/main/python/tests/zk_client.py   | 129 ++++++
 .../test/performance/TestPerformanceTask.scala  |   4 +-
 .../samza/processor/TestZkStreamProcessor.java  |   4 +-
 .../TestZkStreamProcessorFailures.java          |   4 +-
 .../processor/TestZkStreamProcessorSession.java |   4 +-
 .../AsyncStreamTaskIntegrationTest.java         | 144 +++++++
 .../test/framework/BroadcastAssertApp.java      |  58 +++
 .../samza/test/framework/MyAsyncStreamTask.java |  67 ++++
 .../samza/test/framework/MyStreamTestTask.java  |  38 ++
 .../StreamApplicationIntegrationTest.java       | 132 +++++++
 ...StreamApplicationIntegrationTestHarness.java | 302 ++++++++++++++
 .../framework/StreamTaskIntegrationTest.java    | 138 +++++++
 .../samza/test/framework/TestTimerApp.java      |  86 ++++
 .../apache/samza/test/framework/TimerTest.java  |  50 +++
 .../samza/test/operator/BroadcastAssertApp.java |  59 ---
 .../test/operator/RepartitionJoinWindowApp.java |  13 +-
 ...StreamApplicationIntegrationTestHarness.java | 277 -------------
 .../operator/TestRepartitionJoinWindowApp.java  |  13 +-
 .../test/operator/TestRepartitionWindowApp.java |   1 +
 .../processor/TestZkLocalApplicationRunner.java | 145 +++++--
 .../test/samzasql/TestSamzaSqlEndToEnd.java     |   5 +-
 .../table/TestTableDescriptorsProvider.java     | 164 ++++++++
 .../apache/samza/test/timer/TestTimerApp.java   |  87 -----
 .../org/apache/samza/test/timer/TimerTest.java  |  51 ---
 .../tools/json/JsonRelConverterFactory.java     |  14 +-
 .../org/apache/samza/config/YarnConfig.java     |  56 ++-
 .../job/yarn/YarnClusterResourceManager.java    |   7 +
 .../apache/samza/job/yarn/ClientHelper.scala    |   7 +-
 .../webapp/ApplicationMasterRestServlet.scala   |   3 +
 253 files changed, 9637 insertions(+), 3594 deletions(-)
----------------------------------------------------------------------



[25/47] samza git commit: added JobModelManager to ThreadJob

Posted by bo...@apache.org.
added JobModelManager to ThreadJob


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/22034947
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/22034947
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/22034947

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 22034947b998d3604bc3911a417b9c1e761bb90f
Parents: c14557f
Author: Boris S <bo...@apache.org>
Authored: Fri Aug 31 14:36:51 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Fri Aug 31 14:36:51 2018 -0700

----------------------------------------------------------------------
 .../stream/CoordinatorStreamSystemConsumer.java |   4 +-
 .../org/apache/samza/job/local/ThreadJob.scala  |   5 +-
 .../samza/job/local/ThreadJobFactory.scala      |   2 +-
 .../apache/samza/job/local/TestThreadJob.scala  |   9 ++
 .../system/kafka/NewKafkaSystemConsumer.java    | 121 +++++++++----------
 .../integration/TestShutdownStatefulTask.scala  |   4 +-
 6 files changed, 75 insertions(+), 70 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/22034947/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java b/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
index 38255a2..0bdb874 100644
--- a/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
+++ b/samza-core/src/main/java/org/apache/samza/coordinator/stream/CoordinatorStreamSystemConsumer.java
@@ -176,7 +176,7 @@ public class CoordinatorStreamSystemConsumer {
             valueMap = messageSerde.fromBytes((byte[]) envelope.getMessage());
           }
           CoordinatorStreamMessage coordinatorStreamMessage = new CoordinatorStreamMessage(keyArray, valueMap);
-          log.debug("Received coordinator stream message: {}", coordinatorStreamMessage);
+          log.info("Received coordinator stream message: {}", coordinatorStreamMessage);
           // Remove any existing entry. Set.add() does not add if the element already exists.
           if (bootstrappedMessages.remove(coordinatorStreamMessage)) {
             log.debug("Removed duplicate message: {}", coordinatorStreamMessage);
@@ -194,7 +194,7 @@ public class CoordinatorStreamSystemConsumer {
         }
 
         bootstrappedStreamSet = Collections.unmodifiableSet(bootstrappedMessages);
-        log.debug("Bootstrapped configuration: {}", configMap);
+        log.info("Bootstrapped configuration: {}", configMap);
         isBootstrapped = true;
       } catch (Exception e) {
         throw new SamzaException(e);
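
For illustration, a small self-contained demo of the remove-then-add idiom in the
bootstrap loop above: Set.add is a no-op for duplicates, so removing first guarantees the
newest copy of a message is the one kept (and, in a LinkedHashSet, moved to the most
recent position). The message string below is made up for the demo:

    import java.util.LinkedHashSet;
    import java.util.Set;

    public class RemoveThenAddDemo {
      public static void main(String[] args) {
        Set<String> bootstrapped = new LinkedHashSet<>();
        String msg = "set-config:job.name=wikipedia";

        bootstrapped.add(msg);
        if (bootstrapped.remove(msg)) {
          System.out.println("Removed duplicate message: " + msg);
        }
        bootstrapped.add(msg);                              // re-inserted as the most recent entry
        System.out.println("size=" + bootstrapped.size());  // 1
      }
    }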

http://git-wip-us.apache.org/repos/asf/samza/blob/22034947/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
index a61a297..33dde52 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJob.scala
@@ -19,11 +19,12 @@
 
 package org.apache.samza.job.local
 
+import org.apache.samza.coordinator.JobModelManager
 import org.apache.samza.job.ApplicationStatus.{New, Running, SuccessfulFinish, UnsuccessfulFinish}
 import org.apache.samza.job.{ApplicationStatus, StreamJob}
 import org.apache.samza.util.Logging
 
-class ThreadJob(runnable: Runnable) extends StreamJob with Logging {
+class ThreadJob(runnable: Runnable, val jobModelManager: JobModelManager) extends StreamJob with Logging {
   @volatile var jobStatus: Option[ApplicationStatus] = None
   var thread: Thread = null
 
@@ -43,6 +44,8 @@ class ThreadJob(runnable: Runnable) extends StreamJob with Logging {
             jobStatus = Some(UnsuccessfulFinish)
             throw e
           }
+        } finally {
+          jobModelManager.stop
         }
       }
     }
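
The change above guarantees the JobModelManager is stopped however the runnable exits. A
minimal Java sketch of the same try/finally ownership pattern (names assumed;
AutoCloseable stands in for JobModelManager.stop):

    class ThreadJobSketch {
      private final Runnable work;
      private final AutoCloseable coordinator;   // stands in for jobModelManager.stop

      ThreadJobSketch(Runnable work, AutoCloseable coordinator) {
        this.work = work;
        this.coordinator = coordinator;
      }

      Thread submit() {
        Thread t = new Thread(() -> {
          try {
            work.run();
          } finally {
            try {
              coordinator.close();               // runs on success, failure, or interrupt
            } catch (Exception e) {
              // a failed stop should not mask the job's own outcome
            }
          }
        });
        t.start();
        return t;
      }
    }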

http://git-wip-us.apache.org/repos/asf/samza/blob/22034947/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
index 0b472aa..4b08721 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
@@ -110,7 +110,7 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
         taskFactory)
       container.setContainerListener(containerListener)
 
-      val threadJob = new ThreadJob(container)
+      val threadJob = new ThreadJob(container, coordinator)
       threadJob
     } finally {
       coordinator.stop

http://git-wip-us.apache.org/repos/asf/samza/blob/22034947/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala b/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
index 4f3f511..b1de215 100644
--- a/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
+++ b/samza-core/src/test/scala/org/apache/samza/job/local/TestThreadJob.scala
@@ -19,6 +19,7 @@
 
 package org.apache.samza.job.local
 
+import org.apache.samza.coordinator.JobModelManager
 import org.junit.Assert._
 import org.junit.Test
 import org.apache.samza.job.ApplicationStatus
@@ -29,6 +30,10 @@ class TestThreadJob {
     val job = new ThreadJob(new Runnable {
       override def run {
       }
+    }, new JobModelManager(null) {
+      override def stop: Unit = {
+
+      }
     })
     job.submit
     job.waitForFinish(999999)
@@ -40,6 +45,10 @@ class TestThreadJob {
       override def run {
         Thread.sleep(999999)
       }
+    }, new JobModelManager(null) {
+      override def stop: Unit = {
+
+      }
     })
     job.submit
     job.waitForFinish(500)

http://git-wip-us.apache.org/repos/asf/samza/blob/22034947/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
index b745628..e34812f 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/NewKafkaSystemConsumer.java
@@ -1,3 +1,4 @@
+
 /*
  *
  * Licensed to the Apache Software Foundation (ASF) under one
@@ -21,47 +22,38 @@
 
 package org.apache.samza.system.kafka;
 
-import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.stream.Collectors;
 import kafka.common.TopicAndPartition;
 import org.apache.kafka.clients.consumer.Consumer;
-import org.apache.kafka.clients.consumer.ConsumerConfig;
 import org.apache.kafka.clients.consumer.KafkaConsumer;
 import org.apache.kafka.clients.consumer.KafkaConsumerConfig;
 import org.apache.kafka.common.TopicPartition;
-import org.apache.kafka.common.serialization.ByteArrayDeserializer;
-import org.apache.kafka.common.serialization.Deserializer;
 import org.apache.samza.Partition;
 import org.apache.samza.SamzaException;
 import org.apache.samza.config.Config;
 import org.apache.samza.config.KafkaConfig;
-import org.apache.samza.config.StreamConfig;
 import org.apache.samza.system.IncomingMessageEnvelope;
 import org.apache.samza.system.SystemConsumer;
-import org.apache.samza.system.SystemStream;
 import org.apache.samza.system.SystemStreamPartition;
 import org.apache.samza.util.BlockingEnvelopeMap;
 import org.apache.samza.util.Clock;
-import org.apache.samza.util.KafkaUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import scala.Option;
-import scala.collection.JavaConversions;
 
 
-public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements SystemConsumer{
+public class NewKafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements SystemConsumer {
 
   private static final Logger LOG = LoggerFactory.getLogger(NewKafkaSystemConsumer.class);
 
   private static final long FETCH_THRESHOLD = 50000;
   private static final long FETCH_THRESHOLD_BYTES = -1L;
-  private final Consumer<K,V> kafkaConsumer;
+  private final Consumer<K, V> kafkaConsumer;
   private final String systemName;
   private final KafkaSystemConsumerMetrics samzaConsumerMetrics;
   private final String clientId;
@@ -78,8 +70,8 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
   private KafkaConsumerProxy proxy;
 
   /* package private */final Map<TopicPartition, String> topicPartitions2Offset = new HashMap<>();
-  /* package private */long perPartitionFetchThreshold;
-  /* package private */long perPartitionFetchThresholdBytes;
+  /* package private */ long perPartitionFetchThreshold;
+  /* package private */ long perPartitionFetchThresholdBytes;
 
   // TODO - consider new class for KafkaSystemConsumerMetrics
 
@@ -88,15 +80,10 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
    * @param config
    * @param metrics
    */
-  public NewKafkaSystemConsumer(
-      Consumer<K,V> kafkaConsumer,
-      String systemName,
-      Config config,
-      String clientId,
-      KafkaSystemConsumerMetrics metrics,
-      Clock clock) {
+  protected NewKafkaSystemConsumer(Consumer<K, V> kafkaConsumer, String systemName, Config config, String clientId,
+      KafkaSystemConsumerMetrics metrics, Clock clock) {
 
-    super(metrics.registry(),clock, metrics.getClass().getName());
+    super(metrics.registry(), clock, metrics.getClass().getName());
 
     this.samzaConsumerMetrics = metrics;
     this.clientId = clientId;
@@ -109,26 +96,20 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
     this.fetchThresholdBytesEnabled = new KafkaConfig(config).isConsumerFetchThresholdBytesEnabled(systemName);
 
     LOG.info(String.format(
-        "Created SamzaLiKafkaSystemConsumer for system=%s, clientId=%s, metricName=%s with liKafkaConsumer=%s",
-        systemName, clientId, metricName, this.kafkaConsumer.toString()));
+        "Created SamzaKafkaSystemConsumer for system=%s, clientId=%s, metricName=%s with KafkaConsumer=%s", systemName,
+        clientId, metricName, this.kafkaConsumer.toString()));
   }
 
-  public static <K, V> NewKafkaSystemConsumer getNewKafkaSystemConsumer(
-      String systemName,
-      Config config,
-      String clientId,
-      KafkaSystemConsumerMetrics metrics,
-      Clock clock) {
+  public static <K, V> NewKafkaSystemConsumer getNewKafkaSystemConsumer(String systemName, Config config,
+      String clientId, KafkaSystemConsumerMetrics metrics, Clock clock) {
+
+    System.out.println("GETTING FOR " + systemName);
 
+    System.out.printf("RETURNING NEW ONE");
     // extract consumer configs and create kafka consumer
     KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
 
-    return new NewKafkaSystemConsumer(kafkaConsumer,
-        systemName,
-        config,
-        clientId,
-        metrics,
-        clock);
+    return new NewKafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
   }
 
   /**
@@ -146,7 +127,8 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
     KafkaConsumerConfig consumerConfig =
         KafkaConsumerConfig.getKafkaSystemConsumerConfig(config, systemName, clientId, injectProps);
 
-    LOG.info("==============>Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}", systemName, consumerConfig.originals());
+    LOG.info("==============>Consumer properties in getKafkaConsumerImpl: systemName: {}, consumerProperties: {}",
+        systemName, consumerConfig.originals());
 
     return new KafkaConsumer<>(consumerConfig.originals());
   }
@@ -157,7 +139,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
       LOG.warn("attempting to start the consumer for the second (or more) time.");
       return;
     }
-    if(stopped.get()) {
+    if (stopped.get()) {
       LOG.warn("attempting to start a stopped consumer");
       return;
     }
@@ -197,8 +179,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
     messageSink = new KafkaConsumerMessageSink();
 
     // create the thread with the consumer
-    proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink,
-        samzaConsumerMetrics, metricName);
+    proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink, samzaConsumerMetrics, metricName);
 
     LOG.info("==============>Created consumer proxy: " + proxy);
   }
@@ -231,8 +212,10 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
       proxy.addTopicPartition(topicPartitions2SSP.get(tp), startingOffset);
     });
 
+    System.out.println("#####################started " + this + "; kc=" + kafkaConsumer);
     // start the proxy thread
     if (proxy != null && !proxy.isRunning()) {
+      System.out.println("#####################starting proxy " + proxy);
       proxy.start();
     }
   }
@@ -242,33 +225,37 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
     KafkaConfig kafkaConfig = new KafkaConfig(config);
     Option<String> fetchThresholdOption = kafkaConfig.getConsumerFetchThreshold(systemName);
     long fetchThreshold = FETCH_THRESHOLD;
-    if(fetchThresholdOption.isDefined()) {
+    if (fetchThresholdOption.isDefined()) {
       fetchThreshold = Long.valueOf(fetchThresholdOption.get());
       LOG.info("fetchThresholdOption is defined. fetchThreshold=" + fetchThreshold);
     }
     Option<String> fetchThresholdBytesOption = kafkaConfig.getConsumerFetchThresholdBytes(systemName);
     long fetchThresholdBytes = FETCH_THRESHOLD_BYTES;
-    if(fetchThresholdBytesOption.isDefined()) {
+    if (fetchThresholdBytesOption.isDefined()) {
       fetchThresholdBytes = Long.valueOf(fetchThresholdBytesOption.get());
       LOG.info("fetchThresholdBytesOption is defined. fetchThresholdBytes=" + fetchThresholdBytes);
     }
     LOG.info("fetchThresholdBytes = " + fetchThresholdBytes + "; fetchThreshold=" + fetchThreshold);
-    LOG.info("topicPartitions2Offset #=" + topicPartitions2Offset.size() + "; topicPartition2SSP #=" + topicPartitions2SSP.size());
+    LOG.info("topicPartitions2Offset #=" + topicPartitions2Offset.size() + "; topicPartition2SSP #="
+        + topicPartitions2SSP.size());
 
     if (topicPartitions2SSP.size() > 0) {
       perPartitionFetchThreshold = fetchThreshold / topicPartitions2SSP.size();
       LOG.info("perPartitionFetchThreshold=" + perPartitionFetchThreshold);
-      if(fetchThresholdBytesEnabled) {
+      if (fetchThresholdBytesEnabled) {
         // currently this feature cannot be enabled, because we do not have the size of the messages available.
         // messages get double buffered, hence divide by 2
         perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / topicPartitions2SSP.size();
-        LOG.info("perPartitionFetchThresholdBytes is enabled. perPartitionFetchThresholdBytes=" + perPartitionFetchThresholdBytes);
+        LOG.info("perPartitionFetchThresholdBytes is enabled. perPartitionFetchThresholdBytes="
+            + perPartitionFetchThresholdBytes);
       }
     }
   }
 
   @Override
   public void stop() {
+    System.out.println("##################### stopping " + this + "; kc=" + kafkaConsumer);
+
     if (!stopped.compareAndSet(false, true)) {
       LOG.warn("attempting to stop stopped consumer.");
       return;
@@ -276,8 +263,10 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
 
     LOG.warn("Stopping SamzaRawLiKafkaConsumer + " + this);
     // stop the proxy (with 5 minutes timeout)
-    if(proxy != null)
+    if (proxy != null) {
+      System.out.println("##################### stopping proxy " + proxy);
       proxy.stop(TimeUnit.MINUTES.toMillis(5));
+    }
 
     try {
       synchronized (kafkaConsumer) {
@@ -293,6 +282,14 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
    */
   @Override
   public void register(SystemStreamPartition systemStreamPartition, String offset) {
+    if (started.get()) {
+      String msg =
+          String.format("Trying to register partition after consumer has been started. sn=%s, ssp=%s", systemName,
+              systemStreamPartition);
+      LOG.error(msg);
+      throw new SamzaException(msg);
+    }
+
     if (!systemStreamPartition.getSystem().equals(systemName)) {
       LOG.warn("ignoring SSP " + systemStreamPartition + ", because this consumer's system is " + systemName);
       return;
@@ -332,16 +329,17 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
 
   @Override
   public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
-      Set<SystemStreamPartition> systemStreamPartitions, long timeout)
-      throws InterruptedException {
+      Set<SystemStreamPartition> systemStreamPartitions, long timeout) throws InterruptedException {
 
     // check if the proxy is running
-    if(!proxy.isRunning()) {
+    if (!proxy.isRunning()) {
       stop();
       if (proxy.getFailureCause() != null) {
         String message = "LiKafkaConsumerProxy has stopped";
-        if(proxy.getFailureCause() instanceof org.apache.kafka.common.errors.TopicAuthorizationException)
-          message += " due to TopicAuthorizationException Please refer to go/samzaacluserguide to correctly set up acls for your topic";
+        if (proxy.getFailureCause() instanceof org.apache.kafka.common.errors.TopicAuthorizationException) {
+          message +=
+              " due to TopicAuthorizationException Please refer to go/samzaacluserguide to correctly set up acls for your topic";
+        }
         throw new SamzaException(message, proxy.getFailureCause());
       } else {
         LOG.warn("Failure cause not populated for LiKafkaConsumerProxy");
@@ -349,7 +347,9 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
       }
     }
 
-    return super.poll(systemStreamPartitions, timeout);
+    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> res = super.poll(systemStreamPartitions, timeout);
+    LOG.info("=============================>. Res in POLL:" + res.toString());
+    return res;
   }
 
   public static TopicAndPartition toTopicAndPartition(TopicPartition tp) {
@@ -376,15 +376,6 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
     return systemName;
   }
 
-  private static Set<SystemStream> getIntermediateStreams(Config config) {
-    StreamConfig streamConfig = new StreamConfig(config);
-    Collection<String> streamIds = JavaConversions.asJavaCollection(streamConfig.getStreamIds());
-    return streamIds.stream()
-        .filter(streamConfig::getIsIntermediateStream)
-        .map(id -> streamConfig.streamIdToSystemStream(id))
-        .collect(Collectors.toSet());
-  }
-
   ////////////////////////////////////
   // inner class for the message sink
   ////////////////////////////////////
@@ -395,10 +386,11 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
     }
 
     boolean needsMoreMessages(SystemStreamPartition ssp) {
-      if(LOG.isDebugEnabled()) {
+      if (LOG.isDebugEnabled()) {
         LOG.debug("needsMoreMessages from following SSP: {}. fetchLimitByBytes enabled={}; messagesSizeInQueue={};"
-                + "(limit={}); messagesNumInQueue={}(limit={};", ssp, fetchThresholdBytesEnabled, getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes,
-            getNumMessagesInQueue(ssp), perPartitionFetchThreshold);
+                + "(limit={}); messagesNumInQueue={}(limit={};", ssp, fetchThresholdBytesEnabled,
+            getMessagesSizeInQueue(ssp), perPartitionFetchThresholdBytes, getNumMessagesInQueue(ssp),
+            perPartitionFetchThreshold);
       }
 
       if (fetchThresholdBytesEnabled) {
@@ -415,8 +407,7 @@ public class NewKafkaSystemConsumer<K,V> extends BlockingEnvelopeMap implements
         put(ssp, envelope);
       } catch (InterruptedException e) {
         throw new SamzaException(
-            String.format("Interrupted while trying to add message with offset %s for ssp %s",
-                envelope.getOffset(),
+            String.format("Interrupted while trying to add message with offset %s for ssp %s", envelope.getOffset(),
                 ssp));
       }
     }
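
A worked example of the threshold split computed above, using the defaults visible in
this diff (FETCH_THRESHOLD = 50000 messages; the byte budget is halved because messages
are double buffered). The byte budget and partition count below are assumptions for the
arithmetic:

    public class FetchThresholdDemo {
      public static void main(String[] args) {
        long fetchThreshold = 50000;              // total buffered-message budget
        long fetchThresholdBytes = 100_000_000L;  // assumed configured byte budget
        int partitions = 10;                      // assumed registered partitions

        long perPartition = fetchThreshold / partitions;                  // 5000
        long perPartitionBytes = (fetchThresholdBytes / 2) / partitions;  // 5,000,000

        System.out.println("perPartitionFetchThreshold=" + perPartition);
        System.out.println("perPartitionFetchThresholdBytes=" + perPartitionBytes);
      }
    }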

http://git-wip-us.apache.org/repos/asf/samza/blob/22034947/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
----------------------------------------------------------------------
diff --git a/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala b/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
index e4d47d1..a42433c 100644
--- a/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
+++ b/samza-test/src/test/scala/org/apache/samza/test/integration/TestShutdownStatefulTask.scala
@@ -82,13 +82,15 @@ class TestShutdownStatefulTask extends StreamTaskTestUtil {
     assertEquals(0, task.received.size)
 
     // Send some messages to input stream.
+    System.out.println("************************BEFORE DONE sending")
     send(task, "1")
+    System.out.println("************************FIRST DONE sending")
     send(task, "2")
     send(task, "3")
     send(task, "2")
     send(task, "99")
     send(task, "99")
-
+    System.out.println("************************DONE sending")
     stopJob(job)
 
   }


[07/47] samza git commit: Merge branch 'master' of https://github.com/sborya/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/sborya/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/06b1ac36
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/06b1ac36
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/06b1ac36

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 06b1ac36e9c67a3bd558a0fa592639b16fcbfda9
Parents: 5e6f5fb 010fa16
Author: Boris Shkolnik <bs...@linkedin.com>
Authored: Wed Oct 25 09:50:55 2017 -0700
Committer: Boris Shkolnik <bs...@linkedin.com>
Committed: Wed Oct 25 09:50:55 2017 -0700

----------------------------------------------------------------------

----------------------------------------------------------------------



[16/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/57fca52c
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/57fca52c
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/57fca52c

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 57fca52cc48a37622aa6b8b1f71a37733c378524
Parents: afb34d9 ce57657
Author: Boris S <bo...@apache.org>
Authored: Wed Aug 15 12:13:37 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Aug 15 12:13:37 2018 -0700

----------------------------------------------------------------------
 .../org/apache/samza/config/MetricsConfig.scala |  1 +
 .../scala/org/apache/samza/job/JobRunner.scala  | 43 ++++++++++++++++----
 2 files changed, 35 insertions(+), 9 deletions(-)
----------------------------------------------------------------------



[32/47] samza git commit: formatting

Posted by bo...@apache.org.
formatting


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/ceb0f6ae
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/ceb0f6ae
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/ceb0f6ae

Branch: refs/heads/NewKafkaSystemConsumer
Commit: ceb0f6aef45822191e29b6f43b9df76168c161e8
Parents: 332a048
Author: Boris S <bo...@apache.org>
Authored: Wed Sep 5 14:13:47 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Sep 5 14:13:47 2018 -0700

----------------------------------------------------------------------
 .../scala/org/apache/samza/job/local/ThreadJobFactory.scala | 3 ++-
 .../org/apache/samza/system/kafka/KafkaConsumerProxy.java   | 9 +--------
 .../samza/system/kafka/KafkaSystemConsumerMetrics.scala     | 8 ++++----
 .../org/apache/samza/validation/YarnJobValidationTool.java  | 2 +-
 .../samza/job/yarn/TestSamzaYarnAppMasterService.scala      | 4 +++-
 5 files changed, 11 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/ceb0f6ae/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
----------------------------------------------------------------------
diff --git a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
index 15aa5a6..0d71303 100644
--- a/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
+++ b/samza-core/src/main/scala/org/apache/samza/job/local/ThreadJobFactory.scala
@@ -50,7 +50,7 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
     val changelogStreamManager = new ChangelogStreamManager(coordinatorStreamManager)
 
     val coordinator = JobModelManager(coordinatorStreamManager.getConfig, changelogStreamManager.readPartitionMapping())
-    coordinatorStreamManager.stop()
+
     val jobModel = coordinator.jobModel
 
     val taskPartitionMappings: mutable.Map[TaskName, Integer] = mutable.Map[TaskName, Integer]()
@@ -116,6 +116,7 @@ class ThreadJobFactory extends StreamJobFactory with Logging {
       threadJob
     } finally {
       coordinator.stop
+      coordinatorStreamManager.stop()
       jmxServer.stop
     }
   }

http://git-wip-us.apache.org/repos/asf/samza/blob/ceb0f6ae/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
index a6272cd..7232a0a 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaConsumerProxy.java
@@ -99,20 +99,14 @@ public class KafkaConsumerProxy<K, V> {
           "Samza KafkaConsumerProxy Poll " + consumerPollThread.getName() + " - " + systemName);
       consumerPollThread.start();
 
-      System.out.println("THREAD: starting" + consumerPollThread.getName());
-
-
       // we need to wait until the thread starts
       while (!isRunning) {
         try {
           consumerPollThreadStartLatch.await(3000, TimeUnit.MILLISECONDS);
         } catch (InterruptedException e) {
-          LOG.info("WTH");
+          LOG.info("Got InterruptedException", e);
         }
       }
-      new Exception().printStackTrace(System.out);
-      System.out.println("THREAD: started" + consumerPollThread.getName());
-
     } else {
       LOG.debug("Tried to start an already started LiKafkaConsumerProxy (%s). Ignoring.", this.toString());
     }
@@ -220,7 +214,6 @@ public class KafkaConsumerProxy<K, V> {
         kafkaConsumer.resume(topicPartitionsToPause);
       }
     } catch (InvalidOffsetException e) {
-      LOG.error("LiKafkaConsumer with invalidOffsetException", e);
       // If the consumer has thrown this exception it means that auto reset is not set for this consumer.
       // So we just rethrow.
       LOG.error("Caught InvalidOffsetException in pollConsumer", e);

http://git-wip-us.apache.org/repos/asf/samza/blob/ceb0f6ae/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
index 415bd38..7dce261 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumerMetrics.scala
@@ -47,10 +47,10 @@ class KafkaSystemConsumerMetrics(val systemName: String = "unknown", val registr
   }
 
   def registerClientProxy(clientName: String) {
-    clientBytesRead.put(clientName, newCounter("%s-%s-bytes-read" format clientName))
-    clientReads.put((clientName), newCounter("%s-%s-messages-read" format clientName))
-    clientSkippedFetchRequests.put((clientName), newCounter("%s-%s-skipped-fetch-requests" format clientName))
-    topicPartitions.put(clientName, newGauge("%s-%s-topic-partitions" format clientName, 0))
+    clientBytesRead.put(clientName, newCounter("%s-bytes-read" format clientName))
+    clientReads.put((clientName), newCounter("%s-messages-read" format clientName))
+    clientSkippedFetchRequests.put((clientName), newCounter("%s-skipped-fetch-requests" format clientName))
+    topicPartitions.put(clientName, newGauge("%s-topic-partitions" format clientName, 0))
   }
 
   // java friendlier interfaces
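
The metric-name fix above removes a latent format-string mismatch: each pattern had two
%s placeholders but only one argument, which fails at runtime. A Java analogue of the
failure mode (Scala's format delegates to String.format, so the exception is the same):

    public class FormatBugDemo {
      public static void main(String[] args) {
        // Fixed pattern: one placeholder, one argument.
        System.out.println(String.format("%s-bytes-read", "clientA")); // clientA-bytes-read

        // Old pattern: two placeholders, one argument.
        try {
          String.format("%s-%s-bytes-read", "clientA");
        } catch (java.util.MissingFormatArgumentException e) {
          System.out.println("format mismatch: " + e.getMessage());
        }
      }
    }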

http://git-wip-us.apache.org/repos/asf/samza/blob/ceb0f6ae/samza-yarn/src/main/java/org/apache/samza/validation/YarnJobValidationTool.java
----------------------------------------------------------------------
diff --git a/samza-yarn/src/main/java/org/apache/samza/validation/YarnJobValidationTool.java b/samza-yarn/src/main/java/org/apache/samza/validation/YarnJobValidationTool.java
index 0b405f0..b30b896 100644
--- a/samza-yarn/src/main/java/org/apache/samza/validation/YarnJobValidationTool.java
+++ b/samza-yarn/src/main/java/org/apache/samza/validation/YarnJobValidationTool.java
@@ -157,7 +157,7 @@ public class YarnJobValidationTool {
     coordinatorStreamManager.start();
     coordinatorStreamManager.bootstrap();
     ChangelogStreamManager changelogStreamManager = new ChangelogStreamManager(coordinatorStreamManager);
-    JobModelManager jobModelManager = JobModelManager.apply(coordinatorStreamManager, changelogStreamManager.readPartitionMapping());
+    JobModelManager jobModelManager = JobModelManager.apply(coordinatorStreamManager.getConfig(), changelogStreamManager.readPartitionMapping());
     validator.init(config);
     Map<String, String> jmxUrls = jobModelManager.jobModel().getAllContainerToHostValues(SetContainerHostMapping.JMX_TUNNELING_URL_KEY);
     for (Map.Entry<String, String> entry : jmxUrls.entrySet()) {

http://git-wip-us.apache.org/repos/asf/samza/blob/ceb0f6ae/samza-yarn/src/test/scala/org/apache/samza/job/yarn/TestSamzaYarnAppMasterService.scala
----------------------------------------------------------------------
diff --git a/samza-yarn/src/test/scala/org/apache/samza/job/yarn/TestSamzaYarnAppMasterService.scala b/samza-yarn/src/test/scala/org/apache/samza/job/yarn/TestSamzaYarnAppMasterService.scala
index da23b91..1ad4522 100644
--- a/samza-yarn/src/test/scala/org/apache/samza/job/yarn/TestSamzaYarnAppMasterService.scala
+++ b/samza-yarn/src/test/scala/org/apache/samza/job/yarn/TestSamzaYarnAppMasterService.scala
@@ -106,7 +106,9 @@ class TestSamzaYarnAppMasterService {
     coordinatorStreamManager.start
     coordinatorStreamManager.bootstrap
     val changelogPartitionManager = new ChangelogStreamManager(coordinatorStreamManager)
-    JobModelManager(coordinatorStreamManager, changelogPartitionManager.readPartitionMapping())
+    val jobModelManager = JobModelManager(coordinatorStreamManager.getConfig, changelogPartitionManager.readPartitionMapping())
+    coordinatorStreamManager.stop()
+    jobModelManager
   }
 
   private def getDummyConfig: Config = new MapConfig(Map[String, String](


[21/47] samza git commit: Merge branch 'master' into NewConsumer

Posted by bo...@apache.org.
Merge branch 'master' into NewConsumer


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/34ae8ba2
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/34ae8ba2
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/34ae8ba2

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 34ae8ba2dd18c7d7af46464033b330a35966db3f
Parents: c0ea25c 7f7b559
Author: Boris S <bo...@apache.org>
Authored: Wed Aug 29 10:52:37 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Wed Aug 29 10:52:37 2018 -0700

----------------------------------------------------------------------
 .../samza/execution/ExecutionPlanner.java       | 20 +++++---
 .../runtime/AbstractApplicationRunner.java      | 20 +++++---
 .../samza/runtime/LocalApplicationRunner.java   | 41 ++++++++--------
 .../samza/runtime/RemoteApplicationRunner.java  | 36 +++++++-------
 .../org/apache/samza/config/MetricsConfig.scala | 11 +++--
 .../diagnostics/DiagnosticsExceptionEvent.java  |  6 +--
 .../scala/org/apache/samza/job/JobRunner.scala  |  2 +-
 .../reporter/MetricsSnapshotReporter.scala      | 35 +++++++-------
 .../MetricsSnapshotReporterFactory.scala        |  6 +--
 .../runtime/TestLocalApplicationRunner.java     | 50 ++++++++++++--------
 10 files changed, 129 insertions(+), 98 deletions(-)
----------------------------------------------------------------------



[23/47] samza git commit: Merge branch 'master' of https://github.com/apache/samza

Posted by bo...@apache.org.
Merge branch 'master' of https://github.com/apache/samza


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/8ab04b20
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/8ab04b20
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/8ab04b20

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 8ab04b209eec4d035f126259305ae84318737d2e
Parents: 7f7b559 9eadfa0
Author: Boris S <bo...@apache.org>
Authored: Thu Aug 30 11:43:21 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Thu Aug 30 11:43:21 2018 -0700

----------------------------------------------------------------------
 build.gradle                                    |   5 +-
 .../samza/metadatastore/MetadataStore.java      |  80 ++++
 .../metadatastore/MetadataStoreFactory.java     |  30 ++
 .../apache/samza/operators/MessageStream.java   |  12 +-
 .../org/apache/samza/operators/StreamGraph.java | 109 ++----
 .../descriptors/GenericInputDescriptor.java     |  43 +++
 .../descriptors/GenericOutputDescriptor.java    |  43 +++
 .../descriptors/GenericSystemDescriptor.java    |  67 ++++
 .../base/stream/InputDescriptor.java            | 187 +++++++++
 .../base/stream/OutputDescriptor.java           |  44 +++
 .../base/stream/StreamDescriptor.java           | 136 +++++++
 .../ExpandingInputDescriptorProvider.java       |  44 +++
 .../base/system/OutputDescriptorProvider.java   |  48 +++
 .../system/SimpleInputDescriptorProvider.java   |  43 +++
 .../base/system/SystemDescriptor.java           | 177 +++++++++
 .../TransformingInputDescriptorProvider.java    |  44 +++
 .../operators/functions/InputTransformer.java   |  45 +++
 .../operators/functions/StreamExpander.java     |  58 +++
 .../org/apache/samza/table/TableProvider.java   |  12 +-
 .../TestExpandingInputDescriptor.java           |  61 +++
 .../descriptors/TestGenericInputDescriptor.java | 123 ++++++
 .../TestGenericSystemDescriptor.java            |  63 +++
 .../descriptors/TestSimpleInputDescriptor.java  |  65 ++++
 .../TestTransformingInputDescriptor.java        |  66 ++++
 .../ExampleExpandingInputDescriptor.java        |  30 ++
 .../ExampleExpandingOutputDescriptor.java       |  29 ++
 .../ExampleExpandingSystemDescriptor.java       |  49 +++
 .../serde/ExampleSimpleInputDescriptor.java     |  30 ++
 .../serde/ExampleSimpleOutputDescriptor.java    |  29 ++
 .../serde/ExampleSimpleSystemDescriptor.java    |  43 +++
 .../ExampleTransformingInputDescriptor.java     |  30 ++
 .../ExampleTransformingOutputDescriptor.java    |  29 ++
 .../ExampleTransformingSystemDescriptor.java    |  43 +++
 .../apache/samza/container/LocalityManager.java | 126 +++---
 .../grouper/task/TaskAssignmentManager.java     | 109 ++++--
 .../CoordinatorStreamMetadataStoreFactory.java  |  36 ++
 .../metadatastore/CoordinatorStreamStore.java   | 188 +++++++++
 .../stream/CoordinatorStreamKeySerde.java       |  52 +++
 .../stream/CoordinatorStreamValueSerde.java     |  80 ++++
 .../samza/execution/JobGraphJsonGenerator.java  |   4 +-
 .../org/apache/samza/execution/JobNode.java     |  23 +-
 .../samza/operators/BaseTableDescriptor.java    |   2 +-
 .../samza/operators/MessageStreamImpl.java      |   2 +-
 .../apache/samza/operators/StreamGraphSpec.java | 152 +++++---
 .../descriptors/DelegatingSystemDescriptor.java |  70 ++++
 .../samza/operators/impl/InputOperatorImpl.java |  23 +-
 .../samza/operators/spec/InputOperatorSpec.java |  57 ++-
 .../samza/operators/spec/OperatorSpecs.java     |  11 +-
 .../samza/operators/spec/OutputStreamImpl.java  |  12 +
 .../stream/IntermediateMessageStreamImpl.java   |   2 +-
 .../apache/samza/processor/StreamProcessor.java |   2 +-
 .../runtime/AbstractApplicationRunner.java      |  12 +
 .../samza/table/TableConfigGenerator.java       |  11 +-
 .../table/caching/CachingTableDescriptor.java   |   3 +-
 .../table/caching/CachingTableProvider.java     |  56 +--
 .../guava/GuavaCacheTableDescriptor.java        |   3 +-
 .../caching/guava/GuavaCacheTableProvider.java  |  44 +--
 .../samza/table/remote/RemoteReadableTable.java |   5 +
 .../table/remote/RemoteTableDescriptor.java     |   3 +-
 .../samza/table/remote/RemoteTableProvider.java |  43 +--
 .../samza/table/utils/BaseTableProvider.java    |  76 ++++
 .../apache/samza/task/StreamOperatorTask.java   |   3 +-
 .../org/apache/samza/task/TaskCallbackImpl.java |   2 +-
 .../java/org/apache/samza/util/StreamUtil.java  |   1 +
 .../org/apache/samza/config/JobConfig.scala     |   4 +
 .../org/apache/samza/config/StorageConfig.scala |   1 +
 .../apache/samza/container/SamzaContainer.scala |  28 +-
 .../samza/coordinator/JobModelManager.scala     |  20 +-
 .../TestClusterBasedJobCoordinator.java         |  26 +-
 .../samza/container/TestLocalityManager.java    | 106 ++---
 .../grouper/task/TestTaskAssignmentManager.java | 132 ++-----
 .../coordinator/JobModelManagerTestUtil.java    |   4 +-
 .../TestCoordinatorStreamStore.java             | 129 +++++++
 .../samza/execution/TestExecutionPlanner.java   |  92 +++--
 .../execution/TestJobGraphJsonGenerator.java    |  38 +-
 .../org/apache/samza/execution/TestJobNode.java |  81 +++-
 .../samza/operators/TestJoinOperator.java       |  50 ++-
 .../samza/operators/TestMessageStreamImpl.java  |   4 +-
 .../samza/operators/TestOperatorSpecGraph.java  |   4 +-
 .../samza/operators/TestStreamGraphSpec.java    | 387 +++++++++----------
 .../operators/impl/TestInputOperatorImpl.java   |  80 ++++
 .../operators/impl/TestOperatorImplGraph.java   |  96 +++--
 .../operators/impl/TestWindowOperator.java      |  24 +-
 .../samza/operators/spec/TestOperatorSpec.java  |  16 +-
 .../spec/TestPartitionByOperatorSpec.java       |  63 ++-
 .../scala/org/apache/samza/util/TestUtil.scala  |   2 -
 .../system/kafka/KafkaInputDescriptor.java      | 108 ++++++
 .../system/kafka/KafkaOutputDescriptor.java     |  39 ++
 .../system/kafka/KafkaSystemDescriptor.java     | 251 ++++++++++++
 .../apache/samza/system/kafka/GetOffset.scala   |   3 +-
 .../system/kafka/TestKafkaInputDescriptor.java  |  68 ++++
 .../system/kafka/TestKafkaSystemDescriptor.java |  69 ++++
 .../samza/system/kafka/TestGetOffset.scala      |  31 +-
 .../kv/inmemory/InMemoryTableDescriptor.java    |   3 +
 .../kv/inmemory/InMemoryTableProvider.java      |  21 +-
 .../kv/inmemory/TestInMemoryTableProvider.java  |   9 +-
 .../storage/kv/RocksDbTableDescriptor.java      |   7 +-
 .../samza/storage/kv/RocksDbTableProvider.java  |  21 +-
 .../storage/kv/TestRocksDbTableDescriptor.java  |  15 +
 .../storage/kv/TestRocksDbTableProvider.java    |   9 +-
 .../kv/BaseLocalStoreBackedTableDescriptor.java |  68 ++++
 .../kv/BaseLocalStoreBackedTableProvider.java   |  60 ++-
 .../kv/LocalStoreBackedReadableTable.java       |   1 +
 .../TestBaseLocalStoreBackedTableProvider.java  | 149 +++++++
 .../TestLocalBaseStoreBackedTableProvider.java  |  85 ----
 .../samza/rest/proxy/task/SamzaTaskProxy.java   |   4 +-
 .../apache/samza/sql/planner/QueryPlanner.java  |   8 -
 .../sql/planner/SamzaSqlOperatorTable.java      |   1 +
 .../samza/sql/translator/QueryTranslator.java   |  10 +-
 .../samza/sql/translator/ScanTranslator.java    |  13 +-
 .../samza/sql/translator/TranslatorContext.java |  34 +-
 .../sql/testutil/TestIOResolverFactory.java     |  15 +-
 .../sql/translator/TestJoinTranslator.java      |   2 +-
 .../sql/translator/TestQueryTranslator.java     |   5 +-
 .../example/AppWithGlobalConfigExample.java     |  17 +-
 .../apache/samza/example/BroadcastExample.java  |  24 +-
 .../samza/example/KeyValueStoreExample.java     |  27 +-
 .../org/apache/samza/example/MergeExample.java  |  25 +-
 .../samza/example/OrderShipmentJoinExample.java |  29 +-
 .../samza/example/PageViewCounterExample.java   |  17 +-
 .../samza/example/RepartitionExample.java       |  24 +-
 .../org/apache/samza/example/WindowExample.java |  19 +-
 .../apache/samza/test/framework/TestRunner.java |   2 +-
 .../system/CollectionStreamSystemSpec.java      |  25 +-
 .../TestStandaloneIntegrationApplication.java   |  21 +-
 .../EndOfStreamIntegrationTest.java             |  11 +-
 .../WatermarkIntegrationTest.java               |  12 +-
 .../test/framework/BroadcastAssertApp.java      |  11 +-
 .../StreamApplicationIntegrationTest.java       |  29 +-
 .../samza/test/framework/TestTimerApp.java      |  17 +-
 .../test/operator/RepartitionJoinWindowApp.java |  32 +-
 .../test/operator/RepartitionWindowApp.java     |  19 +-
 .../samza/test/operator/SessionWindowApp.java   |  19 +-
 .../operator/TestRepartitionJoinWindowApp.java  |  12 +-
 .../test/operator/TestRepartitionWindowApp.java |   6 +-
 .../samza/test/operator/TumblingWindowApp.java  |  19 +-
 .../test/processor/TestStreamApplication.java   |  20 +-
 .../processor/TestZkLocalApplicationRunner.java |  60 ++-
 .../test/samzasql/TestSamzaSqlEndToEnd.java     |   2 +-
 .../table/PageViewToProfileJoinFunction.java    |  48 +++
 .../apache/samza/test/table/TestLocalTable.java |  75 +---
 .../table/TestLocalTableWithSideInputs.java     |  47 ++-
 .../samza/test/table/TestRemoteTable.java       |  50 +--
 .../table/TestTableDescriptorsProvider.java     |  10 +-
 .../benchmark/SystemConsumerWithSamzaBench.java |  11 +-
 145 files changed, 4990 insertions(+), 1376 deletions(-)
----------------------------------------------------------------------



[24/47] samza git commit: Merge branch 'master' into NewConsumer

Posted by bo...@apache.org.
Merge branch 'master' into NewConsumer


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/c14557fb
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/c14557fb
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/c14557fb

Branch: refs/heads/NewKafkaSystemConsumer
Commit: c14557fb401f0d718e78163a159695f50bc82845
Parents: 89f7982 8ab04b2
Author: Boris S <bo...@apache.org>
Authored: Thu Aug 30 12:45:23 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Thu Aug 30 12:45:23 2018 -0700

----------------------------------------------------------------------
 build.gradle                                    |   5 +-
 .../samza/metadatastore/MetadataStore.java      |  80 ++++
 .../metadatastore/MetadataStoreFactory.java     |  30 ++
 .../apache/samza/operators/MessageStream.java   |  12 +-
 .../org/apache/samza/operators/StreamGraph.java | 109 ++----
 .../descriptors/GenericInputDescriptor.java     |  43 +++
 .../descriptors/GenericOutputDescriptor.java    |  43 +++
 .../descriptors/GenericSystemDescriptor.java    |  67 ++++
 .../base/stream/InputDescriptor.java            | 187 +++++++++
 .../base/stream/OutputDescriptor.java           |  44 +++
 .../base/stream/StreamDescriptor.java           | 136 +++++++
 .../ExpandingInputDescriptorProvider.java       |  44 +++
 .../base/system/OutputDescriptorProvider.java   |  48 +++
 .../system/SimpleInputDescriptorProvider.java   |  43 +++
 .../base/system/SystemDescriptor.java           | 177 +++++++++
 .../TransformingInputDescriptorProvider.java    |  44 +++
 .../operators/functions/InputTransformer.java   |  45 +++
 .../operators/functions/StreamExpander.java     |  58 +++
 .../org/apache/samza/table/TableProvider.java   |  12 +-
 .../TestExpandingInputDescriptor.java           |  61 +++
 .../descriptors/TestGenericInputDescriptor.java | 123 ++++++
 .../TestGenericSystemDescriptor.java            |  63 +++
 .../descriptors/TestSimpleInputDescriptor.java  |  65 ++++
 .../TestTransformingInputDescriptor.java        |  66 ++++
 .../ExampleExpandingInputDescriptor.java        |  30 ++
 .../ExampleExpandingOutputDescriptor.java       |  29 ++
 .../ExampleExpandingSystemDescriptor.java       |  49 +++
 .../serde/ExampleSimpleInputDescriptor.java     |  30 ++
 .../serde/ExampleSimpleOutputDescriptor.java    |  29 ++
 .../serde/ExampleSimpleSystemDescriptor.java    |  43 +++
 .../ExampleTransformingInputDescriptor.java     |  30 ++
 .../ExampleTransformingOutputDescriptor.java    |  29 ++
 .../ExampleTransformingSystemDescriptor.java    |  43 +++
 .../apache/samza/container/LocalityManager.java | 126 +++---
 .../grouper/task/TaskAssignmentManager.java     | 109 ++++--
 .../CoordinatorStreamMetadataStoreFactory.java  |  36 ++
 .../metadatastore/CoordinatorStreamStore.java   | 188 +++++++++
 .../stream/CoordinatorStreamKeySerde.java       |  52 +++
 .../stream/CoordinatorStreamValueSerde.java     |  80 ++++
 .../samza/execution/JobGraphJsonGenerator.java  |   4 +-
 .../org/apache/samza/execution/JobNode.java     |  23 +-
 .../samza/operators/BaseTableDescriptor.java    |   2 +-
 .../samza/operators/MessageStreamImpl.java      |   2 +-
 .../apache/samza/operators/StreamGraphSpec.java | 152 +++++---
 .../descriptors/DelegatingSystemDescriptor.java |  70 ++++
 .../samza/operators/impl/InputOperatorImpl.java |  23 +-
 .../samza/operators/spec/InputOperatorSpec.java |  57 ++-
 .../samza/operators/spec/OperatorSpecs.java     |  11 +-
 .../samza/operators/spec/OutputStreamImpl.java  |  12 +
 .../stream/IntermediateMessageStreamImpl.java   |   2 +-
 .../apache/samza/processor/StreamProcessor.java |   2 +-
 .../runtime/AbstractApplicationRunner.java      |  12 +
 .../samza/table/TableConfigGenerator.java       |  11 +-
 .../table/caching/CachingTableDescriptor.java   |   3 +-
 .../table/caching/CachingTableProvider.java     |  56 +--
 .../guava/GuavaCacheTableDescriptor.java        |   3 +-
 .../caching/guava/GuavaCacheTableProvider.java  |  44 +--
 .../samza/table/remote/RemoteReadableTable.java |   5 +
 .../table/remote/RemoteTableDescriptor.java     |   3 +-
 .../samza/table/remote/RemoteTableProvider.java |  43 +--
 .../samza/table/utils/BaseTableProvider.java    |  76 ++++
 .../apache/samza/task/StreamOperatorTask.java   |   3 +-
 .../org/apache/samza/task/TaskCallbackImpl.java |   2 +-
 .../java/org/apache/samza/util/StreamUtil.java  |   1 +
 .../org/apache/samza/config/JobConfig.scala     |   4 +
 .../org/apache/samza/config/StorageConfig.scala |   1 +
 .../apache/samza/container/SamzaContainer.scala |  28 +-
 .../samza/coordinator/JobModelManager.scala     |  20 +-
 .../TestClusterBasedJobCoordinator.java         |  26 +-
 .../samza/container/TestLocalityManager.java    | 106 ++---
 .../grouper/task/TestTaskAssignmentManager.java | 132 ++-----
 .../coordinator/JobModelManagerTestUtil.java    |   4 +-
 .../TestCoordinatorStreamStore.java             | 129 +++++++
 .../samza/execution/TestExecutionPlanner.java   |  92 +++--
 .../execution/TestJobGraphJsonGenerator.java    |  38 +-
 .../org/apache/samza/execution/TestJobNode.java |  81 +++-
 .../samza/operators/TestJoinOperator.java       |  50 ++-
 .../samza/operators/TestMessageStreamImpl.java  |   4 +-
 .../samza/operators/TestOperatorSpecGraph.java  |   4 +-
 .../samza/operators/TestStreamGraphSpec.java    | 387 +++++++++----------
 .../operators/impl/TestInputOperatorImpl.java   |  80 ++++
 .../operators/impl/TestOperatorImplGraph.java   |  96 +++--
 .../operators/impl/TestWindowOperator.java      |  24 +-
 .../samza/operators/spec/TestOperatorSpec.java  |  16 +-
 .../spec/TestPartitionByOperatorSpec.java       |  63 ++-
 .../scala/org/apache/samza/util/TestUtil.scala  |   2 -
 .../system/kafka/KafkaInputDescriptor.java      | 108 ++++++
 .../system/kafka/KafkaOutputDescriptor.java     |  39 ++
 .../system/kafka/KafkaSystemDescriptor.java     | 251 ++++++++++++
 .../apache/samza/system/kafka/GetOffset.scala   |   3 +-
 .../system/kafka/TestKafkaInputDescriptor.java  |  68 ++++
 .../system/kafka/TestKafkaSystemDescriptor.java |  69 ++++
 .../samza/system/kafka/TestGetOffset.scala      |  31 +-
 .../kv/inmemory/InMemoryTableDescriptor.java    |   3 +
 .../kv/inmemory/InMemoryTableProvider.java      |  21 +-
 .../kv/inmemory/TestInMemoryTableProvider.java  |   9 +-
 .../storage/kv/RocksDbTableDescriptor.java      |   7 +-
 .../samza/storage/kv/RocksDbTableProvider.java  |  21 +-
 .../storage/kv/TestRocksDbTableDescriptor.java  |  15 +
 .../storage/kv/TestRocksDbTableProvider.java    |   9 +-
 .../kv/BaseLocalStoreBackedTableDescriptor.java |  68 ++++
 .../kv/BaseLocalStoreBackedTableProvider.java   |  60 ++-
 .../kv/LocalStoreBackedReadableTable.java       |   1 +
 .../TestBaseLocalStoreBackedTableProvider.java  | 149 +++++++
 .../TestLocalBaseStoreBackedTableProvider.java  |  85 ----
 .../samza/rest/proxy/task/SamzaTaskProxy.java   |   4 +-
 .../apache/samza/sql/planner/QueryPlanner.java  |   8 -
 .../sql/planner/SamzaSqlOperatorTable.java      |   1 +
 .../samza/sql/translator/QueryTranslator.java   |  10 +-
 .../samza/sql/translator/ScanTranslator.java    |  13 +-
 .../samza/sql/translator/TranslatorContext.java |  34 +-
 .../sql/testutil/TestIOResolverFactory.java     |  15 +-
 .../sql/translator/TestJoinTranslator.java      |   2 +-
 .../sql/translator/TestQueryTranslator.java     |   5 +-
 .../example/AppWithGlobalConfigExample.java     |  17 +-
 .../apache/samza/example/BroadcastExample.java  |  24 +-
 .../samza/example/KeyValueStoreExample.java     |  27 +-
 .../org/apache/samza/example/MergeExample.java  |  25 +-
 .../samza/example/OrderShipmentJoinExample.java |  29 +-
 .../samza/example/PageViewCounterExample.java   |  17 +-
 .../samza/example/RepartitionExample.java       |  24 +-
 .../org/apache/samza/example/WindowExample.java |  19 +-
 .../apache/samza/test/framework/TestRunner.java |   2 +-
 .../system/CollectionStreamSystemSpec.java      |  25 +-
 .../TestStandaloneIntegrationApplication.java   |  21 +-
 .../EndOfStreamIntegrationTest.java             |  11 +-
 .../WatermarkIntegrationTest.java               |  12 +-
 .../test/framework/BroadcastAssertApp.java      |  11 +-
 .../StreamApplicationIntegrationTest.java       |  29 +-
 .../samza/test/framework/TestTimerApp.java      |  17 +-
 .../test/operator/RepartitionJoinWindowApp.java |  32 +-
 .../test/operator/RepartitionWindowApp.java     |  19 +-
 .../samza/test/operator/SessionWindowApp.java   |  19 +-
 .../operator/TestRepartitionJoinWindowApp.java  |  12 +-
 .../test/operator/TestRepartitionWindowApp.java |   6 +-
 .../samza/test/operator/TumblingWindowApp.java  |  19 +-
 .../test/processor/TestStreamApplication.java   |  20 +-
 .../processor/TestZkLocalApplicationRunner.java |  60 ++-
 .../test/samzasql/TestSamzaSqlEndToEnd.java     |   2 +-
 .../table/PageViewToProfileJoinFunction.java    |  48 +++
 .../apache/samza/test/table/TestLocalTable.java |  75 +---
 .../table/TestLocalTableWithSideInputs.java     |  47 ++-
 .../samza/test/table/TestRemoteTable.java       |  50 +--
 .../table/TestTableDescriptorsProvider.java     |  10 +-
 .../benchmark/SystemConsumerWithSamzaBench.java |  11 +-
 145 files changed, 4990 insertions(+), 1376 deletions(-)
----------------------------------------------------------------------



[42/47] samza git commit: comments

Posted by bo...@apache.org.
comments


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/2480aa36
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/2480aa36
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/2480aa36

Branch: refs/heads/NewKafkaSystemConsumer
Commit: 2480aa36ac7afe10b931d3148ab6e41f70c778cb
Parents: 053fe3b
Author: Boris S <bo...@apache.org>
Authored: Tue Sep 11 14:08:02 2018 -0700
Committer: Boris S <bo...@apache.org>
Committed: Tue Sep 11 14:08:02 2018 -0700

----------------------------------------------------------------------
 .../clients/consumer/KafkaConsumerConfig.java      |  6 +++---
 .../samza/system/kafka/KafkaSystemConsumer.java    | 17 ++---------------
 .../samza/system/kafka/KafkaSystemFactory.scala    | 11 ++++++++---
 3 files changed, 13 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/2480aa36/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
index 8ca5b93..1a97ec7 100644
--- a/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
+++ b/samza-kafka/src/main/scala/org/apache/kafka/clients/consumer/KafkaConsumerConfig.java
@@ -93,12 +93,12 @@ public class KafkaConsumerConfig extends ConsumerConfig {
     // Make sure bootstrap servers are configured; fall back to the producer config, and fail if neither is set.
     if (!subConf.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
       // get it from the producer config
-      String bootstrapServer =
+      String bootstrapServers =
           config.get(String.format("systems.%s.producer.%s", systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
-      if (StringUtils.isEmpty(bootstrapServer)) {
+      if (StringUtils.isEmpty(bootstrapServers)) {
         throw new SamzaException("Missing " + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG + " config for " + systemName);
       }
-      consumerProps.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer);
+      consumerProps.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
     }
 
     // Always use default partition assignment strategy. Do not allow override.
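
The hunk above encodes a small fallback rule: if the consumer sub-config carries no
bootstrap.servers, reuse the producer's value for the same system, and fail fast when
neither is set. Below is a minimal standalone sketch of that rule, assuming the
systems.<name>.producer.<prop> key layout visible in the diff; ensureBootstrapServers is
a hypothetical helper, and IllegalArgumentException stands in for SamzaException:

import java.util.Map;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;

public class BootstrapFallbackSketch {
  // Hypothetical helper mirroring the hunk above: prefer an explicitly
  // configured consumer bootstrap.servers, otherwise borrow the producer's.
  static void ensureBootstrapServers(String systemName, Map<String, String> config,
      Properties consumerProps) {
    if (consumerProps.containsKey(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
      return; // the consumer sub-config already supplied it
    }
    String producerKey = String.format("systems.%s.producer.%s",
        systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG);
    String bootstrapServers = config.get(producerKey);
    if (bootstrapServers == null || bootstrapServers.isEmpty()) {
      // the real code throws SamzaException at this point
      throw new IllegalArgumentException("Missing "
          + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG + " config for " + systemName);
    }
    consumerProps.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
  }
}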

http://git-wip-us.apache.org/repos/asf/samza/blob/2480aa36/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
index 196fb85..9cdfce1 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemConsumer.java
@@ -109,19 +109,6 @@ public class KafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements Sy
         clientId, metricName, this.kafkaConsumer.toString());
   }
 
-  public static <K, V> KafkaSystemConsumer getNewKafkaSystemConsumer(String systemName, Config config,
-      String clientId, KafkaSystemConsumerMetrics metrics, Clock clock) {
-
-    // extract consumer configs and create kafka consumer
-    KafkaConsumer<K, V> kafkaConsumer = getKafkaConsumerImpl(systemName, clientId, config);
-    LOG.info("Created kafka consumer for system {}, clientId {}: {}", systemName, clientId, kafkaConsumer);
-
-    KafkaSystemConsumer kc = new KafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, clock);
-    LOG.info("Created samza system consumer {}", kc.toString());
-
-    return kc;
-  }
-
   /**
    * Creates the Kafka consumer for the given system.
    * @param systemName system name for which we create the consumer
@@ -129,7 +116,7 @@ public class KafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements Sy
    * @param config Samza config used to derive the consumer properties
    * @return the raw-bytes Kafka consumer
    */
-  public static <K, V> KafkaConsumer<K, V> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
+  public static KafkaConsumer<byte[], byte[]> getKafkaConsumerImpl(String systemName, String clientId, Config config) {
 
     Map<String, String> injectProps = new HashMap<>();
 
@@ -263,7 +250,7 @@ public class KafkaSystemConsumer<K, V> extends BlockingEnvelopeMap implements Sy
-    // stop the proxy (with 5 minutes timeout)
+    // stop the proxy (with a 60 second timeout)
     if (proxy != null) {
       LOG.info("Stopping proxy " + proxy);
-      proxy.stop(TimeUnit.MINUTES.toMillis(5));
+      proxy.stop(TimeUnit.SECONDS.toMillis(60));
     }
 
     try {
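
Beyond shortening the proxy stop timeout from five minutes to 60 seconds, this file drops
the static factory getNewKafkaSystemConsumer (its body moves into
KafkaSystemFactory.getConsumer, shown in the next file) and pins getKafkaConsumerImpl to
KafkaConsumer<byte[], byte[]>, since the Samza layer reads raw bytes and applies its own
serdes downstream. A minimal sketch of constructing such a raw-bytes consumer with the
standard kafka-clients API; the property values and the auto-commit choice are
illustrative assumptions, not settings quoted from the patch:

import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;

public class RawBytesConsumerSketch {
  public static KafkaConsumer<byte[], byte[]> create(String bootstrapServers, String clientId) {
    Properties props = new Properties();
    props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    props.setProperty(ConsumerConfig.CLIENT_ID_CONFIG, clientId);
    // Samza tracks offsets through its own checkpointing, so Kafka's
    // auto-commit is disabled here (an assumption for this sketch).
    props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
    // Keys and values stay raw; Samza serdes decode them later.
    return new KafkaConsumer<>(props, new ByteArrayDeserializer(), new ByteArrayDeserializer());
  }
}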

http://git-wip-us.apache.org/repos/asf/samza/blob/2480aa36/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
----------------------------------------------------------------------
diff --git a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
index e0e85be..9f92583 100644
--- a/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
+++ b/samza-kafka/src/main/scala/org/apache/samza/system/kafka/KafkaSystemFactory.scala
@@ -22,7 +22,7 @@ package org.apache.samza.system.kafka
 import java.util.Properties
 
 import kafka.utils.ZkUtils
-import org.apache.kafka.clients.consumer.KafkaConsumerConfig
+import org.apache.kafka.clients.consumer.{KafkaConsumer, KafkaConsumerConfig}
 import org.apache.kafka.clients.producer.KafkaProducer
 import org.apache.samza.SamzaException
 import org.apache.samza.config.ApplicationConfig.ApplicationMode
@@ -50,8 +50,13 @@ class KafkaSystemFactory extends SystemFactory with Logging {
     val clientId = KafkaConsumerConfig.getConsumerClientId(config)
     val metrics = new KafkaSystemConsumerMetrics(systemName, registry)
 
-    KafkaSystemConsumer.getNewKafkaSystemConsumer(
-      systemName, config, clientId, metrics, new SystemClock)
+    val kafkaConsumer = KafkaSystemConsumer.getKafkaConsumerImpl(systemName, clientId, config)
+    info("Created kafka consumer for system %s, clientId %s: %s" format (systemName, clientId, kafkaConsumer))
+
+    val kc = new KafkaSystemConsumer(kafkaConsumer, systemName, config, clientId, metrics, new SystemClock)
+    info("Created samza system consumer %s" format kc.toString)
+
+    kc
   }
 
   def getProducer(systemName: String, config: Config, registry: MetricsRegistry): SystemProducer = {
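
A note on the design choice in this last hunk: with getNewKafkaSystemConsumer gone, the
factory now builds the KafkaConsumer itself and hands it to the KafkaSystemConsumer
constructor. A plausible motivation, not stated in the commit, is that constructor
injection makes the Kafka client swappable, e.g. for a mock consumer in unit tests,
while keeping the creation logging in one place.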