Posted to dev@gobblin.apache.org by "ASF GitHub Bot (Jira)" <ji...@apache.org> on 2020/03/02 02:43:02 UTC

[jira] [Work logged] (GOBBLIN-1040) Fix High level consumer

     [ https://issues.apache.org/jira/browse/GOBBLIN-1040?focusedWorklogId=395739&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-395739 ]

ASF GitHub Bot logged work on GOBBLIN-1040:
-------------------------------------------

                Author: ASF GitHub Bot
            Created on: 02/Mar/20 02:42
            Start Date: 02/Mar/20 02:42
    Worklog Time Spent: 10m 
      Work Description: sv2000 commented on pull request #2900: [GOBBLIN-1040] HighLevelConsumer re-design by removing references to …
URL: https://github.com/apache/incubator-gobblin/pull/2900#discussion_r386171815
 
 

 ##########
 File path: gobblin-runtime/src/main/java/org/apache/gobblin/runtime/kafka/HighLevelConsumer.java
 ##########
 @@ -125,81 +181,127 @@ protected void createMetrics() {
   protected List<Tag<?>> getTagsForMetrics() {
     List<Tag<?>> tags = Lists.newArrayList();
     tags.add(new Tag<>(RuntimeMetrics.TOPIC, this.topic));
-    tags.add(new Tag<>(RuntimeMetrics.GROUP_ID, this.consumerConfig.groupId()));
+    tags.add(new Tag<>(RuntimeMetrics.GROUP_ID, ConfigUtils.getString(this.config, GROUP_ID_KEY, DEFAULT_GROUP_ID)));
     return tags;
   }
 
   /**
-   * Called every time a message is read from the stream. Implementation must be thread-safe if {@link #numThreads} is
+   * Called every time a message is read from the queue. Implementation must be thread-safe if {@link #numThreads} is
    * set larger than 1.
    */
-  protected abstract void processMessage(MessageAndMetadata<K, V> message);
+  protected abstract void processMessage(DecodeableKafkaRecord<K,V> message);
 
   @Override
   protected void startUp() {
     buildMetricsContextAndMetrics();
-    this.consumer = createConsumerConnector();
+    processQueues();
+    if(!enableAutoCommit) {
+      offsetCommitExecutor = Executors.newSingleThreadScheduledExecutor();
+      offsetCommitExecutor.scheduleAtFixedRate(new Runnable() {
+        @Override
+        public void run() {
+          offsetCommitter();
+        }
+      }, 0,100, TimeUnit.MILLISECONDS);
+    }
 
-    List<KafkaStream<byte[], byte[]>> streams = createStreams();
-    this.executor = Executors.newFixedThreadPool(this.numThreads);
+    mainExecutor.scheduleAtFixedRate(new Runnable() {
+      @Override
+      public void run() {
+        consume();
+      }
+    }, 0, 50, TimeUnit.MILLISECONDS);
+  }
 
-    // now create an object to consume the messages
-    //
-    int threadNumber = 0;
-    for (final KafkaStream stream : streams) {
-      this.executor.execute(new MonitorConsumer(stream));
-      threadNumber++;
+  /**
+   * Consumes {@link KafkaConsumerRecord}s and adds them to a queue.
+   * Note: All records from a KafkaPartition are added to the same queue.
+   * A queue can contain records from multiple partitions if the number of partitions exceeds numThreads (the number of queues).
+   */
+  private void consume() {
+    try {
+      Iterator<KafkaConsumerRecord> itr = gobblinKafkaConsumerClient.consume();
+      while (itr.hasNext()) {
+        KafkaConsumerRecord record = itr.next();
+        int idx = record.getPartition() % numThreads;
+        queues[idx].put(record);
+      }
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
     }
   }
 
-  protected ConsumerConfig createConsumerConfig(Config config) {
-    Properties props = ConfigUtils.configToProperties(config);
 
-    if (!props.containsKey(GROUP_ID_KEY)) {
-      props.setProperty(GROUP_ID_KEY, DEFAULT_GROUP_ID);
+  private void processQueues() {
+    for(BlockingQueue queue : queues) {
+      queueExecutor.execute(new QueueProcessor(queue));
     }
-    return new ConsumerConfig(props);
   }
 
-  protected ConsumerConnector createConsumerConnector() {
-    return Consumer.createJavaConsumerConnector(this.consumerConfig);
+  /**
+   * Invoked at regular intervals to commit offsets to Kafka once the records-processed or elapsed-time threshold is reached
+   */
+  private void offsetCommitter() {
+    if(recordsProcessed.intValue() >= offsetsCommitNumRecordsThreshold || ((System.currentTimeMillis() - lastCommitTime) / 1000 >= offsetsCommitTimeThresholdSecs)) {
+      Map<KafkaPartition, Long> copy = new HashMap<>(partitionOffsetsToCommit);
+      partitionOffsetsToCommit.clear();
+      recordsProcessed.set(0);
+      lastCommitTime = System.currentTimeMillis();
+      commitOffsets(copy);
+    }
   }
 
-  protected List<KafkaStream<byte[], byte[]>> createStreams() {
-    Map<String, Integer> topicCountMap = Maps.newHashMap();
-    topicCountMap.put(this.topic, this.numThreads);
-    Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = this.consumer.createMessageStreams(topicCountMap);
-    return consumerMap.get(this.topic);
+  protected void commitOffsets(Map<KafkaPartition, Long> partitionOffsets) {
+    gobblinKafkaConsumerClient.commitOffsets(partitionOffsets);
   }
 
+
   @Override
   public void shutDown() {
-    if (this.consumer != null) {
-      this.consumer.shutdown();
-    }
-    if (this.executor != null) {
-      ExecutorsUtils.shutdownExecutorService(this.executor, Optional.of(log), 5000, TimeUnit.MILLISECONDS);
+    //mainExecutor.shutdown();
 
 Review comment:
   Remove the commented line if not needed.
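
For illustration only (this sketch is not part of the PR or of the review comment above): the javadoc on consume() notes that all records from a KafkaPartition land in the same queue, and that one queue may hold several partitions when there are more partitions than threads. A minimal, standalone Java sketch of that modulo routing, with made-up values for numThreads and the partition count:

    // Illustrative only: mirrors queues[record.getPartition() % numThreads] from consume().
    public class PartitionRoutingSketch {
      public static void main(String[] args) {
        int numThreads = 2;                           // hypothetical number of queues/QueueProcessor threads
        for (int partition = 0; partition < 5; partition++) {
          int queueIndex = partition % numThreads;    // partitions 0,2,4 -> queue 0; partitions 1,3 -> queue 1
          System.out.println("partition " + partition + " -> queue " + queueIndex);
        }
      }
    }

Because the mapping is per partition, ordering within a partition is preserved by its single QueueProcessor, while one queue may interleave records from different partitions.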
 
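Similarly for illustration only (not part of the PR): offsetCommitter() triggers a commit when either the record-count threshold or the elapsed-time threshold is crossed. A small, self-contained sketch of that compound check; the threshold values below are hypothetical stand-ins for the configured offsetsCommitNumRecordsThreshold and offsetsCommitTimeThresholdSecs:

    // Illustrative only: mirrors the threshold check in offsetCommitter(); values are hypothetical.
    public class OffsetCommitCheckSketch {
      static final long NUM_RECORDS_THRESHOLD = 100;  // stand-in for offsetsCommitNumRecordsThreshold
      static final long TIME_THRESHOLD_SECS = 60;     // stand-in for offsetsCommitTimeThresholdSecs

      static boolean shouldCommit(long recordsProcessed, long lastCommitTimeMillis, long nowMillis) {
        return recordsProcessed >= NUM_RECORDS_THRESHOLD
            || (nowMillis - lastCommitTimeMillis) / 1000 >= TIME_THRESHOLD_SECS;
      }

      public static void main(String[] args) {
        long now = System.currentTimeMillis();
        System.out.println(shouldCommit(150, now, now));          // true: record-count threshold reached
        System.out.println(shouldCommit(10, now - 61_000, now));  // true: last commit more than 60s ago
        System.out.println(shouldCommit(10, now - 5_000, now));   // false: neither threshold reached
      }
    }

In the PR, offsetCommitter() then clears partitionOffsetsToCommit, resets recordsProcessed, and updates lastCommitTime before calling commitOffsets on a copy of the map.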
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 395739)

> Fix High level consumer 
> ------------------------
>
>                 Key: GOBBLIN-1040
>                 URL: https://issues.apache.org/jira/browse/GOBBLIN-1040
>             Project: Apache Gobblin
>          Issue Type: Improvement
>            Reporter: Vikram Bohra
>            Priority: Major
>          Time Spent: 2h 40m
>  Remaining Estimate: 0h
>




--
This message was sent by Atlassian Jira
(v8.3.4#803005)