Posted to commits@inlong.apache.org by GitBox <gi...@apache.org> on 2022/08/23 11:52:34 UTC

[GitHub] [inlong] vernedeng opened a new pull request, #5659: [INLONG-5623][SDK] Support kafka multi topics fetcher

vernedeng opened a new pull request, #5659:
URL: https://github.com/apache/inlong/pull/5659

   
   - Fixes #5623 
   
   ### Motivation
   
   1. Introduce a **_skiplist_** to maintain the commit offset of each TopicPartition.
   2. The customized msgOffset differs from the one used by **_KafkaSingleTopicFetcher_**: it consists of topic:partitionId:offset (see the sketch below).
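   
   A minimal sketch of this ack bookkeeping, assuming a hypothetical `MsgOffsetSketch` class (illustrative only, not the PR's exact code):
   
   ```java
   import java.util.concurrent.ConcurrentHashMap;
   import java.util.concurrent.ConcurrentSkipListMap;
   
   public class MsgOffsetSketch {
       // per-partition skiplist: offset -> acked flag (all names here are illustrative)
       static final ConcurrentHashMap<String, ConcurrentSkipListMap<Long, Boolean>> ackMap =
               new ConcurrentHashMap<>();
   
       static void ack(String msgOffset) {
           // expected format: topic:partitionId:offset, e.g. "topic1:20:1746839"
           String[] parts = msgOffset.split(":");
           if (parts.length != 3) {
               throw new IllegalArgumentException("expect topic:partitionId:offset, got " + msgOffset);
           }
           String partitionKey = parts[0] + "-" + parts[1];
           long offset = Long.parseLong(parts[2]);
           // mark the offset as acked; the skiplist keeps offsets sorted, so a later
           // commit pass can find the longest contiguous acked prefix per partition
           ackMap.computeIfAbsent(partitionKey, k -> new ConcurrentSkipListMap<>())
                   .put(offset, Boolean.TRUE);
       }
   
       public static void main(String[] args) {
           ack("topic1:20:1746839");
           System.out.println(ackMap); // prints {topic1-20={1746839=true}}
       }
   }
   ```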
   
   ### Modifications
   
   *Describe the modifications you've done.*
   
   ### Verifying this change
   
   *(Please pick one of the following options)*
   
   - [ ] This change is a trivial rework/code cleanup without any test coverage.
   
   - [ ] This change is already covered by existing tests, such as:
     *(please describe tests)*
   
   - [ ] This change added tests and can be verified as follows:
   
     *(example:)*
     - *Added integration tests for end-to-end deployment with large payloads (10MB)*
     - *Extended integration test for recovery after broker failure*
   
   ### Documentation
   
     - Does this pull request introduce a new feature? (yes / no)
     - If yes, how is the feature documented? (not applicable / docs / JavaDocs / not documented)
     - If a feature is not applicable for documentation, explain why?
     - If a feature is not documented yet in this PR, please create a follow-up issue for adding the documentation
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@inlong.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [inlong] vernedeng commented on a diff in pull request #5659: [INLONG-5623][SDK] Support kafka multi topics fetcher

Posted by GitBox <gi...@apache.org>.
vernedeng commented on code in PR #5659:
URL: https://github.com/apache/inlong/pull/5659#discussion_r953469117


##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/kafka/KafkaMultiTopicsFetcher.java:
##########
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.sort.fetcher.kafka;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.inlong.sdk.sort.api.ClientContext;
+import org.apache.inlong.sdk.sort.api.Deserializer;
+import org.apache.inlong.sdk.sort.api.Interceptor;
+import org.apache.inlong.sdk.sort.api.MultiTopicsFetcher;
+import org.apache.inlong.sdk.sort.api.SeekerFactory;
+import org.apache.inlong.sdk.sort.api.SortClientConfig;
+import org.apache.inlong.sdk.sort.entity.InLongMessage;
+import org.apache.inlong.sdk.sort.entity.InLongTopic;
+import org.apache.inlong.sdk.sort.entity.MessageRecord;
+import org.apache.inlong.sdk.sort.fetcher.pulsar.PulsarMultiTopicsFetcher;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.OffsetAndMetadata;
+import org.apache.kafka.clients.consumer.RangeAssignor;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.header.Header;
+import org.apache.kafka.common.header.Headers;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Properties;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * Kafka multi topics fetcher
+ */
+public class KafkaMultiTopicsFetcher extends MultiTopicsFetcher {
+    private static final Logger LOGGER = LoggerFactory.getLogger(KafkaMultiTopicsFetcher.class);
+    private final ConcurrentHashMap<TopicPartition, OffsetAndMetadata> commitOffsetMap;
+    private final ConcurrentHashMap<TopicPartition, ConcurrentSkipListMap<Long, Boolean>> ackOffsetMap;
+    private final String bootstrapServers;
+    private ConsumerRebalanceListener listener;
+    private KafkaConsumer<byte[], byte[]> consumer;
+
+    public KafkaMultiTopicsFetcher(
+            List<InLongTopic> topics,
+            ClientContext context,
+            Interceptor interceptor,
+            Deserializer deserializer,
+            String bootstrapServers) {
+        super(topics, context, interceptor, deserializer);
+        this.bootstrapServers = bootstrapServers;
+        this.commitOffsetMap = new ConcurrentHashMap<>();
+        this.ackOffsetMap = new ConcurrentHashMap<>();
+    }
+
+    @Override
+    public boolean init() {
+        try {
+            this.consumer = createKafkaConsumer();
+            InLongTopic topic = onlineTopics.values().stream().findFirst().get();
+            this.seeker = SeekerFactory.createKafkaSeeker(consumer, topic);
+            this.listener = new AckOffsetOnRebalance(topic.getInLongCluster().getClusterId(), seeker,
+                    commitOffsetMap);
+            consumer.subscribe(onlineTopics.keySet(), listener);
+            return true;
+        } catch (Throwable t) {
+            LOGGER.error("failed to init kafka consumer: {}", t.getMessage(), t);
+            return false;
+        }
+    }
+
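+    /**
+     * Build a manual-commit byte[] consumer; the SDK ConsumeStrategy is mapped to
+     * Kafka's auto.offset.reset ("latest", "earliest", or "none").
+     */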
+    private KafkaConsumer<byte[], byte[]> createKafkaConsumer() {
+        Properties properties = new Properties();
+        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
+        properties.put(ConsumerConfig.GROUP_ID_CONFIG, context.getConfig().getSortTaskId());
+        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
+                ByteArrayDeserializer.class.getName());
+        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
+                ByteArrayDeserializer.class.getName());
+        properties.put(ConsumerConfig.RECEIVE_BUFFER_CONFIG,
+                context.getConfig().getKafkaSocketRecvBufferSize());
+        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
+        SortClientConfig.ConsumeStrategy offsetResetStrategy = context.getConfig().getOffsetResetStrategy();
+        if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.lastest
+                || offsetResetStrategy == SortClientConfig.ConsumeStrategy.lastest_absolutely) {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
+        } else if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest
+                || offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest_absolutely) {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        } else {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "none");
+        }
+        properties.put(ConsumerConfig.FETCH_MAX_BYTES_CONFIG,
+                context.getConfig().getKafkaFetchSizeBytes());
+        properties.put(ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG,
+                context.getConfig().getKafkaFetchWaitMs());
+        properties.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG,
+                RangeAssignor.class.getName());
+        properties.put(ConsumerConfig.CONNECTIONS_MAX_IDLE_MS_CONFIG, 120000L);
+        LOGGER.info("start to create kafka consumer:{}", properties);
+        return new KafkaConsumer<>(properties);
+    }
+
+    @Override
+    public void ack(String msgOffset) throws Exception {
+        // the format of multi topic kafka fetcher msg offset is topic:partitionId:offset, such as topic1:20:1746839
+        String[] offset = msgOffset.split(":");
+        if (offset.length != 3) {
+            throw new Exception("offset is illegal, the correct format is topic:partitionId:offset, "
+                    + "the error offset is:" + msgOffset);
+        }
+
+        // parse topic partition offset
+        TopicPartition topicPartition = new TopicPartition(offset[0], Integer.parseInt(offset[1]));
+        long ackOffset = Long.parseLong(offset[2]);
+
+        // ack
+        if (!ackOffsetMap.containsKey(topicPartition) || !ackOffsetMap.get(topicPartition).containsKey(ackOffset)) {
+            LOGGER.warn("did not find offsetMap or ack offset of {}, offset {}, just ignore it",
+                    topicPartition, ackOffset);
+            return;
+        }
+
+        // mark this offset as acked
+        ConcurrentSkipListMap<Long, Boolean> tpOffsetMap = ackOffsetMap.get(topicPartition);
+        // to prevent race condition in AckOffsetOnRebalance::onPartitionsRevoked
+        if (Objects.nonNull(tpOffsetMap)) {
+            tpOffsetMap.put(ackOffset, true);
+        }
+    }
+
+    @Override
+    public void pause() {
+        consumer.pause(consumer.assignment());
+    }
+
+    @Override
+    public void resume() {
+        consumer.resume(consumer.assignment());
+    }
+
+    @Override
+    public boolean close() {
+        this.closed = true;
+        try {
+            if (fetchThread != null) {
+                fetchThread.interrupt();
+            }
+            if (consumer != null) {
+                prepareCommit();
+                consumer.commitSync(commitOffsetMap);
+                consumer.close();
+            }
+            commitOffsetMap.clear();
+        } catch (Throwable t) {
+            LOGGER.warn(t.getMessage(), t);
+        }
+        LOGGER.info("closed kafka multi topic fetcher");
+        return true;
+    }
+
+    @Override
+    public boolean isClosed() {
+        return closed;
+    }
+
+    @Override
+    public void setStopConsume(boolean stopConsume) {
+        this.stopConsume = stopConsume;
+    }
+
+    @Override
+    public boolean isStopConsume() {
+        return stopConsume;
+    }
+
+    @Override
+    public List<InLongTopic> getTopics() {
+        return new ArrayList<>(onlineTopics.values());
+    }
+
+    @Override
+    public boolean updateTopics(List<InLongTopic> topics) {
+        if (needUpdate(topics)) {
+            return updateAll(topics);
+        }
+        LOGGER.info("no need to update topics");
+        return false;
+    }
+
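+    /**
+     * Replace the whole online topic set: stop consuming, rebuild the seeker and
+     * rebalance listener for the new set, resubscribe, then resume consuming.
+     */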
+    private boolean updateAll(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topics is empty or null");
+            return false;
+        }
+
+        // stop
+        this.setStopConsume(true);
+
+        // update
+        this.onlineTopics = newTopics.stream().collect(Collectors.toMap(InLongTopic::getTopic, t -> t));
+        InLongTopic topic = onlineTopics.values().stream().findFirst().get();
+        this.seeker = SeekerFactory.createKafkaSeeker(consumer, topic);
+        this.listener = new AckOffsetOnRebalance(topic.getInLongCluster().getClusterId(), seeker,
+                commitOffsetMap, ackOffsetMap);
+        Optional.ofNullable(interceptor).ifPresent(i -> i.configure(topic));
+
+        // subscribe new
+        consumer.subscribe(onlineTopics.keySet(), listener);
+
+        // resume
+        this.setStopConsume(false);
+        return true;
+    }
+
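+    /**
+     * For each partition, scan the ack skip-list in ascending offset order, take
+     * the longest contiguous prefix of acked offsets, stage the highest offset of
+     * that prefix into commitOffsetMap, and drop the prefix entries.
+     */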
+    private void prepareCommit() {
+        ackOffsetMap.forEach((topicPartition, tpOffsetMap) -> {
+            synchronized (tpOffsetMap) {
+                // get the remove list
+                List<Long> removeOffsets = new ArrayList<>();
+                long commitOffset = -1;
+                for (Long ackOffset : tpOffsetMap.keySet()) {
+                    if (!tpOffsetMap.get(ackOffset)) {
+                        break;
+                    }
+                    removeOffsets.add(ackOffset);
+                    commitOffset = ackOffset;
+                }
+                // the lowest offset has not been acked yet, commit nothing
+                if (commitOffset == -1) {
+                    return;
+                }
+
+                // remove offset and commit offset
+                removeOffsets.forEach(tpOffsetMap::remove);
+                commitOffsetMap.put(topicPartition, new OffsetAndMetadata(commitOffset));
+            }
+        });
+    }
+
+    public class Fetcher implements Runnable {
+
+        private void commitKafkaOffset() {
+            prepareCommit();
+            if (consumer != null) {
+                try {
+                    consumer.commitAsync(commitOffsetMap, null);
+                    commitOffsetMap.clear();
+                } catch (Exception e) {
+                    LOGGER.error("commit kafka offset failed: {}", e.getMessage(), e);

Review Comment:
   fixed, thx





[GitHub] [inlong] vernedeng commented on a diff in pull request #5659: [INLONG-5623][SDK] Support kafka multi topics fetcher

Posted by GitBox <gi...@apache.org>.
vernedeng commented on code in PR #5659:
URL: https://github.com/apache/inlong/pull/5659#discussion_r953470716


##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/pulsar/PulsarMultiTopicsFetcher.java:
##########
@@ -0,0 +1,411 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.sort.fetcher.pulsar;
+
+import com.google.common.base.Preconditions;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.inlong.sdk.sort.api.ClientContext;
+import org.apache.inlong.sdk.sort.api.Deserializer;
+import org.apache.inlong.sdk.sort.api.Interceptor;
+import org.apache.inlong.sdk.sort.api.MultiTopicsFetcher;
+import org.apache.inlong.sdk.sort.api.Seeker;
+import org.apache.inlong.sdk.sort.api.SeekerFactory;
+import org.apache.inlong.sdk.sort.api.SortClientConfig;
+import org.apache.inlong.sdk.sort.entity.InLongMessage;
+import org.apache.inlong.sdk.sort.entity.InLongTopic;
+import org.apache.inlong.sdk.sort.entity.MessageRecord;
+import org.apache.pulsar.client.api.Consumer;
+import org.apache.pulsar.client.api.Message;
+import org.apache.pulsar.client.api.MessageId;
+import org.apache.pulsar.client.api.Messages;
+import org.apache.pulsar.client.api.PulsarClient;
+import org.apache.pulsar.client.api.PulsarClientException;
+import org.apache.pulsar.client.api.Schema;
+import org.apache.pulsar.client.api.SubscriptionInitialPosition;
+import org.apache.pulsar.client.api.SubscriptionType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Objects;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * MultiTopicsFetcher for Pulsar.
+ */
+public class PulsarMultiTopicsFetcher extends MultiTopicsFetcher {
+    private static final Logger LOGGER = LoggerFactory.getLogger(PulsarMultiTopicsFetcher.class);
+    private PulsarConsumer currentConsumer;
+    private List<PulsarConsumer> toBeRemovedConsumers = new LinkedList<>();
+    private PulsarClient pulsarClient;
+
+    public PulsarMultiTopicsFetcher(
+            List<InLongTopic> topics,
+            ClientContext context,
+            Interceptor interceptor,
+            Deserializer deserializer,
+            PulsarClient pulsarClient) {
+        super(topics, context, interceptor, deserializer);
+        this.pulsarClient = Preconditions.checkNotNull(pulsarClient);
+    }
+
+    @Override
+    public boolean init() {
+        Consumer<byte[]> newConsumer = createConsumer(onlineTopics.values());
+        if (Objects.isNull(newConsumer)) {
+            LOGGER.error("create new consumer is null");
+            return false;
+        }
+        this.currentConsumer = new PulsarConsumer(newConsumer);
+        InLongTopic firstTopic = onlineTopics.values().stream().findFirst().get();
+        this.seeker = SeekerFactory.createPulsarSeeker(newConsumer, firstTopic);
+        String threadName = String.format("sort_sdk_pulsar_multi_topic_fetch_thread_%d", this.hashCode());
+        this.fetchThread = new Thread(new PulsarMultiTopicsFetcher.Fetcher(), threadName);
+        this.fetchThread.start();
+        this.executor.scheduleWithFixedDelay(this::clearRemovedConsumerList,
+                context.getConfig().getCleanOldConsumerIntervalSec(),
+                context.getConfig().getCleanOldConsumerIntervalSec(),
+                TimeUnit.SECONDS);
+        return true;
+    }
+
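+    /**
+     * Close and drop retired consumers once they have been stopped for longer than
+     * the clean interval or have no pending messages; keep the rest for later acks.
+     */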
+    private void clearRemovedConsumerList() {
+        long cur = System.currentTimeMillis();
+        List<PulsarConsumer> newList = new LinkedList<>();
+        toBeRemovedConsumers.forEach(consumer -> {
+            long diff = cur - consumer.stopTime;
+            if (diff > context.getConfig().getCleanOldConsumerIntervalSec() * 1000L || consumer.isEmpty()) {
+                try {
+                    consumer.close();
+                } catch (PulsarClientException e) {
+                    LOGGER.warn("exception in close old consumer {}", e.getMessage(), e);
+                }
+                return;
+            }
+            newList.add(consumer);
+        });
+        LOGGER.info("after clear old consumers, the old size is {}, current size is {}",
+                toBeRemovedConsumers.size(), newList.size());
+        this.toBeRemovedConsumers = newList;
+    }
+
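+    /**
+     * Swap in a new consumer for the new topic set. The old consumer is parked in
+     * toBeRemovedConsumers so in-flight messages can still be acked before it is
+     * closed by clearRemovedConsumerList.
+     */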
+    private boolean updateAll(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topics is empty or null");
+            return false;
+        }
+        // stop old
+        this.setStopConsume(true);
+        this.currentConsumer.pause();
+        // create new
+        Consumer<byte[]> newConsumer = createConsumer(newTopics);
+        if (Objects.isNull(newConsumer)) {
+            currentConsumer.resume();
+            this.setStopConsume(false);
+            return false;
+        }
+        PulsarConsumer newConsumerWrapper = new PulsarConsumer(newConsumer);
+        InLongTopic firstTopic = newTopics.stream().findFirst().get();
+        final Seeker newSeeker = SeekerFactory.createPulsarSeeker(newConsumer, firstTopic);
+        // save
+        currentConsumer.setStopTime(System.currentTimeMillis());
+        toBeRemovedConsumers.add(currentConsumer);
+        // replace
+        this.currentConsumer = newConsumerWrapper;
+        this.seeker = newSeeker;
+        this.interceptor.configure(firstTopic);
+        this.onlineTopics = newTopics.stream().collect(Collectors.toMap(InLongTopic::getTopic, t -> t));
+        // resume
+        this.setStopConsume(false);
+        return true;
+    }
+
+    private Consumer<byte[]> createConsumer(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topic is empty or null");
+            return null;
+        }
+        try {
+            SubscriptionInitialPosition position = SubscriptionInitialPosition.Latest;
+            SortClientConfig.ConsumeStrategy offsetResetStrategy = context.getConfig().getOffsetResetStrategy();
+            if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest
+                    || offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest_absolutely) {
+                LOGGER.info("the subscription initial position is earliest!");
+                position = SubscriptionInitialPosition.Earliest;
+            }
+
+            List<String> topicNames = newTopics.stream()
+                    .map(InLongTopic::getTopic)
+                    .collect(Collectors.toList());
+            Consumer<byte[]> consumer = pulsarClient.newConsumer(Schema.BYTES)
+                    .topics(topicNames)
+                    .subscriptionName(context.getConfig().getSortTaskId())
+                    .subscriptionType(SubscriptionType.Shared)
+                    .startMessageIdInclusive()
+                    .subscriptionInitialPosition(position)
+                    .ackTimeout(context.getConfig().getAckTimeoutSec(), TimeUnit.SECONDS)
+                    .receiverQueueSize(context.getConfig().getPulsarReceiveQueueSize())
+                    .subscribe();
+            LOGGER.info("create consumer for topics {}", topicNames);
+            return consumer;
+        } catch (Exception e) {
+            LOGGER.error(e.getMessage(), e);

Review Comment:
   fixed #5625, thx





[GitHub] [inlong] vernedeng commented on a diff in pull request #5659: [INLONG-5623][SDK] Support kafka multi topics fetcher

Posted by GitBox <gi...@apache.org>.
vernedeng commented on code in PR #5659:
URL: https://github.com/apache/inlong/pull/5659#discussion_r953470533


##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/pulsar/PulsarMultiTopicsFetcher.java:
##########
@@ -0,0 +1,411 @@
    [hunk body elided: identical to the PulsarMultiTopicsFetcher.java diff quoted in full above; the lines this comment refers to follow]
+    private void clearRemovedConsumerList() {
+        long cur = System.currentTimeMillis();
+        List<PulsarConsumer> newList = new LinkedList<>();
+        toBeRemovedConsumers.forEach(consumer -> {
+            long diff = cur - consumer.stopTime;
+            if (diff > context.getConfig().getCleanOldConsumerIntervalSec() * 1000L || consumer.isEmpty()) {
+                try {
+                    consumer.close();
+                } catch (PulsarClientException e) {
+                    LOGGER.warn("exception in close old consumer {}", e.getMessage(), e);

Review Comment:
   thx, fixed in #5625





[GitHub] [inlong] healchow commented on a diff in pull request #5659: [INLONG-5623][SDK] Support kafka multi topics fetcher

Posted by GitBox <gi...@apache.org>.
healchow commented on code in PR #5659:
URL: https://github.com/apache/inlong/pull/5659#discussion_r953296680


##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/kafka/KafkaMultiTopicsFetcher.java:
##########
@@ -0,0 +1,388 @@
    [hunk body elided: identical to the KafkaMultiTopicsFetcher.java diff quoted in full above; the lines this comment refers to follow]
+    @Override
+    public boolean init() {
+        try {
+            this.consumer = createKafkaConsumer();
+            InLongTopic topic = onlineTopics.values().stream().findFirst().get();
+            this.seeker = SeekerFactory.createKafkaSeeker(consumer, topic);
+            this.listener = new AckOffsetOnRebalance(topic.getInLongCluster().getClusterId(), seeker,
+                    commitOffsetMap);
+            consumer.subscribe(onlineTopics.keySet(), listener);
+            return true;
+        } catch (Throwable t) {
+            LOGGER.error("failed to init kafka consumer: {}", t.getMessage(), t);

Review Comment:
   Suggested changing to `LOGGER.error("failed to init kafka consumer: ", t);`.
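   (With SLF4J, a Throwable passed as the last argument is logged with its message and full stack trace, so also interpolating `t.getMessage()` into the pattern duplicates information.)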



##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/pulsar/PulsarMultiTopicsFetcher.java:
##########
@@ -0,0 +1,411 @@
    [hunk body elided: identical to the PulsarMultiTopicsFetcher.java diff quoted in full above; the lines this comment refers to follow]
+    private Consumer<byte[]> createConsumer(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topic is empty or null");
+            return null;
+        }
+        try {
+            SubscriptionInitialPosition position = SubscriptionInitialPosition.Latest;
+            SortClientConfig.ConsumeStrategy offsetResetStrategy = context.getConfig().getOffsetResetStrategy();
+            if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest
+                    || offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest_absolutely) {
+                LOGGER.info("the subscription initial position is earliest!");
+                position = SubscriptionInitialPosition.Earliest;
+            }
+
+            List<String> topicNames = newTopics.stream()
+                    .map(InLongTopic::getTopic)
+                    .collect(Collectors.toList());
+            Consumer<byte[]> consumer = pulsarClient.newConsumer(Schema.BYTES)
+                    .topics(topicNames)
+                    .subscriptionName(context.getConfig().getSortTaskId())
+                    .subscriptionType(SubscriptionType.Shared)
+                    .startMessageIdInclusive()
+                    .subscriptionInitialPosition(position)
+                    .ackTimeout(context.getConfig().getAckTimeoutSec(), TimeUnit.SECONDS)
+                    .receiverQueueSize(context.getConfig().getPulsarReceiveQueueSize())
+                    .subscribe();
+            LOGGER.info("create consumer for topics {}", topicNames);
+            return consumer;
+        } catch (Exception e) {
+            LOGGER.error(e.getMessage(), e);

Review Comment:
   ditto.



##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/kafka/KafkaMultiTopicsFetcher.java:
##########
@@ -0,0 +1,388 @@
    [hunk body elided: identical to the KafkaMultiTopicsFetcher.java diff quoted in full above; the Fetcher inner class this comment refers to follows]
+    public class Fetcher implements Runnable {
+
+        private void commitKafkaOffset() {
+            prepareCommit();
+            if (consumer != null) {
+                try {
+                    consumer.commitAsync(commitOffsetMap, null);
+                    commitOffsetMap.clear();
+                } catch (Exception e) {
+                    LOGGER.error("commit kafka offset failed: {}", e.getMessage(), e);
+                }
+            }
+        }
+
+        /**
+         * pass the received messages to the onFinishedBatch callback
+         *
+         * @param messageRecords list of {@link MessageRecord}
+         */
+        private void handleAndCallbackMsg(List<MessageRecord> messageRecords) {
+            long start = System.currentTimeMillis();
+            try {
+                context.getDefaultStateCounter().addCallbackTimes(1);
+                context.getConfig().getCallback().onFinishedBatch(messageRecords);
+                context.getDefaultStateCounter()
+                        .addCallbackTimeCost(System.currentTimeMillis() - start)
+                        .addCallbackDoneTimes(1);
+            } catch (Exception e) {
+                context.getDefaultStateCounter().addCallbackErrorTimes(1);
+                LOGGER.error("failed to callback: {}", e.getMessage(), e);

Review Comment:
   `LOGGER.error("failed to callback: ", e);`



##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/pulsar/PulsarMultiTopicsFetcher.java:
##########
@@ -0,0 +1,411 @@
    [hunk elided: identical to the PulsarMultiTopicsFetcher.java diff quoted in full above]
+    }
+
+    private Consumer<byte[]> createConsumer(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topic is empty or null");
+            return null;
+        }
+        try {
+            SubscriptionInitialPosition position = SubscriptionInitialPosition.Latest;
+            SortClientConfig.ConsumeStrategy offsetResetStrategy = context.getConfig().getOffsetResetStrategy();
+            if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest
+                    || offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest_absolutely) {
+                LOGGER.info("the subscription initial position is earliest!");
+                position = SubscriptionInitialPosition.Earliest;
+            }
+
+            List<String> topicNames = newTopics.stream()
+                    .map(InLongTopic::getTopic)
+                    .collect(Collectors.toList());
+            Consumer<byte[]> consumer = pulsarClient.newConsumer(Schema.BYTES)
+                    .topics(topicNames)
+                    .subscriptionName(context.getConfig().getSortTaskId())
+                    .subscriptionType(SubscriptionType.Shared)
+                    .startMessageIdInclusive()
+                    .subscriptionInitialPosition(position)
+                    .ackTimeout(context.getConfig().getAckTimeoutSec(), TimeUnit.SECONDS)
+                    .receiverQueueSize(context.getConfig().getPulsarReceiveQueueSize())
+                    .subscribe();
+            LOGGER.info("create consumer for topics {}", topicNames);
+            return consumer;
+        } catch (Exception e) {
+            LOGGER.error(e.getMessage(), e);
+            return null;
+        }
+    }
+
+    @Override
+    public void ack(String msgOffset) throws Exception {
+        if (StringUtils.isBlank(msgOffset)) {
+            LOGGER.error("ack failed, msg offset should not be blank");
+            return;
+        }
+        if (Objects.isNull(currentConsumer)) {
+            LOGGER.error("ack failed, consumer is null");
+            return;
+        }
+        // if this ack belongs to current consumer
+        MessageId messageId = currentConsumer.getMessageId(msgOffset);
+        if (!Objects.isNull(messageId)) {
+            doAck(msgOffset, this.currentConsumer, messageId);
+            return;
+        }
+
+        // if this ack doesn't belong to current consumer, find in to be removed ones.
+        for (PulsarConsumer oldConsumer : toBeRemovedConsumers) {
+            MessageId id = oldConsumer.getMessageId(msgOffset);
+            if (Objects.isNull(id)) {
+                continue;
+            }
+            doAck(msgOffset, oldConsumer, id);
+            LOGGER.info("ack an old consumer message");
+            return;
+        }
+        context.getDefaultStateCounter().addAckFailTimes(1L);
+        LOGGER.error("in pulsar multi topic fetcher, messageId == null");
+    }
+
+    private void doAck(String msgOffset, PulsarConsumer consumer, MessageId messageId) {
+        if (!consumer.isConnected()) {
+            return;
+        }
+        InLongTopic topic = consumer.getTopic(msgOffset);
+        consumer.acknowledgeAsync(messageId)
+                .thenAccept(ctx -> ackSucc(msgOffset, topic, this.currentConsumer))
+                .exceptionally(exception -> {
+                    LOGGER.error("ack fail:{} {},error:{}",
+                            topic, msgOffset, exception.getMessage(), exception);
+                    context.getStateCounterByTopic(topic).addAckFailTimes(1L);
+                    return null;
+                });
+    }
+
+    private void ackSucc(String offset, InLongTopic topic, PulsarConsumer consumer) {
+        consumer.remove(offset);
+        context.getStateCounterByTopic(topic).addAckSuccTimes(1L);
+    }
+
+    @Override
+    public void pause() {
+        if (Objects.nonNull(currentConsumer)) {
+            currentConsumer.pause();
+        }
+    }
+
+    @Override
+    public void resume() {
+        if (Objects.nonNull(currentConsumer)) {
+            currentConsumer.resume();
+        }
+    }
+
+    @Override
+    public boolean close() {
+        mainLock.writeLock().lock();
+        try {
+            LOGGER.info("closed online topics {}", onlineTopics);
+            try {
+                if (currentConsumer != null) {
+                    currentConsumer.close();
+                }
+                if (fetchThread != null) {
+                    fetchThread.interrupt();
+                }
+            } catch (PulsarClientException e) {
+                LOGGER.warn(e.getMessage(), e);
+            }
+            toBeRemovedConsumers.stream()
+                    .filter(Objects::nonNull)
+                    .forEach(c -> {
+                        try {
+                            c.close();
+                        } catch (PulsarClientException e) {
+                            LOGGER.warn(e.getMessage(), e);
+                        }
+                    });
+            toBeRemovedConsumers.clear();
+            return true;
+        } finally {
+            this.closed = true;
+            mainLock.writeLock().unlock();
+        }
+    }
+
+    @Override
+    public boolean isClosed() {
+        return closed;
+    }
+
+    @Override
+    public void setStopConsume(boolean stopConsume) {
+        this.stopConsume = stopConsume;
+    }
+
+    @Override
+    public boolean isStopConsume() {
+        return stopConsume;
+    }
+
+    @Override
+    public List<InLongTopic> getTopics() {
+        return new ArrayList<>(onlineTopics.values());
+    }
+
+    @Override
+    public boolean updateTopics(List<InLongTopic> topics) {
+        if (needUpdate(topics)) {
+            return updateAll(topics);
+        }
+        LOGGER.info("no need to update multi topic fetcher");
+        return false;
+    }
+
+    public class Fetcher implements Runnable {
+
+        /**
+         * pass the received messages to the onFinishedBatch callback
+         *
+         * @param messageRecords {@link List}
+         */
+        private void handleAndCallbackMsg(List<MessageRecord> messageRecords) {
+            long start = System.currentTimeMillis();
+            try {
+                context.getDefaultStateCounter().addCallbackTimes(1L);
+                context.getConfig().getCallback().onFinishedBatch(messageRecords);
+                context.getDefaultStateCounter()
+                        .addCallbackTimeCost(System.currentTimeMillis() - start).addCallbackDoneTimes(1L);
+            } catch (Exception e) {
+                context.getDefaultStateCounter().addCallbackErrorTimes(1L);
+                LOGGER.error("failed to callback {}", e.getMessage(), e);

Review Comment:
   ditto.



##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/pulsar/PulsarMultiTopicsFetcher.java:
##########
@@ -0,0 +1,411 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.sort.fetcher.pulsar;
+
+import com.google.common.base.Preconditions;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.inlong.sdk.sort.api.ClientContext;
+import org.apache.inlong.sdk.sort.api.Deserializer;
+import org.apache.inlong.sdk.sort.api.Interceptor;
+import org.apache.inlong.sdk.sort.api.MultiTopicsFetcher;
+import org.apache.inlong.sdk.sort.api.Seeker;
+import org.apache.inlong.sdk.sort.api.SeekerFactory;
+import org.apache.inlong.sdk.sort.api.SortClientConfig;
+import org.apache.inlong.sdk.sort.entity.InLongMessage;
+import org.apache.inlong.sdk.sort.entity.InLongTopic;
+import org.apache.inlong.sdk.sort.entity.MessageRecord;
+import org.apache.pulsar.client.api.Consumer;
+import org.apache.pulsar.client.api.Message;
+import org.apache.pulsar.client.api.MessageId;
+import org.apache.pulsar.client.api.Messages;
+import org.apache.pulsar.client.api.PulsarClient;
+import org.apache.pulsar.client.api.PulsarClientException;
+import org.apache.pulsar.client.api.Schema;
+import org.apache.pulsar.client.api.SubscriptionInitialPosition;
+import org.apache.pulsar.client.api.SubscriptionType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Objects;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * MultiTopicsFetcher for pulsar.
+ *
+ */
+public class PulsarMultiTopicsFetcher extends MultiTopicsFetcher {
+    private static final Logger LOGGER = LoggerFactory.getLogger(PulsarMultiTopicsFetcher.class);
+    private PulsarConsumer currentConsumer;
+    private List<PulsarConsumer> toBeRemovedConsumers = new LinkedList<>();
+    private PulsarClient pulsarClient;
+
+    public PulsarMultiTopicsFetcher(
+            List<InLongTopic> topics,
+            ClientContext context,
+            Interceptor interceptor,
+            Deserializer deserializer,
+            PulsarClient pulsarClient) {
+        super(topics, context, interceptor, deserializer);
+        this.pulsarClient = Preconditions.checkNotNull(pulsarClient);
+    }
+
+    @Override
+    public boolean init() {
+        Consumer<byte[]> newConsumer = createConsumer(onlineTopics.values());
+        if (Objects.isNull(newConsumer)) {
+            LOGGER.error("create new consumer is null");
+            return false;
+        }
+        this.currentConsumer = new PulsarConsumer(newConsumer);
+        InLongTopic firstTopic = onlineTopics.values().stream().findFirst().get();
+        this.seeker = SeekerFactory.createPulsarSeeker(newConsumer, firstTopic);
+        String threadName = String.format("sort_sdk_pulsar_multi_topic_fetch_thread_%d", this.hashCode());
+        this.fetchThread = new Thread(new PulsarMultiTopicsFetcher.Fetcher(), threadName);
+        this.fetchThread.start();
+        this.executor.scheduleWithFixedDelay(this::clearRemovedConsumerList,
+                context.getConfig().getCleanOldConsumerIntervalSec(),
+                context.getConfig().getCleanOldConsumerIntervalSec(),
+                TimeUnit.SECONDS);
+        return true;
+    }
+
+    private void clearRemovedConsumerList() {
+        long cur = System.currentTimeMillis();
+        List<PulsarConsumer> newList = new LinkedList<>();
+        toBeRemovedConsumers.forEach(consumer -> {
+            long diff = cur - consumer.stopTime;
+            if (diff > context.getConfig().getCleanOldConsumerIntervalSec() * 1000L || consumer.isEmpty()) {
+                try {
+                    consumer.close();
+                } catch (PulsarClientException e) {
+                    LOGGER.warn("exception in close old consumer {}", e.getMessage(), e);

Review Comment:
   `LOGGER.warn("exception in close old consumer ", e);`
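
The surrounding diff retires the old consumer rather than closing it immediately, so in-flight acks can still land on it; the scheduled sweep closes it once it is drained or a grace period has elapsed. A standalone sketch of that pattern (names are illustrative, not from the PR):

```java
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

class RetiredConsumerSweeper {
    static class Retired {
        long stopTime;       // when this consumer was replaced
        boolean empty;       // true once no acks are pending on it
        void close() { /* release the underlying consumer */ }
    }

    private final List<Retired> retired = new LinkedList<>();
    private final long gracePeriodMs;

    RetiredConsumerSweeper(long gracePeriodMs) {
        this.gracePeriodMs = gracePeriodMs;
    }

    // Run on a fixed schedule, mirroring clearRemovedConsumerList():
    // close a retired consumer when it is drained or has expired.
    void sweep() {
        long now = System.currentTimeMillis();
        Iterator<Retired> it = retired.iterator();
        while (it.hasNext()) {
            Retired r = it.next();
            if (r.empty || now - r.stopTime > gracePeriodMs) {
                r.close();
                it.remove();
            }
        }
    }
}
```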



##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/kafka/KafkaMultiTopicsFetcher.java:
##########
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.sort.fetcher.kafka;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.inlong.sdk.sort.api.ClientContext;
+import org.apache.inlong.sdk.sort.api.Deserializer;
+import org.apache.inlong.sdk.sort.api.Interceptor;
+import org.apache.inlong.sdk.sort.api.MultiTopicsFetcher;
+import org.apache.inlong.sdk.sort.api.SeekerFactory;
+import org.apache.inlong.sdk.sort.api.SortClientConfig;
+import org.apache.inlong.sdk.sort.entity.InLongMessage;
+import org.apache.inlong.sdk.sort.entity.InLongTopic;
+import org.apache.inlong.sdk.sort.entity.MessageRecord;
+import org.apache.inlong.sdk.sort.fetcher.pulsar.PulsarMultiTopicsFetcher;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.OffsetAndMetadata;
+import org.apache.kafka.clients.consumer.RangeAssignor;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.header.Header;
+import org.apache.kafka.common.header.Headers;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Properties;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * Kafka multi topics fetcher
+ */
+public class KafkaMultiTopicsFetcher extends MultiTopicsFetcher {
+    private static final Logger LOGGER = LoggerFactory.getLogger(PulsarMultiTopicsFetcher.class);
+    private final ConcurrentHashMap<TopicPartition, OffsetAndMetadata> commitOffsetMap;
+    private final ConcurrentHashMap<TopicPartition, ConcurrentSkipListMap<Long, Boolean>> ackOffsetMap;
+    private final String bootstrapServers;
+    private ConsumerRebalanceListener listener;
+    private KafkaConsumer<byte[], byte[]> consumer;
+
+    public KafkaMultiTopicsFetcher(
+            List<InLongTopic> topics,
+            ClientContext context,
+            Interceptor interceptor,
+            Deserializer deserializer,
+            String bootstrapServers) {
+        super(topics, context, interceptor, deserializer);
+        this.bootstrapServers = bootstrapServers;
+        this.commitOffsetMap = new ConcurrentHashMap<>();
+        this.ackOffsetMap = new ConcurrentHashMap<>();
+    }
+
+    @Override
+    public boolean init() {
+        try {
+            this.consumer = createKafkaConsumer();
+            InLongTopic topic = onlineTopics.values().stream().findFirst().get();
+            this.seeker = SeekerFactory.createKafkaSeeker(consumer, topic);
+            this.listener = new AckOffsetOnRebalance(topic.getInLongCluster().getClusterId(), seeker,
+                    commitOffsetMap);
+            consumer.subscribe(onlineTopics.keySet(), listener);
+            return true;
+        } catch (Throwable t) {
+            LOGGER.error("failed to init kafka consumer: {}", t.getMessage(), t);
+            return false;
+        }
+    }
+
+    private KafkaConsumer<byte[], byte[]> createKafkaConsumer() {
+        Properties properties = new Properties();
+        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
+        properties.put(ConsumerConfig.GROUP_ID_CONFIG, context.getConfig().getSortTaskId());
+        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
+                ByteArrayDeserializer.class.getName());
+        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
+                ByteArrayDeserializer.class.getName());
+        properties.put(ConsumerConfig.RECEIVE_BUFFER_CONFIG,
+                context.getConfig().getKafkaSocketRecvBufferSize());
+        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
+        SortClientConfig.ConsumeStrategy offsetResetStrategy = context.getConfig().getOffsetResetStrategy();
+        if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.lastest
+                || offsetResetStrategy == SortClientConfig.ConsumeStrategy.lastest_absolutely) {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
+        } else if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest
+                || offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest_absolutely) {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        } else {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "none");
+        }
+        properties.put(ConsumerConfig.FETCH_MAX_BYTES_CONFIG,
+                context.getConfig().getKafkaFetchSizeBytes());
+        properties.put(ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG,
+                context.getConfig().getKafkaFetchWaitMs());
+        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
+        properties.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG,
+                RangeAssignor.class.getName());
+        properties.put(ConsumerConfig.CONNECTIONS_MAX_IDLE_MS_CONFIG, 120000L);
+        LOGGER.info("start to create kafka consumer:{}", properties);
+        return new KafkaConsumer<>(properties);
+    }
+
+    @Override
+    public void ack(String msgOffset) throws Exception {
+        // the format of multi topic kafka fetcher msg offset is topic:partitionId:offset, such as topic1:20:1746839
+        String[] offset = msgOffset.split(":");
+        if (offset.length != 3) {
+            throw new Exception("offset is illegal, the correct format is topic:partitionId:offset, "
+                    + "the error offset is:" + msgOffset);
+        }
+
+        // parse topic partition offset
+        TopicPartition topicPartition = new TopicPartition(offset[0], Integer.parseInt(offset[1]));
+        long ackOffset = Long.parseLong(offset[2]);
+
+        // ack
+        if (!ackOffsetMap.containsKey(topicPartition) || !ackOffsetMap.get(topicPartition).containsKey(ackOffset)) {
+            LOGGER.warn("did not find offsetMap or ack offset of {}, offset {}, just ignore it",
+                    topicPartition, ackOffset);
+            return;
+        }
+
+        // mark this offset as acked.
+        ConcurrentSkipListMap<Long, Boolean> tpOffsetMap = ackOffsetMap.get(topicPartition);
+        // to prevent race condition in AckOffsetOnRebalance::onPartitionsRevoked
+        if (Objects.nonNull(tpOffsetMap)) {
+            tpOffsetMap.put(ackOffset, true);
+        }
+    }
+
+    @Override
+    public void pause() {
+        consumer.pause(consumer.assignment());
+    }
+
+    @Override
+    public void resume() {
+        consumer.resume(consumer.assignment());
+    }
+
+    @Override
+    public boolean close() {
+        this.closed = true;
+        try {
+            if (fetchThread != null) {
+                fetchThread.interrupt();
+            }
+            if (consumer != null) {
+                prepareCommit();
+                consumer.commitSync(commitOffsetMap);
+                consumer.close();
+            }
+            commitOffsetMap.clear();
+        } catch (Throwable t) {
+            LOGGER.warn(t.getMessage(), t);
+        }
+        LOGGER.info("closed kafka multi topic fetcher");
+        return true;
+    }
+
+    @Override
+    public boolean isClosed() {
+        return closed;
+    }
+
+    @Override
+    public void setStopConsume(boolean stopConsume) {
+        this.stopConsume = stopConsume;
+    }
+
+    @Override
+    public boolean isStopConsume() {
+        return stopConsume;
+    }
+
+    @Override
+    public List<InLongTopic> getTopics() {
+        return new ArrayList<>(onlineTopics.values());
+    }
+
+    @Override
+    public boolean updateTopics(List<InLongTopic> topics) {
+        if (needUpdate(topics)) {
+            return updateAll(topics);
+        }
+        LOGGER.info("no need to update topics");
+        return false;
+    }
+
+    private boolean updateAll(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topics is empty or null");
+            return false;
+        }
+
+        // stop
+        this.setStopConsume(true);
+
+        // update
+        this.onlineTopics = newTopics.stream().collect(Collectors.toMap(InLongTopic::getTopic, t -> t));
+        InLongTopic topic = onlineTopics.values().stream().findFirst().get();
+        this.seeker = SeekerFactory.createKafkaSeeker(consumer, topic);
+        this.listener = new AckOffsetOnRebalance(topic.getInLongCluster().getClusterId(), seeker,
+                commitOffsetMap, ackOffsetMap);
+        Optional.ofNullable(interceptor).ifPresent(i -> i.configure(topic));
+
+        // subscribe new
+        consumer.subscribe(onlineTopics.keySet(), listener);
+
+        // resume
+        this.setStopConsume(false);
+        return true;
+    }
+
+    private void prepareCommit() {
+        ackOffsetMap.forEach((topicPartition, tpOffsetMap) -> {
+            synchronized (tpOffsetMap) {
+                // get the remove list
+                List<Long> removeOffsets = new ArrayList<>();
+                long commitOffset = -1;
+                for (Long ackOffset : tpOffsetMap.keySet()) {
+                    if (!tpOffsetMap.get(ackOffset)) {
+                        break;
+                    }
+                    removeOffsets.add(ackOffset);
+                    commitOffset = ackOffset;
+                }
+                // if the first offset has not been acked yet, do nothing
+                if (commitOffset == -1) {
+                    return;
+                }
+
+                // remove offset and commit offset
+                removeOffsets.forEach(tpOffsetMap::remove);
+                commitOffsetMap.put(topicPartition, new OffsetAndMetadata(commitOffset));
+            }
+        });
+    }
+
+    public class Fetcher implements Runnable {
+
+        private void commitKafkaOffset() {
+            prepareCommit();
+            if (consumer != null) {
+                try {
+                    consumer.commitAsync(commitOffsetMap, null);
+                    commitOffsetMap.clear();
+                } catch (Exception e) {
+                    LOGGER.error("commit kafka offset failed: {}", e.getMessage(), e);

Review Comment:
   `LOGGER.error("commit kafka offset failed: ", e);`
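
For readers following the offset flow in this hunk: ack() only flips a flag in a per-partition ConcurrentSkipListMap, and prepareCommit() later walks that sorted map to find the longest fully-acked prefix, so out-of-order acks never commit past a gap. A reduced sketch of the same bookkeeping (helper names are illustrative):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentSkipListMap;

class ContiguousAckTracker {
    // offset -> acked?, kept sorted by the skiplist
    private final ConcurrentSkipListMap<Long, Boolean> acks = new ConcurrentSkipListMap<>();

    void onFetch(long offset) { acks.put(offset, false); }

    void onAck(long offset) { acks.computeIfPresent(offset, (k, v) -> true); }

    // Returns the highest offset of the contiguous acked prefix and drops
    // that prefix from the map; -1 if the lowest offset is still pending.
    long drainCommittable() {
        long commit = -1;
        List<Long> done = new ArrayList<>();
        for (Map.Entry<Long, Boolean> e : acks.entrySet()) {
            if (!e.getValue()) {
                break;                 // gap: stop at the first un-acked offset
            }
            done.add(e.getKey());
            commit = e.getKey();
        }
        done.forEach(acks::remove);
        return commit;
    }
}
```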



##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/pulsar/PulsarMultiTopicsFetcher.java:
##########
@@ -0,0 +1,411 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.sort.fetcher.pulsar;
+
+import com.google.common.base.Preconditions;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.inlong.sdk.sort.api.ClientContext;
+import org.apache.inlong.sdk.sort.api.Deserializer;
+import org.apache.inlong.sdk.sort.api.Interceptor;
+import org.apache.inlong.sdk.sort.api.MultiTopicsFetcher;
+import org.apache.inlong.sdk.sort.api.Seeker;
+import org.apache.inlong.sdk.sort.api.SeekerFactory;
+import org.apache.inlong.sdk.sort.api.SortClientConfig;
+import org.apache.inlong.sdk.sort.entity.InLongMessage;
+import org.apache.inlong.sdk.sort.entity.InLongTopic;
+import org.apache.inlong.sdk.sort.entity.MessageRecord;
+import org.apache.pulsar.client.api.Consumer;
+import org.apache.pulsar.client.api.Message;
+import org.apache.pulsar.client.api.MessageId;
+import org.apache.pulsar.client.api.Messages;
+import org.apache.pulsar.client.api.PulsarClient;
+import org.apache.pulsar.client.api.PulsarClientException;
+import org.apache.pulsar.client.api.Schema;
+import org.apache.pulsar.client.api.SubscriptionInitialPosition;
+import org.apache.pulsar.client.api.SubscriptionType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Objects;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * MultiTopicsFetcher for pulsar.
+ *
+ */
+public class PulsarMultiTopicsFetcher extends MultiTopicsFetcher {
+    private static final Logger LOGGER = LoggerFactory.getLogger(PulsarMultiTopicsFetcher.class);
+    private PulsarConsumer currentConsumer;
+    private List<PulsarConsumer> toBeRemovedConsumers = new LinkedList<>();
+    private PulsarClient pulsarClient;
+
+    public PulsarMultiTopicsFetcher(
+            List<InLongTopic> topics,
+            ClientContext context,
+            Interceptor interceptor,
+            Deserializer deserializer,
+            PulsarClient pulsarClient) {
+        super(topics, context, interceptor, deserializer);
+        this.pulsarClient = Preconditions.checkNotNull(pulsarClient);
+    }
+
+    @Override
+    public boolean init() {
+        Consumer<byte[]> newConsumer = createConsumer(onlineTopics.values());
+        if (Objects.isNull(newConsumer)) {
+            LOGGER.error("create new consumer is null");
+            return false;
+        }
+        this.currentConsumer = new PulsarConsumer(newConsumer);
+        InLongTopic firstTopic = onlineTopics.values().stream().findFirst().get();
+        this.seeker = SeekerFactory.createPulsarSeeker(newConsumer, firstTopic);
+        String threadName = String.format("sort_sdk_pulsar_multi_topic_fetch_thread_%d", this.hashCode());
+        this.fetchThread = new Thread(new PulsarMultiTopicsFetcher.Fetcher(), threadName);
+        this.fetchThread.start();
+        this.executor.scheduleWithFixedDelay(this::clearRemovedConsumerList,
+                context.getConfig().getCleanOldConsumerIntervalSec(),
+                context.getConfig().getCleanOldConsumerIntervalSec(),
+                TimeUnit.SECONDS);
+        return true;
+    }
+
+    private void clearRemovedConsumerList() {
+        long cur = System.currentTimeMillis();
+        List<PulsarConsumer> newList = new LinkedList<>();
+        toBeRemovedConsumers.forEach(consumer -> {
+            long diff = cur - consumer.stopTime;
+            if (diff > context.getConfig().getCleanOldConsumerIntervalSec() * 1000L || consumer.isEmpty()) {
+                try {
+                    consumer.close();
+                } catch (PulsarClientException e) {
+                    LOGGER.warn("exception in close old consumer {}", e.getMessage(), e);
+                }
+                return;
+            }
+            newList.add(consumer);
+        });
+        LOGGER.info("after clear old consumers, the old size is {}, current size is {}",
+                toBeRemovedConsumers.size(), newList.size());
+        this.toBeRemovedConsumers = newList;
+    }
+
+    private boolean updateAll(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topics is empty or null");
+            return false;
+        }
+        // stop old;
+        this.setStopConsume(true);
+        this.currentConsumer.pause();
+        // create new;
+        Consumer<byte[]> newConsumer = createConsumer(newTopics);
+        if (Objects.isNull(newConsumer)) {
+            currentConsumer.resume();
+            this.setStopConsume(false);
+            return false;
+        }
+        PulsarConsumer newConsumerWrapper = new PulsarConsumer(newConsumer);
+        InLongTopic firstTopic = newTopics.stream().findFirst().get();
+        final Seeker newSeeker = SeekerFactory.createPulsarSeeker(newConsumer, firstTopic);
+        // save
+        currentConsumer.setStopTime(System.currentTimeMillis());
+        toBeRemovedConsumers.add(currentConsumer);
+        // replace
+        this.currentConsumer = newConsumerWrapper;
+        this.seeker = newSeeker;
+        this.interceptor.configure(firstTopic);
+        this.onlineTopics = newTopics.stream().collect(Collectors.toMap(InLongTopic::getTopic, t -> t));
+        // resume
+        this.setStopConsume(false);
+        return true;
+    }
+
+    private Consumer<byte[]> createConsumer(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topic is empty or null");
+            return null;
+        }
+        try {
+            SubscriptionInitialPosition position = SubscriptionInitialPosition.Latest;
+            SortClientConfig.ConsumeStrategy offsetResetStrategy = context.getConfig().getOffsetResetStrategy();
+            if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest
+                    || offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest_absolutely) {
+                LOGGER.info("the subscription initial position is earliest!");
+                position = SubscriptionInitialPosition.Earliest;
+            }
+
+            List<String> topicNames = newTopics.stream()
+                    .map(InLongTopic::getTopic)
+                    .collect(Collectors.toList());
+            Consumer<byte[]> consumer = pulsarClient.newConsumer(Schema.BYTES)
+                    .topics(topicNames)
+                    .subscriptionName(context.getConfig().getSortTaskId())
+                    .subscriptionType(SubscriptionType.Shared)
+                    .startMessageIdInclusive()
+                    .subscriptionInitialPosition(position)
+                    .ackTimeout(context.getConfig().getAckTimeoutSec(), TimeUnit.SECONDS)
+                    .receiverQueueSize(context.getConfig().getPulsarReceiveQueueSize())
+                    .subscribe();
+            LOGGER.info("create consumer for topics {}", topicNames);
+            return consumer;
+        } catch (Exception e) {
+            LOGGER.error(e.getMessage(), e);
+            return null;
+        }
+    }
+
+    @Override
+    public void ack(String msgOffset) throws Exception {
+        if (StringUtils.isBlank(msgOffset)) {
+            LOGGER.error("ack failed, msg offset should not be blank");
+            return;
+        }
+        if (Objects.isNull(currentConsumer)) {
+            LOGGER.error("ack failed, consumer is null");
+            return;
+        }
+        // if this ack belongs to current consumer
+        MessageId messageId = currentConsumer.getMessageId(msgOffset);
+        if (!Objects.isNull(messageId)) {
+            doAck(msgOffset, this.currentConsumer, messageId);
+            return;
+        }
+
+        // if this ack doesn't belong to current consumer, find in to be removed ones.
+        for (PulsarConsumer oldConsumer : toBeRemovedConsumers) {
+            MessageId id = oldConsumer.getMessageId(msgOffset);
+            if (Objects.isNull(id)) {
+                continue;
+            }
+            doAck(msgOffset, oldConsumer, id);
+            LOGGER.info("ack an old consumer message");
+            return;
+        }
+        context.getDefaultStateCounter().addAckFailTimes(1L);
+        LOGGER.error("in pulsar multi topic fetcher, messageId == null");
+    }
+
+    private void doAck(String msgOffset, PulsarConsumer consumer, MessageId messageId) {
+        if (!consumer.isConnected()) {
+            return;
+        }
+        InLongTopic topic = consumer.getTopic(msgOffset);
+        consumer.acknowledgeAsync(messageId)
+                .thenAccept(ctx -> ackSucc(msgOffset, topic, this.currentConsumer))
+                .exceptionally(exception -> {
+                    LOGGER.error("ack fail:{} {},error:{}",
+                            topic, msgOffset, exception.getMessage(), exception);
+                    context.getStateCounterByTopic(topic).addAckFailTimes(1L);
+                    return null;
+                });
+    }
+
+    private void ackSucc(String offset, InLongTopic topic, PulsarConsumer consumer) {
+        consumer.remove(offset);
+        context.getStateCounterByTopic(topic).addAckSuccTimes(1L);
+    }
+
+    @Override
+    public void pause() {
+        if (Objects.nonNull(currentConsumer)) {
+            currentConsumer.pause();
+        }
+    }
+
+    @Override
+    public void resume() {
+        if (Objects.nonNull(currentConsumer)) {
+            currentConsumer.resume();
+        }
+    }
+
+    @Override
+    public boolean close() {
+        mainLock.writeLock().lock();
+        try {
+            LOGGER.info("closed online topics {}", onlineTopics);
+            try {
+                if (currentConsumer != null) {
+                    currentConsumer.close();
+                }
+                if (fetchThread != null) {
+                    fetchThread.interrupt();
+                }
+            } catch (PulsarClientException e) {
+                LOGGER.warn(e.getMessage(), e);
+            }
+            toBeRemovedConsumers.stream()
+                    .filter(Objects::nonNull)
+                    .forEach(c -> {
+                        try {
+                            c.close();
+                        } catch (PulsarClientException e) {
+                            LOGGER.warn(e.getMessage(), e);
+                        }
+                    });
+            toBeRemovedConsumers.clear();
+            return true;
+        } finally {
+            this.closed = true;
+            mainLock.writeLock().unlock();
+        }
+    }
+
+    @Override
+    public boolean isClosed() {
+        return closed;
+    }
+
+    @Override
+    public void setStopConsume(boolean stopConsume) {
+        this.stopConsume = stopConsume;
+    }
+
+    @Override
+    public boolean isStopConsume() {
+        return stopConsume;
+    }
+
+    @Override
+    public List<InLongTopic> getTopics() {
+        return new ArrayList<>(onlineTopics.values());
+    }
+
+    @Override
+    public boolean updateTopics(List<InLongTopic> topics) {
+        if (needUpdate(topics)) {
+            return updateAll(topics);
+        }
+        LOGGER.info("no need to update multi topic fetcher");
+        return false;
+    }
+
+    public class Fetcher implements Runnable {
+
+        /**
+         * pass the received messages to the onFinishedBatch callback
+         *
+         * @param messageRecords {@link List}
+         */
+        private void handleAndCallbackMsg(List<MessageRecord> messageRecords) {
+            long start = System.currentTimeMillis();
+            try {
+                context.getDefaultStateCounter().addCallbackTimes(1L);
+                context.getConfig().getCallback().onFinishedBatch(messageRecords);
+                context.getDefaultStateCounter()
+                        .addCallbackTimeCost(System.currentTimeMillis() - start).addCallbackDoneTimes(1L);
+            } catch (Exception e) {
+                context.getDefaultStateCounter().addCallbackErrorTimes(1L);
+                LOGGER.error("failed to callback {}", e.getMessage(), e);
+            }
+        }
+
+        private String getOffset(MessageId msgId) {
+            return Base64.getEncoder().encodeToString(msgId.toByteArray());
+        }
+
+        @Override
+        public void run() {
+            boolean hasPermit;
+            while (true) {
+                hasPermit = false;
+                try {
+                    if (context.getConfig().isStopConsume() || stopConsume) {
+                        TimeUnit.MILLISECONDS.sleep(50);
+                        continue;
+                    }
+
+                    if (sleepTime > 0) {
+                        TimeUnit.MILLISECONDS.sleep(sleepTime);
+                    }
+
+                    context.acquireRequestPermit();
+                    hasPermit = true;
+                    context.getDefaultStateCounter().addMsgCount(1L).addFetchTimes(1L);
+
+                    long startFetchTime = System.currentTimeMillis();
+                    Messages<byte[]> messages = currentConsumer.batchReceive();
+
+                    context.getDefaultStateCounter().addFetchTimeCost(System.currentTimeMillis() - startFetchTime);
+                    if (null != messages && messages.size() != 0) {
+                        List<MessageRecord> msgs = new ArrayList<>();
+                        for (Message<byte[]> msg : messages) {
+                            String topicName = msg.getTopicName();
+                            InLongTopic topic = onlineTopics.get(topicName);
+                            if (Objects.isNull(topic)) {
+                                LOGGER.error("got a message with topic {}, which is not subscribe", topicName);
+                                continue;
+                            }
+                            // if need seek
+                            if (msg.getPublishTime() < seeker.getSeekTime()) {
+                                seeker.seek();
+                                break;
+                            }
+                            String offsetKey = getOffset(msg.getMessageId());
+                            currentConsumer.put(offsetKey, topic, msg.getMessageId());
+
+                            // deserialize
+                            List<InLongMessage> inLongMessages = deserializer
+                                    .deserialize(context, topic, msg.getProperties(), msg.getData());
+                            // intercept
+                            inLongMessages = interceptor.intercept(inLongMessages);
+                            if (inLongMessages.isEmpty()) {
+                                ack(offsetKey);
+                                continue;
+                            }
+
+                            msgs.add(new MessageRecord(topic.getTopicKey(),
+                                    inLongMessages,
+                                    offsetKey, System.currentTimeMillis()));
+                            context.getStateCounterByTopic(topic).addConsumeSize(msg.getData().length);
+                        }
+                        context.getDefaultStateCounter().addMsgCount(msgs.size());
+                        handleAndCallbackMsg(msgs);
+                        sleepTime = 0L;
+                    } else {
+                        context.getDefaultStateCounter().addEmptyFetchTimes(1L);
+                        emptyFetchTimes++;
+                        if (emptyFetchTimes >= context.getConfig().getEmptyPollTimes()) {
+                            sleepTime = Math.min((sleepTime += context.getConfig().getEmptyPollSleepStepMs()),
+                                    context.getConfig().getMaxEmptyPollSleepMs());
+                            emptyFetchTimes = 0;
+                        }
+                    }
+                } catch (Exception e) {
+                    context.getDefaultStateCounter().addFetchErrorTimes(1L);
+                    LOGGER.error("failed to fetch msg: {}", e.getMessage(), e);

Review Comment:
   ditto.
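
A side note on the Pulsar offset keys used in this run() loop: getOffset() Base64-encodes the serialized MessageId so it can travel through the callback as a plain String. The fetcher itself resolves keys back through its own map; the encoding is nonetheless round-trippable, as this sketch shows (the decode direction is included only for illustration):

```java
import java.io.IOException;
import java.util.Base64;

import org.apache.pulsar.client.api.MessageId;

class PulsarOffsetKey {
    static String encode(MessageId msgId) {
        return Base64.getEncoder().encodeToString(msgId.toByteArray());
    }

    static MessageId decode(String offsetKey) throws IOException {
        return MessageId.fromByteArray(Base64.getDecoder().decode(offsetKey));
    }
}
```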



##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/kafka/KafkaMultiTopicsFetcher.java:
##########
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.sort.fetcher.kafka;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.inlong.sdk.sort.api.ClientContext;
+import org.apache.inlong.sdk.sort.api.Deserializer;
+import org.apache.inlong.sdk.sort.api.Interceptor;
+import org.apache.inlong.sdk.sort.api.MultiTopicsFetcher;
+import org.apache.inlong.sdk.sort.api.SeekerFactory;
+import org.apache.inlong.sdk.sort.api.SortClientConfig;
+import org.apache.inlong.sdk.sort.entity.InLongMessage;
+import org.apache.inlong.sdk.sort.entity.InLongTopic;
+import org.apache.inlong.sdk.sort.entity.MessageRecord;
+import org.apache.inlong.sdk.sort.fetcher.pulsar.PulsarMultiTopicsFetcher;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.OffsetAndMetadata;
+import org.apache.kafka.clients.consumer.RangeAssignor;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.header.Header;
+import org.apache.kafka.common.header.Headers;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Properties;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * Kafka multi topics fetcher
+ */
+public class KafkaMultiTopicsFetcher extends MultiTopicsFetcher {
+    private static final Logger LOGGER = LoggerFactory.getLogger(PulsarMultiTopicsFetcher.class);
+    private final ConcurrentHashMap<TopicPartition, OffsetAndMetadata> commitOffsetMap;
+    private final ConcurrentHashMap<TopicPartition, ConcurrentSkipListMap<Long, Boolean>> ackOffsetMap;
+    private final String bootstrapServers;
+    private ConsumerRebalanceListener listener;
+    private KafkaConsumer<byte[], byte[]> consumer;
+
+    public KafkaMultiTopicsFetcher(
+            List<InLongTopic> topics,
+            ClientContext context,
+            Interceptor interceptor,
+            Deserializer deserializer,
+            String bootstrapServers) {
+        super(topics, context, interceptor, deserializer);
+        this.bootstrapServers = bootstrapServers;
+        this.commitOffsetMap = new ConcurrentHashMap<>();
+        this.ackOffsetMap = new ConcurrentHashMap<>();
+    }
+
+    @Override
+    public boolean init() {
+        try {
+            this.consumer = createKafkaConsumer();
+            InLongTopic topic = onlineTopics.values().stream().findFirst().get();
+            this.seeker = SeekerFactory.createKafkaSeeker(consumer, topic);
+            this.listener = new AckOffsetOnRebalance(topic.getInLongCluster().getClusterId(), seeker,
+                    commitOffsetMap);
+            consumer.subscribe(onlineTopics.keySet(), listener);
+            return true;
+        } catch (Throwable t) {
+            LOGGER.error("failed to init kafka consumer: {}", t.getMessage(), t);
+            return false;
+        }
+    }
+
+    private KafkaConsumer<byte[], byte[]> createKafkaConsumer() {
+        Properties properties = new Properties();
+        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
+        properties.put(ConsumerConfig.GROUP_ID_CONFIG, context.getConfig().getSortTaskId());
+        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
+                ByteArrayDeserializer.class.getName());
+        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
+                ByteArrayDeserializer.class.getName());
+        properties.put(ConsumerConfig.RECEIVE_BUFFER_CONFIG,
+                context.getConfig().getKafkaSocketRecvBufferSize());
+        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
+        SortClientConfig.ConsumeStrategy offsetResetStrategy = context.getConfig().getOffsetResetStrategy();
+        if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.lastest
+                || offsetResetStrategy == SortClientConfig.ConsumeStrategy.lastest_absolutely) {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
+        } else if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest
+                || offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest_absolutely) {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        } else {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "none");
+        }
+        properties.put(ConsumerConfig.FETCH_MAX_BYTES_CONFIG,
+                context.getConfig().getKafkaFetchSizeBytes());
+        properties.put(ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG,
+                context.getConfig().getKafkaFetchWaitMs());
+        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
+        properties.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG,
+                RangeAssignor.class.getName());
+        properties.put(ConsumerConfig.CONNECTIONS_MAX_IDLE_MS_CONFIG, 120000L);
+        LOGGER.info("start to create kafka consumer:{}", properties);
+        return new KafkaConsumer<>(properties);
+    }
+
+    @Override
+    public void ack(String msgOffset) throws Exception {
+        // the format of multi topic kafka fetcher msg offset is topic:partitionId:offset, such as topic1:20:1746839
+        String[] offset = msgOffset.split(":");
+        if (offset.length != 3) {
+            throw new Exception("offset is illegal, the correct format is topic:partitionId:offset, "
+                    + "the error offset is:" + msgOffset);
+        }
+
+        // parse topic partition offset
+        TopicPartition topicPartition = new TopicPartition(offset[0], Integer.parseInt(offset[1]));
+        long ackOffset = Long.parseLong(offset[2]);
+
+        // ack
+        if (!ackOffsetMap.containsKey(topicPartition) || !ackOffsetMap.get(topicPartition).containsKey(ackOffset)) {
+            LOGGER.warn("did not find offsetMap or ack offset of {}, offset {}, just ignore it",
+                    topicPartition, ackOffset);
+            return;
+        }
+
+        // mark this offset as acked.
+        ConcurrentSkipListMap<Long, Boolean> tpOffsetMap = ackOffsetMap.get(topicPartition);
+        // to prevent race condition in AckOffsetOnRebalance::onPartitionsRevoked
+        if (Objects.nonNull(tpOffsetMap)) {
+            tpOffsetMap.put(ackOffset, true);
+        }
+    }
+
+    @Override
+    public void pause() {
+        consumer.pause(consumer.assignment());
+    }
+
+    @Override
+    public void resume() {
+        consumer.resume(consumer.assignment());
+    }
+
+    @Override
+    public boolean close() {
+        this.closed = true;
+        try {
+            if (fetchThread != null) {
+                fetchThread.interrupt();
+            }
+            if (consumer != null) {
+                prepareCommit();
+                consumer.commitSync(commitOffsetMap);
+                consumer.close();
+            }
+            commitOffsetMap.clear();
+        } catch (Throwable t) {
+            LOGGER.warn(t.getMessage(), t);
+        }
+        LOGGER.info("closed kafka multi topic fetcher");
+        return true;
+    }
+
+    @Override
+    public boolean isClosed() {
+        return closed;
+    }
+
+    @Override
+    public void setStopConsume(boolean stopConsume) {
+        this.stopConsume = stopConsume;
+    }
+
+    @Override
+    public boolean isStopConsume() {
+        return stopConsume;
+    }
+
+    @Override
+    public List<InLongTopic> getTopics() {
+        return new ArrayList<>(onlineTopics.values());
+    }
+
+    @Override
+    public boolean updateTopics(List<InLongTopic> topics) {
+        if (needUpdate(topics)) {
+            return updateAll(topics);
+        }
+        LOGGER.info("no need to update topics");
+        return false;
+    }
+
+    private boolean updateAll(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topics is empty or null");
+            return false;
+        }
+
+        // stop
+        this.setStopConsume(true);
+
+        // update
+        this.onlineTopics = newTopics.stream().collect(Collectors.toMap(InLongTopic::getTopic, t -> t));
+        InLongTopic topic = onlineTopics.values().stream().findFirst().get();
+        this.seeker = SeekerFactory.createKafkaSeeker(consumer, topic);
+        this.listener = new AckOffsetOnRebalance(topic.getInLongCluster().getClusterId(), seeker,
+                commitOffsetMap, ackOffsetMap);
+        Optional.ofNullable(interceptor).ifPresent(i -> i.configure(topic));
+
+        // subscribe new
+        consumer.subscribe(onlineTopics.keySet(), listener);
+
+        // resume
+        this.setStopConsume(false);
+        return true;
+    }
+
+    private void prepareCommit() {
+        ackOffsetMap.forEach((topicPartition, tpOffsetMap) -> {
+            synchronized (tpOffsetMap) {
+                // get the remove list
+                List<Long> removeOffsets = new ArrayList<>();
+                long commitOffset = -1;
+                for (Long ackOffset : tpOffsetMap.keySet()) {
+                    if (!tpOffsetMap.get(ackOffset)) {
+                        break;
+                    }
+                    removeOffsets.add(ackOffset);
+                    commitOffset = ackOffset;
+                }
+                // if the first offset has not been acked yet, do nothing
+                if (commitOffset == -1) {
+                    return;
+                }
+
+                // remove offset and commit offset
+                removeOffsets.forEach(tpOffsetMap::remove);
+                commitOffsetMap.put(topicPartition, new OffsetAndMetadata(commitOffset));
+            }
+        });
+    }
+
+    public class Fetcher implements Runnable {
+
+        private void commitKafkaOffset() {
+            prepareCommit();
+            if (consumer != null) {
+                try {
+                    consumer.commitAsync(commitOffsetMap, null);
+                    commitOffsetMap.clear();
+                } catch (Exception e) {
+                    LOGGER.error("commit kafka offset failed: {}", e.getMessage(), e);
+                }
+            }
+        }
+
+        /**
+         * pass the received messages to the onFinishedBatch callback
+         *
+         * @param messageRecords {@link List} of {@link MessageRecord}
+         */
+        private void handleAndCallbackMsg(List<MessageRecord> messageRecords) {
+            long start = System.currentTimeMillis();
+            try {
+                context.getDefaultStateCounter().addCallbackTimes(1);
+                context.getConfig().getCallback().onFinishedBatch(messageRecords);
+                context.getDefaultStateCounter()
+                        .addCallbackTimeCost(System.currentTimeMillis() - start)
+                        .addCallbackDoneTimes(1);
+            } catch (Exception e) {
+                context.getDefaultStateCounter().addCallbackErrorTimes(1);
+                LOGGER.error("failed to callback: {}", e.getMessage(), e);
+            }
+        }
+
+        private String getOffset(String topic, int partitionId, long offset) {
+            TopicPartition topicPartition = new TopicPartition(topic, partitionId);
+            ackOffsetMap.computeIfAbsent(topicPartition, k -> new ConcurrentSkipListMap<>()).put(offset, false);
+            return topic + ":" + partitionId + ":" + offset;
+        }
+
+        private Map<String, String> getMsgHeaders(Headers headers) {
+            Map<String, String> headerMap = new HashMap<>();
+            for (Header header : headers) {
+                headerMap.put(header.key(), new String(header.value()));
+            }
+            return headerMap;
+        }
+
+        @Override
+        public void run() {
+            boolean hasPermit;
+            while (true) {
+                hasPermit = false;
+                try {
+                    if (context.getConfig().isStopConsume() || stopConsume) {
+                        TimeUnit.MILLISECONDS.sleep(50);
+                        continue;
+                    }
+
+                    if (sleepTime > 0) {
+                        TimeUnit.MILLISECONDS.sleep(sleepTime);
+                    }
+
+                    context.acquireRequestPermit();
+                    hasPermit = true;
+                    // fetch from kafka
+                    fetchFromKafka();
+                    // commit
+                    commitKafkaOffset();
+                } catch (Exception e) {
+                    context.getDefaultStateCounter().addFetchErrorTimes(1);
+                    LOGGER.error(e.getMessage(), e);

Review Comment:
   ditto.
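
Setting the logging nit aside, the core of the hunk above is prepareCommit(): the commit position for a partition only advances across a contiguous prefix of acked offsets in its skiplist, so one un-acked message blocks everything behind it from being committed. A standalone sketch of that pattern (hypothetical class and method names, no SDK types):

    import java.util.concurrent.ConcurrentSkipListMap;

    public class ContiguousAckSketch {
        // offset -> acked flag, kept sorted, mirroring one partition's skiplist
        private final ConcurrentSkipListMap<Long, Boolean> offsets = new ConcurrentSkipListMap<>();

        void onFetch(long offset) {
            offsets.put(offset, false); // recorded at fetch time, not yet acked
        }

        void onAck(long offset) {
            offsets.computeIfPresent(offset, (k, acked) -> true);
        }

        // highest offset safe to commit, or -1 if the first one is still in flight
        long prepareCommit() {
            long commit = -1;
            for (Long offset : offsets.keySet()) {
                if (!offsets.get(offset)) {
                    break; // the first un-acked offset stops the scan
                }
                commit = offset;
                offsets.remove(offset);
            }
            return commit;
        }

        public static void main(String[] args) {
            ContiguousAckSketch p = new ContiguousAckSketch();
            p.onFetch(1); p.onFetch(2); p.onFetch(3);
            p.onAck(1);
            p.onAck(3); // 2 is still un-acked
            System.out.println(p.prepareCommit()); // prints 1: offset 2 blocks the prefix
        }
    }

The patch itself additionally synchronizes on each per-partition map and collects the offsets into a remove list before mutating, since acks arrive from callback threads.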



##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/kafka/KafkaMultiTopicsFetcher.java:
##########
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.sort.fetcher.kafka;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.inlong.sdk.sort.api.ClientContext;
+import org.apache.inlong.sdk.sort.api.Deserializer;
+import org.apache.inlong.sdk.sort.api.Interceptor;
+import org.apache.inlong.sdk.sort.api.MultiTopicsFetcher;
+import org.apache.inlong.sdk.sort.api.SeekerFactory;
+import org.apache.inlong.sdk.sort.api.SortClientConfig;
+import org.apache.inlong.sdk.sort.entity.InLongMessage;
+import org.apache.inlong.sdk.sort.entity.InLongTopic;
+import org.apache.inlong.sdk.sort.entity.MessageRecord;
+import org.apache.inlong.sdk.sort.fetcher.pulsar.PulsarMultiTopicsFetcher;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.OffsetAndMetadata;
+import org.apache.kafka.clients.consumer.RangeAssignor;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.header.Header;
+import org.apache.kafka.common.header.Headers;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Properties;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * Kafka multi topics fetcher
+ */
+public class KafkaMultiTopicsFetcher extends MultiTopicsFetcher {
+    private static final Logger LOGGER = LoggerFactory.getLogger(KafkaMultiTopicsFetcher.class);
+    private final ConcurrentHashMap<TopicPartition, OffsetAndMetadata> commitOffsetMap;
+    private final ConcurrentHashMap<TopicPartition, ConcurrentSkipListMap<Long, Boolean>> ackOffsetMap;
+    private final String bootstrapServers;
+    private ConsumerRebalanceListener listener;
+    private KafkaConsumer<byte[], byte[]> consumer;
+
+    public KafkaMultiTopicsFetcher(
+            List<InLongTopic> topics,
+            ClientContext context,
+            Interceptor interceptor,
+            Deserializer deserializer,
+            String bootstrapServers) {
+        super(topics, context, interceptor, deserializer);
+        this.bootstrapServers = bootstrapServers;
+        this.commitOffsetMap = new ConcurrentHashMap<>();
+        this.ackOffsetMap = new ConcurrentHashMap<>();
+    }
+
+    @Override
+    public boolean init() {
+        try {
+            this.consumer = createKafkaConsumer();
+            InLongTopic topic = onlineTopics.values().stream().findFirst().get();
+            this.seeker = SeekerFactory.createKafkaSeeker(consumer, topic);
+            this.listener = new AckOffsetOnRebalance(topic.getInLongCluster().getClusterId(), seeker,
+                    commitOffsetMap);
+            consumer.subscribe(onlineTopics.keySet(), listener);
+            return true;
+        } catch (Throwable t) {
+            LOGGER.error("failed to init kafka consumer: {}", t.getMessage(), t);
+            return false;
+        }
+    }
+
+    private KafkaConsumer<byte[], byte[]> createKafkaConsumer() {
+        Properties properties = new Properties();
+        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
+        properties.put(ConsumerConfig.GROUP_ID_CONFIG, context.getConfig().getSortTaskId());
+        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
+                ByteArrayDeserializer.class.getName());
+        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
+                ByteArrayDeserializer.class.getName());
+        properties.put(ConsumerConfig.RECEIVE_BUFFER_CONFIG,
+                context.getConfig().getKafkaSocketRecvBufferSize());
+        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
+        SortClientConfig.ConsumeStrategy offsetResetStrategy = context.getConfig().getOffsetResetStrategy();
+        if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.lastest
+                || offsetResetStrategy == SortClientConfig.ConsumeStrategy.lastest_absolutely) {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
+        } else if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest
+                || offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest_absolutely) {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        } else {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "none");
+        }
+        properties.put(ConsumerConfig.FETCH_MAX_BYTES_CONFIG,
+                context.getConfig().getKafkaFetchSizeBytes());
+        properties.put(ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG,
+                context.getConfig().getKafkaFetchWaitMs());
+        properties.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG,
+                RangeAssignor.class.getName());
+        properties.put(ConsumerConfig.CONNECTIONS_MAX_IDLE_MS_CONFIG, 120000L);
+        LOGGER.info("start to create kafka consumer:{}", properties);
+        return new KafkaConsumer<>(properties);
+    }
+
+    @Override
+    public void ack(String msgOffset) throws Exception {
+        // the format of multi topic kafka fetcher msg offset is topic:partitionId:offset, such as topic1:20:1746839
+        String[] offset = msgOffset.split(":");
+        if (offset.length != 3) {
+            throw new Exception("offset is illegal, the correct format is topic:partitionId:offset, "
+                    + "the error offset is:" + msgOffset);
+        }
+
+        // parse topic partition offset
+        TopicPartition topicPartition = new TopicPartition(offset[0], Integer.parseInt(offset[1]));
+        long ackOffset = Long.parseLong(offset[2]);
+
+        // ack
+        if (!ackOffsetMap.containsKey(topicPartition) || !ackOffsetMap.get(topicPartition).containsKey(ackOffset)) {
+            LOGGER.warn("did not find offsetMap or ack offset of {}, offset {}, just ignore it",
+                    topicPartition, ackOffset);
+            return;
+        }
+
+        // mark this offset as acked
+        ConcurrentSkipListMap<Long, Boolean> tpOffsetMap = ackOffsetMap.get(topicPartition);
+        // to prevent race condition in AckOffsetOnRebalance::onPartitionsRevoked
+        if (Objects.nonNull(tpOffsetMap)) {
+            tpOffsetMap.put(ackOffset, true);
+        }
+    }
+
+    @Override
+    public void pause() {
+        consumer.pause(consumer.assignment());
+    }
+
+    @Override
+    public void resume() {
+        consumer.resume(consumer.assignment());
+    }
+
+    @Override
+    public boolean close() {
+        this.closed = true;
+        try {
+            if (fetchThread != null) {
+                fetchThread.interrupt();
+            }
+            if (consumer != null) {
+                prepareCommit();
+                consumer.commitSync(commitOffsetMap);
+                consumer.close();
+            }
+            commitOffsetMap.clear();
+        } catch (Throwable t) {
+            LOGGER.warn(t.getMessage(), t);

Review Comment:
   No need to call getMessage() when the full stack trace of t is already printed.
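
For anyone skimming the thread: with SLF4J, the throwable passed as the last argument already prints its own message as part of the stack trace, so using getMessage() as the log message duplicates it. A minimal illustration (generic names, not part of the patch):

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    public class LoggingIdiomSketch {
        private static final Logger LOGGER = LoggerFactory.getLogger(LoggingIdiomSketch.class);

        public static void main(String[] args) {
            try {
                throw new IllegalStateException("boom");
            } catch (Exception t) {
                // redundant: "boom" is printed twice, once as the log message
                // and once inside the stack trace
                LOGGER.warn(t.getMessage(), t);
                // preferred: a fixed description; the stack trace (message
                // included) comes from the last throwable argument
                LOGGER.warn("failed to close kafka fetcher", t);
            }
        }
    }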





[GitHub] [inlong] vernedeng commented on a diff in pull request #5659: [INLONG-5623][SDK] Support kafka multi topics fetcher

Posted by GitBox <gi...@apache.org>.
vernedeng commented on code in PR #5659:
URL: https://github.com/apache/inlong/pull/5659#discussion_r953440955


##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/api/SortClientConfig.java:
##########
@@ -367,6 +376,8 @@ public void setParameters(Map<String, String> sortSdkParams) {
         this.updateMetaDataIntervalSec = NumberUtils.toInt(sortSdkParams.get("updateMetaDataIntervalSec"),
                 updateMetaDataIntervalSec);
         this.ackTimeoutSec = NumberUtils.toInt(sortSdkParams.get("ackTimeoutSec"), ackTimeoutSec);
+        this.cleanOldConsumerIntervalSec = NumberUtils.toInt(sortSdkParams.get("cleanOldConsumerIntervalSec"),

Review Comment:
   thanks, I will fix it in #5669





[GitHub] [inlong] vernedeng commented on a diff in pull request #5659: [INLONG-5623][SDK] Support kafka multi topics fetcher

Posted by GitBox <gi...@apache.org>.
vernedeng commented on code in PR #5659:
URL: https://github.com/apache/inlong/pull/5659#discussion_r953468538


##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/kafka/KafkaMultiTopicsFetcher.java:
##########
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.sort.fetcher.kafka;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.inlong.sdk.sort.api.ClientContext;
+import org.apache.inlong.sdk.sort.api.Deserializer;
+import org.apache.inlong.sdk.sort.api.Interceptor;
+import org.apache.inlong.sdk.sort.api.MultiTopicsFetcher;
+import org.apache.inlong.sdk.sort.api.SeekerFactory;
+import org.apache.inlong.sdk.sort.api.SortClientConfig;
+import org.apache.inlong.sdk.sort.entity.InLongMessage;
+import org.apache.inlong.sdk.sort.entity.InLongTopic;
+import org.apache.inlong.sdk.sort.entity.MessageRecord;
+import org.apache.inlong.sdk.sort.fetcher.pulsar.PulsarMultiTopicsFetcher;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.OffsetAndMetadata;
+import org.apache.kafka.clients.consumer.RangeAssignor;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.header.Header;
+import org.apache.kafka.common.header.Headers;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Properties;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * Kafka multi topics fetcher
+ */
+public class KafkaMultiTopicsFetcher extends MultiTopicsFetcher {
+    private static final Logger LOGGER = LoggerFactory.getLogger(KafkaMultiTopicsFetcher.class);
+    private final ConcurrentHashMap<TopicPartition, OffsetAndMetadata> commitOffsetMap;
+    private final ConcurrentHashMap<TopicPartition, ConcurrentSkipListMap<Long, Boolean>> ackOffsetMap;
+    private final String bootstrapServers;
+    private ConsumerRebalanceListener listener;
+    private KafkaConsumer<byte[], byte[]> consumer;
+
+    public KafkaMultiTopicsFetcher(
+            List<InLongTopic> topics,
+            ClientContext context,
+            Interceptor interceptor,
+            Deserializer deserializer,
+            String bootstrapServers) {
+        super(topics, context, interceptor, deserializer);
+        this.bootstrapServers = bootstrapServers;
+        this.commitOffsetMap = new ConcurrentHashMap<>();
+        this.ackOffsetMap = new ConcurrentHashMap<>();
+    }
+
+    @Override
+    public boolean init() {
+        try {
+            this.consumer = createKafkaConsumer();
+            InLongTopic topic = onlineTopics.values().stream().findFirst().get();
+            this.seeker = SeekerFactory.createKafkaSeeker(consumer, topic);
+            this.listener = new AckOffsetOnRebalance(topic.getInLongCluster().getClusterId(), seeker,
+                    commitOffsetMap);
+            consumer.subscribe(onlineTopics.keySet(), listener);
+            return true;
+        } catch (Throwable t) {
+            LOGGER.error("failed to init kafka consumer: {}", t.getMessage(), t);
+            return false;
+        }
+    }
+
+    private KafkaConsumer<byte[], byte[]> createKafkaConsumer() {
+        Properties properties = new Properties();
+        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
+        properties.put(ConsumerConfig.GROUP_ID_CONFIG, context.getConfig().getSortTaskId());
+        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
+                ByteArrayDeserializer.class.getName());
+        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
+                ByteArrayDeserializer.class.getName());
+        properties.put(ConsumerConfig.RECEIVE_BUFFER_CONFIG,
+                context.getConfig().getKafkaSocketRecvBufferSize());
+        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
+        SortClientConfig.ConsumeStrategy offsetResetStrategy = context.getConfig().getOffsetResetStrategy();
+        if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.lastest
+                || offsetResetStrategy == SortClientConfig.ConsumeStrategy.lastest_absolutely) {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
+        } else if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest
+                || offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest_absolutely) {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        } else {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "none");
+        }
+        properties.put(ConsumerConfig.FETCH_MAX_BYTES_CONFIG,
+                context.getConfig().getKafkaFetchSizeBytes());
+        properties.put(ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG,
+                context.getConfig().getKafkaFetchWaitMs());
+        properties.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG,
+                RangeAssignor.class.getName());
+        properties.put(ConsumerConfig.CONNECTIONS_MAX_IDLE_MS_CONFIG, 120000L);
+        LOGGER.info("start to create kafka consumer:{}", properties);
+        return new KafkaConsumer<>(properties);
+    }
+
+    @Override
+    public void ack(String msgOffset) throws Exception {
+        // the format of multi topic kafka fetcher msg offset is topic:partitionId:offset, such as topic1:20:1746839
+        String[] offset = msgOffset.split(":");
+        if (offset.length != 3) {
+            throw new Exception("offset is illegal, the correct format is topic:partitionId:offset, "
+                    + "the error offset is:" + msgOffset);
+        }
+
+        // parse topic partition offset
+        TopicPartition topicPartition = new TopicPartition(offset[0], Integer.parseInt(offset[1]));
+        long ackOffset = Long.parseLong(offset[2]);
+
+        // ack
+        if (!ackOffsetMap.containsKey(topicPartition) || !ackOffsetMap.get(topicPartition).containsKey(ackOffset)) {
+            LOGGER.warn("did not find offsetMap or ack offset of {}, offset {}, just ignore it",
+                    topicPartition, ackOffset);
+            return;
+        }
+
+        // mark this offset as acked
+        ConcurrentSkipListMap<Long, Boolean> tpOffsetMap = ackOffsetMap.get(topicPartition);
+        // to prevent race condition in AckOffsetOnRebalance::onPartitionsRevoked
+        if (Objects.nonNull(tpOffsetMap)) {
+            tpOffsetMap.put(ackOffset, true);
+        }
+    }
+
+    @Override
+    public void pause() {
+        consumer.pause(consumer.assignment());
+    }
+
+    @Override
+    public void resume() {
+        consumer.resume(consumer.assignment());
+    }
+
+    @Override
+    public boolean close() {
+        this.closed = true;
+        try {
+            if (fetchThread != null) {
+                fetchThread.interrupt();
+            }
+            if (consumer != null) {
+                prepareCommit();
+                consumer.commitSync(commitOffsetMap);
+                consumer.close();
+            }
+            commitOffsetMap.clear();
+        } catch (Throwable t) {
+            LOGGER.warn(t.getMessage(), t);

Review Comment:
   fixed, thx
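
Beyond the logging fix, the hunk above also pins down the multi-topic offset contract: ack() expects the topic:partitionId:offset string that Fetcher.getOffset() produces. A small round-trip sketch (hypothetical class name; legal Kafka topic names cannot contain ':', so the split is safe):

    import org.apache.kafka.common.TopicPartition;

    public class MsgOffsetSketch {
        // encode the way Fetcher.getOffset() does
        static String encode(String topic, int partitionId, long offset) {
            return topic + ":" + partitionId + ":" + offset;
        }

        // decode the way ack() does, validating the three-part layout
        static TopicPartition decodePartition(String msgOffset) {
            String[] parts = msgOffset.split(":");
            if (parts.length != 3) {
                throw new IllegalArgumentException(
                        "expected topic:partitionId:offset, got " + msgOffset);
            }
            return new TopicPartition(parts[0], Integer.parseInt(parts[1]));
        }

        public static void main(String[] args) {
            String msgOffset = encode("topic1", 20, 1746839L);
            System.out.println(msgOffset);                  // topic1:20:1746839
            System.out.println(decodePartition(msgOffset)); // topic1-20
        }
    }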





[GitHub] [inlong] vernedeng commented on a diff in pull request #5659: [INLONG-5623][SDK] Support kafka multi topics fetcher

Posted by GitBox <gi...@apache.org>.
vernedeng commented on code in PR #5659:
URL: https://github.com/apache/inlong/pull/5659#discussion_r953468297


##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/kafka/KafkaMultiTopicsFetcher.java:
##########
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.sort.fetcher.kafka;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.inlong.sdk.sort.api.ClientContext;
+import org.apache.inlong.sdk.sort.api.Deserializer;
+import org.apache.inlong.sdk.sort.api.Interceptor;
+import org.apache.inlong.sdk.sort.api.MultiTopicsFetcher;
+import org.apache.inlong.sdk.sort.api.SeekerFactory;
+import org.apache.inlong.sdk.sort.api.SortClientConfig;
+import org.apache.inlong.sdk.sort.entity.InLongMessage;
+import org.apache.inlong.sdk.sort.entity.InLongTopic;
+import org.apache.inlong.sdk.sort.entity.MessageRecord;
+import org.apache.inlong.sdk.sort.fetcher.pulsar.PulsarMultiTopicsFetcher;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.OffsetAndMetadata;
+import org.apache.kafka.clients.consumer.RangeAssignor;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.header.Header;
+import org.apache.kafka.common.header.Headers;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Properties;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * Kafka multi topics fetcher
+ */
+public class KafkaMultiTopicsFetcher extends MultiTopicsFetcher {
+    private static final Logger LOGGER = LoggerFactory.getLogger(KafkaMultiTopicsFetcher.class);
+    private final ConcurrentHashMap<TopicPartition, OffsetAndMetadata> commitOffsetMap;
+    private final ConcurrentHashMap<TopicPartition, ConcurrentSkipListMap<Long, Boolean>> ackOffsetMap;
+    private final String bootstrapServers;
+    private ConsumerRebalanceListener listener;
+    private KafkaConsumer<byte[], byte[]> consumer;
+
+    public KafkaMultiTopicsFetcher(
+            List<InLongTopic> topics,
+            ClientContext context,
+            Interceptor interceptor,
+            Deserializer deserializer,
+            String bootstrapServers) {
+        super(topics, context, interceptor, deserializer);
+        this.bootstrapServers = bootstrapServers;
+        this.commitOffsetMap = new ConcurrentHashMap<>();
+        this.ackOffsetMap = new ConcurrentHashMap<>();
+    }
+
+    @Override
+    public boolean init() {
+        try {
+            this.consumer = createKafkaConsumer();
+            InLongTopic topic = onlineTopics.values().stream().findFirst().get();
+            this.seeker = SeekerFactory.createKafkaSeeker(consumer, topic);
+            this.listener = new AckOffsetOnRebalance(topic.getInLongCluster().getClusterId(), seeker,
+                    commitOffsetMap);
+            consumer.subscribe(onlineTopics.keySet(), listener);
+            return true;
+        } catch (Throwable t) {
+            LOGGER.error("failed to init kafka consumer: {}", t.getMessage(), t);

Review Comment:
   fixed, thx





[GitHub] [inlong] EMsnap commented on a diff in pull request #5659: [INLONG-5623][SDK] Support kafka multi topics fetcher

Posted by GitBox <gi...@apache.org>.
EMsnap commented on code in PR #5659:
URL: https://github.com/apache/inlong/pull/5659#discussion_r953292984


##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/api/SortClientConfig.java:
##########
@@ -367,6 +376,8 @@ public void setParameters(Map<String, String> sortSdkParams) {
         this.updateMetaDataIntervalSec = NumberUtils.toInt(sortSdkParams.get("updateMetaDataIntervalSec"),
                 updateMetaDataIntervalSec);
         this.ackTimeoutSec = NumberUtils.toInt(sortSdkParams.get("ackTimeoutSec"), ackTimeoutSec);
+        this.cleanOldConsumerIntervalSec = NumberUtils.toInt(sortSdkParams.get("cleanOldConsumerIntervalSec"),

Review Comment:
   please extract a named constant for this string key
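
i.e. hoist the repeated string literal into a named constant, roughly like this (the constant name and default value below are made up for illustration, and this assumes the NumberUtils in the file is the commons-lang3 helper):

    import org.apache.commons.lang3.math.NumberUtils;

    import java.util.Map;

    public class ConfigKeySketch {
        // hypothetical constant replacing the inline "cleanOldConsumerIntervalSec" literal
        private static final String KEY_CLEAN_OLD_CONSUMER_INTERVAL_SEC = "cleanOldConsumerIntervalSec";

        private int cleanOldConsumerIntervalSec = 60; // made-up default

        public void setParameters(Map<String, String> sortSdkParams) {
            this.cleanOldConsumerIntervalSec = NumberUtils.toInt(
                    sortSdkParams.get(KEY_CLEAN_OLD_CONSUMER_INTERVAL_SEC),
                    cleanOldConsumerIntervalSec);
        }
    }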





[GitHub] [inlong] vernedeng commented on a diff in pull request #5659: [INLONG-5623][SDK] Support kafka multi topics fetcher

Posted by GitBox <gi...@apache.org>.
vernedeng commented on code in PR #5659:
URL: https://github.com/apache/inlong/pull/5659#discussion_r953470985


##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/pulsar/PulsarMultiTopicsFetcher.java:
##########
@@ -0,0 +1,411 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.sort.fetcher.pulsar;
+
+import com.google.common.base.Preconditions;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.inlong.sdk.sort.api.ClientContext;
+import org.apache.inlong.sdk.sort.api.Deserializer;
+import org.apache.inlong.sdk.sort.api.Interceptor;
+import org.apache.inlong.sdk.sort.api.MultiTopicsFetcher;
+import org.apache.inlong.sdk.sort.api.Seeker;
+import org.apache.inlong.sdk.sort.api.SeekerFactory;
+import org.apache.inlong.sdk.sort.api.SortClientConfig;
+import org.apache.inlong.sdk.sort.entity.InLongMessage;
+import org.apache.inlong.sdk.sort.entity.InLongTopic;
+import org.apache.inlong.sdk.sort.entity.MessageRecord;
+import org.apache.pulsar.client.api.Consumer;
+import org.apache.pulsar.client.api.Message;
+import org.apache.pulsar.client.api.MessageId;
+import org.apache.pulsar.client.api.Messages;
+import org.apache.pulsar.client.api.PulsarClient;
+import org.apache.pulsar.client.api.PulsarClientException;
+import org.apache.pulsar.client.api.Schema;
+import org.apache.pulsar.client.api.SubscriptionInitialPosition;
+import org.apache.pulsar.client.api.SubscriptionType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Objects;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * MultiTopicsFetcher for Pulsar.
+ */
+public class PulsarMultiTopicsFetcher extends MultiTopicsFetcher {
+    private static final Logger LOGGER = LoggerFactory.getLogger(PulsarMultiTopicsFetcher.class);
+    private PulsarConsumer currentConsumer;
+    private List<PulsarConsumer> toBeRemovedConsumers = new LinkedList<>();
+    private PulsarClient pulsarClient;
+
+    public PulsarMultiTopicsFetcher(
+            List<InLongTopic> topics,
+            ClientContext context,
+            Interceptor interceptor,
+            Deserializer deserializer,
+            PulsarClient pulsarClient) {
+        super(topics, context, interceptor, deserializer);
+        this.pulsarClient = Preconditions.checkNotNull(pulsarClient);
+    }
+
+    @Override
+    public boolean init() {
+        Consumer<byte[]> newConsumer = createConsumer(onlineTopics.values());
+        if (Objects.isNull(newConsumer)) {
+            LOGGER.error("create new consumer is null");
+            return false;
+        }
+        this.currentConsumer = new PulsarConsumer(newConsumer);
+        InLongTopic firstTopic = onlineTopics.values().stream().findFirst().get();
+        this.seeker = SeekerFactory.createPulsarSeeker(newConsumer, firstTopic);
+        String threadName = String.format("sort_sdk_pulsar_multi_topic_fetch_thread_%d", this.hashCode());
+        this.fetchThread = new Thread(new PulsarMultiTopicsFetcher.Fetcher(), threadName);
+        this.fetchThread.start();
+        this.executor.scheduleWithFixedDelay(this::clearRemovedConsumerList,
+                context.getConfig().getCleanOldConsumerIntervalSec(),
+                context.getConfig().getCleanOldConsumerIntervalSec(),
+                TimeUnit.SECONDS);
+        return true;
+    }
+
+    private void clearRemovedConsumerList() {
+        long cur = System.currentTimeMillis();
+        List<PulsarConsumer> newList = new LinkedList<>();
+        toBeRemovedConsumers.forEach(consumer -> {
+            long diff = cur - consumer.stopTime;
+            if (diff > context.getConfig().getCleanOldConsumerIntervalSec() * 1000L || consumer.isEmpty()) {
+                try {
+                    consumer.close();
+                } catch (PulsarClientException e) {
+                    LOGGER.warn("exception in close old consumer {}", e.getMessage(), e);
+                }
+                return;
+            }
+            newList.add(consumer);
+        });
+        LOGGER.info("after clear old consumers, the old size is {}, current size is {}",
+                toBeRemovedConsumers.size(), newList.size());
+        this.toBeRemovedConsumers = newList;
+    }
+
+    private boolean updateAll(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topics is empty or null");
+            return false;
+        }
+        // stop old;
+        this.setStopConsume(true);
+        this.currentConsumer.pause();
+        // create new;
+        Consumer<byte[]> newConsumer = createConsumer(newTopics);
+        if (Objects.isNull(newConsumer)) {
+            currentConsumer.resume();
+            this.setStopConsume(false);
+            return false;
+        }
+        PulsarConsumer newConsumerWrapper = new PulsarConsumer(newConsumer);
+        InLongTopic firstTopic = newTopics.stream().findFirst().get();
+        final Seeker newSeeker = SeekerFactory.createPulsarSeeker(newConsumer, firstTopic);
+        // save
+        currentConsumer.setStopTime(System.currentTimeMillis());
+        toBeRemovedConsumers.add(currentConsumer);
+        // replace
+        this.currentConsumer = newConsumerWrapper;
+        this.seeker = newSeeker;
+        this.interceptor.configure(firstTopic);
+        this.onlineTopics = newTopics.stream().collect(Collectors.toMap(InLongTopic::getTopic, t -> t));
+        // resume
+        this.setStopConsume(false);
+        return true;
+    }
+
+    private Consumer<byte[]> createConsumer(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topic is empty or null");
+            return null;
+        }
+        try {
+            SubscriptionInitialPosition position = SubscriptionInitialPosition.Latest;
+            SortClientConfig.ConsumeStrategy offsetResetStrategy = context.getConfig().getOffsetResetStrategy();
+            if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest
+                    || offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest_absolutely) {
+                LOGGER.info("the subscription initial position is earliest!");
+                position = SubscriptionInitialPosition.Earliest;
+            }
+
+            List<String> topicNames = newTopics.stream()
+                    .map(InLongTopic::getTopic)
+                    .collect(Collectors.toList());
+            Consumer<byte[]> consumer = pulsarClient.newConsumer(Schema.BYTES)
+                    .topics(topicNames)
+                    .subscriptionName(context.getConfig().getSortTaskId())
+                    .subscriptionType(SubscriptionType.Shared)
+                    .startMessageIdInclusive()
+                    .subscriptionInitialPosition(position)
+                    .ackTimeout(context.getConfig().getAckTimeoutSec(), TimeUnit.SECONDS)
+                    .receiverQueueSize(context.getConfig().getPulsarReceiveQueueSize())
+                    .subscribe();
+            LOGGER.info("create consumer for topics {}", topicNames);
+            return consumer;
+        } catch (Exception e) {
+            LOGGER.error(e.getMessage(), e);
+            return null;
+        }
+    }
+
+    @Override
+    public void ack(String msgOffset) throws Exception {
+        if (StringUtils.isBlank(msgOffset)) {
+            LOGGER.error("ack failed, msg offset should not be blank");
+            return;
+        }
+        if (Objects.isNull(currentConsumer)) {
+            LOGGER.error("ack failed, consumer is null");
+            return;
+        }
+        // if this ack belongs to current consumer
+        MessageId messageId = currentConsumer.getMessageId(msgOffset);
+        if (!Objects.isNull(messageId)) {
+            doAck(msgOffset, this.currentConsumer, messageId);
+            return;
+        }
+
+        // if this ack doesn't belong to the current consumer, look it up among the to-be-removed ones.
+        for (PulsarConsumer oldConsumer : toBeRemovedConsumers) {
+            MessageId id = oldConsumer.getMessageId(msgOffset);
+            if (Objects.isNull(id)) {
+                continue;
+            }
+            doAck(msgOffset, oldConsumer, id);
+            LOGGER.info("ack an old consumer message");
+            return;
+        }
+        context.getDefaultStateCounter().addAckFailTimes(1L);
+        LOGGER.error("in pulsar multi topic fetcher, messageId == null");
+    }
+
+    private void doAck(String msgOffset, PulsarConsumer consumer, MessageId messageId) {
+        if (!consumer.isConnected()) {
+            return;
+        }
+        InLongTopic topic = consumer.getTopic(msgOffset);
+        consumer.acknowledgeAsync(messageId)
+                .thenAccept(ctx -> ackSucc(msgOffset, topic, this.currentConsumer))
+                .exceptionally(exception -> {
+                    LOGGER.error("ack fail:{} {},error:{}",
+                            topic, msgOffset, exception.getMessage(), exception);
+                    context.getStateCounterByTopic(topic).addAckFailTimes(1L);
+                    return null;
+                });
+    }
+
+    private void ackSucc(String offset, InLongTopic topic, PulsarConsumer consumer) {
+        consumer.remove(offset);
+        context.getStateCounterByTopic(topic).addAckSuccTimes(1L);
+    }
+
+    @Override
+    public void pause() {
+        if (Objects.nonNull(currentConsumer)) {
+            currentConsumer.pause();
+        }
+    }
+
+    @Override
+    public void resume() {
+        if (Objects.nonNull(currentConsumer)) {
+            currentConsumer.resume();
+        }
+    }
+
+    @Override
+    public boolean close() {
+        mainLock.writeLock().lock();
+        try {
+            LOGGER.info("closed online topics {}", onlineTopics);
+            try {
+                if (currentConsumer != null) {
+                    currentConsumer.close();
+                }
+                if (fetchThread != null) {
+                    fetchThread.interrupt();
+                }
+            } catch (PulsarClientException e) {
+                LOGGER.warn(e.getMessage(), e);
+            }
+            toBeRemovedConsumers.stream()
+                    .filter(Objects::nonNull)
+                    .forEach(c -> {
+                        try {
+                            c.close();
+                        } catch (PulsarClientException e) {
+                            LOGGER.warn(e.getMessage(), e);
+                        }
+                    });
+            toBeRemovedConsumers.clear();
+            return true;
+        } finally {
+            this.closed = true;
+            mainLock.writeLock().unlock();
+        }
+    }
+
+    @Override
+    public boolean isClosed() {
+        return closed;
+    }
+
+    @Override
+    public void setStopConsume(boolean stopConsume) {
+        this.stopConsume = stopConsume;
+    }
+
+    @Override
+    public boolean isStopConsume() {
+        return stopConsume;
+    }
+
+    @Override
+    public List<InLongTopic> getTopics() {
+        return new ArrayList<>(onlineTopics.values());
+    }
+
+    @Override
+    public boolean updateTopics(List<InLongTopic> topics) {
+        if (needUpdate(topics)) {
+            return updateAll(topics);
+        }
+        LOGGER.info("no need to update multi topic fetcher");
+        return false;
+    }
+
+    public class Fetcher implements Runnable {
+
+        /**
+         * pass the received messages to the onFinishedBatch callback
+         *
+         * @param messageRecords {@link List}
+         */
+        private void handleAndCallbackMsg(List<MessageRecord> messageRecords) {
+            long start = System.currentTimeMillis();
+            try {
+                context.getDefaultStateCounter().addCallbackTimes(1L);
+                context.getConfig().getCallback().onFinishedBatch(messageRecords);
+                context.getDefaultStateCounter()
+                        .addCallbackTimeCost(System.currentTimeMillis() - start).addCallbackDoneTimes(1L);
+            } catch (Exception e) {
+                context.getDefaultStateCounter().addCallbackErrorTimes(1L);
+                LOGGER.error("failed to callback {}", e.getMessage(), e);
+            }
+        }
+
+        private String getOffset(MessageId msgId) {
+            return Base64.getEncoder().encodeToString(msgId.toByteArray());
+        }
+
+        @Override
+        public void run() {
+            boolean hasPermit;
+            while (true) {
+                hasPermit = false;
+                try {
+                    if (context.getConfig().isStopConsume() || stopConsume) {
+                        TimeUnit.MILLISECONDS.sleep(50);
+                        continue;
+                    }
+
+                    if (sleepTime > 0) {
+                        TimeUnit.MILLISECONDS.sleep(sleepTime);
+                    }
+
+                    context.acquireRequestPermit();
+                    hasPermit = true;
+                    context.getDefaultStateCounter().addMsgCount(1L).addFetchTimes(1L);
+
+                    long startFetchTime = System.currentTimeMillis();
+                    Messages<byte[]> messages = currentConsumer.batchReceive();
+
+                    context.getDefaultStateCounter().addFetchTimeCost(System.currentTimeMillis() - startFetchTime);
+                    if (null != messages && messages.size() != 0) {
+                        List<MessageRecord> msgs = new ArrayList<>();
+                        for (Message<byte[]> msg : messages) {
+                            String topicName = msg.getTopicName();
+                            InLongTopic topic = onlineTopics.get(topicName);
+                            if (Objects.isNull(topic)) {
+                                LOGGER.error("got a message with topic {}, which is not subscribe", topicName);
+                                continue;
+                            }
+                            // if need seek
+                            if (msg.getPublishTime() < seeker.getSeekTime()) {
+                                seeker.seek();
+                                break;
+                            }
+                            String offsetKey = getOffset(msg.getMessageId());
+                            currentConsumer.put(offsetKey, topic, msg.getMessageId());
+
+                            //deserialize
+                            List<InLongMessage> inLongMessages = deserializer
+                                    .deserialize(context, topic, msg.getProperties(), msg.getData());
+                            // intercept
+                            inLongMessages = interceptor.intercept(inLongMessages);
+                            if (inLongMessages.isEmpty()) {
+                                ack(offsetKey);
+                                continue;
+                            }
+
+                            msgs.add(new MessageRecord(topic.getTopicKey(),
+                                    inLongMessages,
+                                    offsetKey, System.currentTimeMillis()));
+                            context.getStateCounterByTopic(topic).addConsumeSize(msg.getData().length);
+                        }
+                        context.getDefaultStateCounter().addMsgCount(msgs.size());
+                        handleAndCallbackMsg(msgs);
+                        sleepTime = 0L;
+                    } else {
+                        context.getDefaultStateCounter().addEmptyFetchTimes(1L);
+                        emptyFetchTimes++;
+                        if (emptyFetchTimes >= context.getConfig().getEmptyPollTimes()) {
+                            sleepTime = Math.min((sleepTime += context.getConfig().getEmptyPollSleepStepMs()),
+                                    context.getConfig().getMaxEmptyPollSleepMs());
+                            emptyFetchTimes = 0;
+                        }
+                    }
+                } catch (Exception e) {
+                    context.getDefaultStateCounter().addFetchErrorTimes(1L);
+                    LOGGER.error("failed to fetch msg: {}", e.getMessage(), e);

Review Comment:
   fixed in #5625, thx



##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/pulsar/PulsarMultiTopicsFetcher.java:
##########
@@ -0,0 +1,411 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.sort.fetcher.pulsar;
+
+import com.google.common.base.Preconditions;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.inlong.sdk.sort.api.ClientContext;
+import org.apache.inlong.sdk.sort.api.Deserializer;
+import org.apache.inlong.sdk.sort.api.Interceptor;
+import org.apache.inlong.sdk.sort.api.MultiTopicsFetcher;
+import org.apache.inlong.sdk.sort.api.Seeker;
+import org.apache.inlong.sdk.sort.api.SeekerFactory;
+import org.apache.inlong.sdk.sort.api.SortClientConfig;
+import org.apache.inlong.sdk.sort.entity.InLongMessage;
+import org.apache.inlong.sdk.sort.entity.InLongTopic;
+import org.apache.inlong.sdk.sort.entity.MessageRecord;
+import org.apache.pulsar.client.api.Consumer;
+import org.apache.pulsar.client.api.Message;
+import org.apache.pulsar.client.api.MessageId;
+import org.apache.pulsar.client.api.Messages;
+import org.apache.pulsar.client.api.PulsarClient;
+import org.apache.pulsar.client.api.PulsarClientException;
+import org.apache.pulsar.client.api.Schema;
+import org.apache.pulsar.client.api.SubscriptionInitialPosition;
+import org.apache.pulsar.client.api.SubscriptionType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Objects;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * MultiTopicsFetcher for Pulsar.
+ */
+public class PulsarMultiTopicsFetcher extends MultiTopicsFetcher {
+    private static final Logger LOGGER = LoggerFactory.getLogger(PulsarMultiTopicsFetcher.class);
+    private PulsarConsumer currentConsumer;
+    private List<PulsarConsumer> toBeRemovedConsumers = new LinkedList<>();
+    private PulsarClient pulsarClient;
+
+    public PulsarMultiTopicsFetcher(
+            List<InLongTopic> topics,
+            ClientContext context,
+            Interceptor interceptor,
+            Deserializer deserializer,
+            PulsarClient pulsarClient) {
+        super(topics, context, interceptor, deserializer);
+        this.pulsarClient = Preconditions.checkNotNull(pulsarClient);
+    }
+
+    @Override
+    public boolean init() {
+        Consumer<byte[]> newConsumer = createConsumer(onlineTopics.values());
+        if (Objects.isNull(newConsumer)) {
+            LOGGER.error("create new consumer is null");
+            return false;
+        }
+        this.currentConsumer = new PulsarConsumer(newConsumer);
+        InLongTopic firstTopic = onlineTopics.values().stream().findFirst().get();
+        this.seeker = SeekerFactory.createPulsarSeeker(newConsumer, firstTopic);
+        String threadName = String.format("sort_sdk_pulsar_multi_topic_fetch_thread_%d", this.hashCode());
+        this.fetchThread = new Thread(new PulsarMultiTopicsFetcher.Fetcher(), threadName);
+        this.fetchThread.start();
+        this.executor.scheduleWithFixedDelay(this::clearRemovedConsumerList,
+                context.getConfig().getCleanOldConsumerIntervalSec(),
+                context.getConfig().getCleanOldConsumerIntervalSec(),
+                TimeUnit.SECONDS);
+        return true;
+    }
+
+    private void clearRemovedConsumerList() {
+        long cur = System.currentTimeMillis();
+        List<PulsarConsumer> newList = new LinkedList<>();
+        toBeRemovedConsumers.forEach(consumer -> {
+            long diff = cur - consumer.stopTime;
+            if (diff > context.getConfig().getCleanOldConsumerIntervalSec() * 1000L || consumer.isEmpty()) {
+                try {
+                    consumer.close();
+                } catch (PulsarClientException e) {
+                    LOGGER.warn("exception in close old consumer {}", e.getMessage(), e);
+                }
+                return;
+            }
+            newList.add(consumer);
+        });
+        LOGGER.info("after clear old consumers, the old size is {}, current size is {}",
+                toBeRemovedConsumers.size(), newList.size());
+        this.toBeRemovedConsumers = newList;
+    }
+
+    private boolean updateAll(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topics is empty or null");
+            return false;
+        }
+        // stop old;
+        this.setStopConsume(true);
+        this.currentConsumer.pause();
+        // create new;
+        Consumer<byte[]> newConsumer = createConsumer(newTopics);
+        if (Objects.isNull(newConsumer)) {
+            currentConsumer.resume();
+            this.setStopConsume(false);
+            return false;
+        }
+        PulsarConsumer newConsumerWrapper = new PulsarConsumer(newConsumer);
+        InLongTopic firstTopic = newTopics.stream().findFirst().get();
+        final Seeker newSeeker = SeekerFactory.createPulsarSeeker(newConsumer, firstTopic);
+        // save
+        currentConsumer.setStopTime(System.currentTimeMillis());
+        toBeRemovedConsumers.add(currentConsumer);
+        // replace
+        this.currentConsumer = newConsumerWrapper;
+        this.seeker = newSeeker;
+        this.interceptor.configure(firstTopic);
+        this.onlineTopics = newTopics.stream().collect(Collectors.toMap(InLongTopic::getTopic, t -> t));
+        // resume
+        this.setStopConsume(false);
+        return true;
+    }
+
+    private Consumer<byte[]> createConsumer(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topic is empty or null");
+            return null;
+        }
+        try {
+            SubscriptionInitialPosition position = SubscriptionInitialPosition.Latest;
+            SortClientConfig.ConsumeStrategy offsetResetStrategy = context.getConfig().getOffsetResetStrategy();
+            if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest
+                    || offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest_absolutely) {
+                LOGGER.info("the subscription initial position is earliest!");
+                position = SubscriptionInitialPosition.Earliest;
+            }
+
+            List<String> topicNames = newTopics.stream()
+                    .map(InLongTopic::getTopic)
+                    .collect(Collectors.toList());
+            Consumer<byte[]> consumer = pulsarClient.newConsumer(Schema.BYTES)
+                    .topics(topicNames)
+                    .subscriptionName(context.getConfig().getSortTaskId())
+                    .subscriptionType(SubscriptionType.Shared)
+                    .startMessageIdInclusive()
+                    .subscriptionInitialPosition(position)
+                    .ackTimeout(context.getConfig().getAckTimeoutSec(), TimeUnit.SECONDS)
+                    .receiverQueueSize(context.getConfig().getPulsarReceiveQueueSize())
+                    .subscribe();
+            LOGGER.info("create consumer for topics {}", topicNames);
+            return consumer;
+        } catch (Exception e) {
+            LOGGER.error(e.getMessage(), e);
+            return null;
+        }
+    }
+
+    @Override
+    public void ack(String msgOffset) throws Exception {
+        if (StringUtils.isBlank(msgOffset)) {
+            LOGGER.error("ack failed, msg offset should not be blank");
+            return;
+        }
+        if (Objects.isNull(currentConsumer)) {
+            LOGGER.error("ack failed, consumer is null");
+            return;
+        }
+        // if this ack belongs to current consumer
+        MessageId messageId = currentConsumer.getMessageId(msgOffset);
+        if (!Objects.isNull(messageId)) {
+            doAck(msgOffset, this.currentConsumer, messageId);
+            return;
+        }
+
+        // if this ack doesn't belong to the current consumer, look it up among the to-be-removed ones.
+        for (PulsarConsumer oldConsumer : toBeRemovedConsumers) {
+            MessageId id = oldConsumer.getMessageId(msgOffset);
+            if (Objects.isNull(id)) {
+                continue;
+            }
+            doAck(msgOffset, oldConsumer, id);
+            LOGGER.info("ack an old consumer message");
+            return;
+        }
+        context.getDefaultStateCounter().addAckFailTimes(1L);
+        LOGGER.error("in pulsar multi topic fetcher, messageId == null");
+    }
+
+    private void doAck(String msgOffset, PulsarConsumer consumer, MessageId messageId) {
+        if (!consumer.isConnected()) {
+            return;
+        }
+        InLongTopic topic = consumer.getTopic(msgOffset);
+        consumer.acknowledgeAsync(messageId)
+                .thenAccept(ctx -> ackSucc(msgOffset, topic, this.currentConsumer))
+                .exceptionally(exception -> {
+                    LOGGER.error("ack fail:{} {},error:{}",
+                            topic, msgOffset, exception.getMessage(), exception);
+                    context.getStateCounterByTopic(topic).addAckFailTimes(1L);
+                    return null;
+                });
+    }
+
+    private void ackSucc(String offset, InLongTopic topic, PulsarConsumer consumer) {
+        consumer.remove(offset);
+        context.getStateCounterByTopic(topic).addAckSuccTimes(1L);
+    }
+
+    @Override
+    public void pause() {
+        if (Objects.nonNull(currentConsumer)) {
+            currentConsumer.pause();
+        }
+    }
+
+    @Override
+    public void resume() {
+        if (Objects.nonNull(currentConsumer)) {
+            currentConsumer.resume();
+        }
+    }
+
+    @Override
+    public boolean close() {
+        mainLock.writeLock().lock();
+        try {
+            LOGGER.info("closed online topics {}", onlineTopics);
+            try {
+                if (currentConsumer != null) {
+                    currentConsumer.close();
+                }
+                if (fetchThread != null) {
+                    fetchThread.interrupt();
+                }
+            } catch (PulsarClientException e) {
+                LOGGER.warn(e.getMessage(), e);
+            }
+            toBeRemovedConsumers.stream()
+                    .filter(Objects::nonNull)
+                    .forEach(c -> {
+                        try {
+                            c.close();
+                        } catch (PulsarClientException e) {
+                            LOGGER.warn(e.getMessage(), e);
+                        }
+                    });
+            toBeRemovedConsumers.clear();
+            return true;
+        } finally {
+            this.closed = true;
+            mainLock.writeLock().unlock();
+        }
+    }
+
+    @Override
+    public boolean isClosed() {
+        return closed;
+    }
+
+    @Override
+    public void setStopConsume(boolean stopConsume) {
+        this.stopConsume = stopConsume;
+    }
+
+    @Override
+    public boolean isStopConsume() {
+        return stopConsume;
+    }
+
+    @Override
+    public List<InLongTopic> getTopics() {
+        return new ArrayList<>(onlineTopics.values());
+    }
+
+    @Override
+    public boolean updateTopics(List<InLongTopic> topics) {
+        if (needUpdate(topics)) {
+            return updateAll(topics);
+        }
+        LOGGER.info("no need to update multi topic fetcher");
+        return false;
+    }
+
+    public class Fetcher implements Runnable {
+
+        /**
+         * pass the received messages to the onFinishedBatch callback
+         *
+         * @param messageRecords {@link List}
+         */
+        private void handleAndCallbackMsg(List<MessageRecord> messageRecords) {
+            long start = System.currentTimeMillis();
+            try {
+                context.getDefaultStateCounter().addCallbackTimes(1L);
+                context.getConfig().getCallback().onFinishedBatch(messageRecords);
+                context.getDefaultStateCounter()
+                        .addCallbackTimeCost(System.currentTimeMillis() - start).addCallbackDoneTimes(1L);
+            } catch (Exception e) {
+                context.getDefaultStateCounter().addCallbackErrorTimes(1L);
+                LOGGER.error("failed to callback {}", e.getMessage(), e);

Review Comment:
   fixed in #5625, thx
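
The doAck()/ackSucc() chain quoted above counts ack successes and failures around Pulsar's asynchronous acknowledge. Below is a minimal, self-contained sketch of that same callback pattern using only java.util.concurrent; AckDemo, ackAsync() and the two counters are hypothetical stand-ins for consumer.acknowledgeAsync(MessageId) and the SDK's state counters, not the SDK API itself.

```java
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicLong;

public class AckDemo {
    private final AtomicLong ackSuccTimes = new AtomicLong();
    private final AtomicLong ackFailTimes = new AtomicLong();

    // hypothetical stand-in for consumer.acknowledgeAsync(messageId)
    private CompletableFuture<Void> ackAsync(String msgOffset) {
        return CompletableFuture.runAsync(() -> {
            if (msgOffset == null) {
                throw new IllegalArgumentException("null offset");
            }
        });
    }

    public void doAck(String msgOffset) {
        ackAsync(msgOffset)
                .thenAccept(ignored -> ackSuccTimes.incrementAndGet())
                .exceptionally(t -> {
                    // failures are only counted, never rethrown, as in the quoted code
                    ackFailTimes.incrementAndGet();
                    return null;
                });
    }

    public static void main(String[] args) throws Exception {
        AckDemo demo = new AckDemo();
        demo.doAck("topic1:0:42");
        demo.doAck(null);
        Thread.sleep(100); // let the async callbacks run
        System.out.println("succ=" + demo.ackSuccTimes + " fail=" + demo.ackFailTimes);
    }
}
```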




[GitHub] [inlong] dockerzhang merged pull request #5659: [INLONG-5623][SDK] Support multi-topic fetcher for Kafka

Posted by GitBox <gi...@apache.org>.
dockerzhang merged PR #5659:
URL: https://github.com/apache/inlong/pull/5659



[GitHub] [inlong] vernedeng commented on a diff in pull request #5659: [INLONG-5623][SDK] Support kafka multi topics fetcher

Posted by GitBox <gi...@apache.org>.
vernedeng commented on code in PR #5659:
URL: https://github.com/apache/inlong/pull/5659#discussion_r953469717


##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/kafka/KafkaMultiTopicsFetcher.java:
##########
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.sort.fetcher.kafka;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.inlong.sdk.sort.api.ClientContext;
+import org.apache.inlong.sdk.sort.api.Deserializer;
+import org.apache.inlong.sdk.sort.api.Interceptor;
+import org.apache.inlong.sdk.sort.api.MultiTopicsFetcher;
+import org.apache.inlong.sdk.sort.api.SeekerFactory;
+import org.apache.inlong.sdk.sort.api.SortClientConfig;
+import org.apache.inlong.sdk.sort.entity.InLongMessage;
+import org.apache.inlong.sdk.sort.entity.InLongTopic;
+import org.apache.inlong.sdk.sort.entity.MessageRecord;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.OffsetAndMetadata;
+import org.apache.kafka.clients.consumer.RangeAssignor;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.header.Header;
+import org.apache.kafka.common.header.Headers;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Properties;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * Kafka multi topics fetcher
+ */
+public class KafkaMultiTopicsFetcher extends MultiTopicsFetcher {
+    private static final Logger LOGGER = LoggerFactory.getLogger(KafkaMultiTopicsFetcher.class);
+    private final ConcurrentHashMap<TopicPartition, OffsetAndMetadata> commitOffsetMap;
+    private final ConcurrentHashMap<TopicPartition, ConcurrentSkipListMap<Long, Boolean>> ackOffsetMap;
+    private final String bootstrapServers;
+    private ConsumerRebalanceListener listener;
+    private KafkaConsumer<byte[], byte[]> consumer;
+
+    public KafkaMultiTopicsFetcher(
+            List<InLongTopic> topics,
+            ClientContext context,
+            Interceptor interceptor,
+            Deserializer deserializer,
+            String bootstrapServers) {
+        super(topics, context, interceptor, deserializer);
+        this.bootstrapServers = bootstrapServers;
+        this.commitOffsetMap = new ConcurrentHashMap<>();
+        this.ackOffsetMap = new ConcurrentHashMap<>();
+    }
+
+    @Override
+    public boolean init() {
+        try {
+            this.consumer = createKafkaConsumer();
+            InLongTopic topic = onlineTopics.values().stream().findFirst().get();
+            this.seeker = SeekerFactory.createKafkaSeeker(consumer, topic);
+            this.listener = new AckOffsetOnRebalance(topic.getInLongCluster().getClusterId(), seeker,
+                    commitOffsetMap);
+            consumer.subscribe(onlineTopics.keySet(), listener);
+            return true;
+        } catch (Throwable t) {
+            LOGGER.error("failed to init kafka consumer: {}", t.getMessage(), t);
+            return false;
+        }
+    }
+
+    private KafkaConsumer<byte[], byte[]> createKafkaConsumer() {
+        Properties properties = new Properties();
+        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
+        properties.put(ConsumerConfig.GROUP_ID_CONFIG, context.getConfig().getSortTaskId());
+        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
+                ByteArrayDeserializer.class.getName());
+        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
+                ByteArrayDeserializer.class.getName());
+        properties.put(ConsumerConfig.RECEIVE_BUFFER_CONFIG,
+                context.getConfig().getKafkaSocketRecvBufferSize());
+        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
+        SortClientConfig.ConsumeStrategy offsetResetStrategy = context.getConfig().getOffsetResetStrategy();
+        if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.lastest
+                || offsetResetStrategy == SortClientConfig.ConsumeStrategy.lastest_absolutely) {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
+        } else if (offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest
+                || offsetResetStrategy == SortClientConfig.ConsumeStrategy.earliest_absolutely) {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        } else {
+            properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "none");
+        }
+        properties.put(ConsumerConfig.FETCH_MAX_BYTES_CONFIG,
+                context.getConfig().getKafkaFetchSizeBytes());
+        properties.put(ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG,
+                context.getConfig().getKafkaFetchWaitMs());
+        properties.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG,
+                RangeAssignor.class.getName());
+        properties.put(ConsumerConfig.CONNECTIONS_MAX_IDLE_MS_CONFIG, 120000L);
+        LOGGER.info("start to create kafka consumer:{}", properties);
+        return new KafkaConsumer<>(properties);
+    }
+
+    @Override
+    public void ack(String msgOffset) throws Exception {
+        // the format of multi topic kafka fetcher msg offset is topic:partitionId:offset, such as topic1:20:1746839
+        String[] offset = msgOffset.split(":");
+        if (offset.length != 3) {
+            throw new Exception("offset is illegal, the correct format is topic:partitionId:offset, "
+                    + "the error offset is:" + msgOffset);
+        }
+
+        // parse topic partition offset
+        TopicPartition topicPartition = new TopicPartition(offset[0], Integer.parseInt(offset[1]));
+        long ackOffset = Long.parseLong(offset[2]);
+
+        // ack
+        if (!ackOffsetMap.containsKey(topicPartition) || !ackOffsetMap.get(topicPartition).containsKey(ackOffset)) {
+            LOGGER.warn("did not find the ack offset map or the pending offset of {}, offset {}, just ignore it",
+                    topicPartition, ackOffset);
+            return;
+        }
+
+        // mark this offset as acked.
+        ConcurrentSkipListMap<Long, Boolean> tpOffsetMap = ackOffsetMap.get(topicPartition);
+        // to prevent race condition in AckOffsetOnRebalance::onPartitionsRevoked
+        if (Objects.nonNull(tpOffsetMap)) {
+            tpOffsetMap.put(ackOffset, true);
+        }
+    }
+
+    @Override
+    public void pause() {
+        consumer.pause(consumer.assignment());
+    }
+
+    @Override
+    public void resume() {
+        consumer.resume(consumer.assignment());
+    }
+
+    @Override
+    public boolean close() {
+        this.closed = true;
+        try {
+            if (fetchThread != null) {
+                fetchThread.interrupt();
+            }
+            if (consumer != null) {
+                prepareCommit();
+                consumer.commitSync(commitOffsetMap);
+                consumer.close();
+            }
+            commitOffsetMap.clear();
+        } catch (Throwable t) {
+            LOGGER.warn(t.getMessage(), t);
+        }
+        LOGGER.info("closed kafka multi topic fetcher");
+        return true;
+    }
+
+    @Override
+    public boolean isClosed() {
+        return closed;
+    }
+
+    @Override
+    public void setStopConsume(boolean stopConsume) {
+        this.stopConsume = stopConsume;
+    }
+
+    @Override
+    public boolean isStopConsume() {
+        return stopConsume;
+    }
+
+    @Override
+    public List<InLongTopic> getTopics() {
+        return new ArrayList<>(onlineTopics.values());
+    }
+
+    @Override
+    public boolean updateTopics(List<InLongTopic> topics) {
+        if (needUpdate(topics)) {
+            return updateAll(topics);
+        }
+        LOGGER.info("no need to update topics");
+        return false;
+    }
+
+    private boolean updateAll(Collection<InLongTopic> newTopics) {
+        if (CollectionUtils.isEmpty(newTopics)) {
+            LOGGER.error("new topics is empty or null");
+            return false;
+        }
+
+        // stop
+        this.setStopConsume(true);
+
+        // update
+        this.onlineTopics = newTopics.stream().collect(Collectors.toMap(InLongTopic::getTopic, t -> t));
+        InLongTopic topic = onlineTopics.values().stream().findFirst().get();
+        this.seeker = SeekerFactory.createKafkaSeeker(consumer, topic);
+        this.listener = new AckOffsetOnRebalance(topic.getInLongCluster().getClusterId(), seeker,
+                commitOffsetMap, ackOffsetMap);
+        Optional.ofNullable(interceptor).ifPresent(i -> i.configure(topic));
+
+        // subscribe new
+        consumer.subscribe(onlineTopics.keySet(), listener);
+
+        // resume
+        this.setStopConsume(false);
+        return true;
+    }
+
+    private void prepareCommit() {
+        ackOffsetMap.forEach((topicPartition, tpOffsetMap) -> {
+            synchronized (tpOffsetMap) {
+                // collect the contiguous prefix of acked offsets
+                List<Long> removeOffsets = new ArrayList<>();
+                long commitOffset = -1;
+                for (Long ackOffset : tpOffsetMap.keySet()) {
+                    if (!tpOffsetMap.get(ackOffset)) {
+                        break;
+                    }
+                    removeOffsets.add(ackOffset);
+                    commitOffset = ackOffset;
+                }
+                // the lowest offset has not been acked yet, nothing to commit
+                if (commitOffset == -1) {
+                    return;
+                }
+
+                // remove offset and commit offset
+                removeOffsets.forEach(tpOffsetMap::remove);
+                commitOffsetMap.put(topicPartition, new OffsetAndMetadata(commitOffset));
+            }
+        });
+    }
+
+    public class Fetcher implements Runnable {
+
+        private void commitKafkaOffset() {
+            prepareCommit();
+            if (consumer != null) {
+                try {
+                    consumer.commitAsync(commitOffsetMap, null);
+                    commitOffsetMap.clear();
+                } catch (Exception e) {
+                    LOGGER.error("commit kafka offset failed: {}", e.getMessage(), e);
+                }
+            }
+        }
+
+        /**
+         * pass the received messages to the onFinishedBatch callback
+         *
+         * @param messageRecords {@code List<MessageRecord>}
+         */
+        private void handleAndCallbackMsg(List<MessageRecord> messageRecords) {
+            long start = System.currentTimeMillis();
+            try {
+                context.getDefaultStateCounter().addCallbackTimes(1);
+                context.getConfig().getCallback().onFinishedBatch(messageRecords);
+                context.getDefaultStateCounter()
+                        .addCallbackTimeCost(System.currentTimeMillis() - start)
+                        .addCallbackDoneTimes(1);
+            } catch (Exception e) {
+                context.getDefaultStateCounter().addCallbackErrorTimes(1);
+                LOGGER.error("failed to callback: {}", e.getMessage(), e);
+            }
+        }
+
+        private String getOffset(String topic, int partitionId, long offset) {
+            TopicPartition topicPartition = new TopicPartition(topic, partitionId);
+            ackOffsetMap.computeIfAbsent(topicPartition, k -> new ConcurrentSkipListMap<>()).put(offset, false);
+            return topic + ":" + partitionId + ":" + offset;
+        }
+
+        private Map<String, String> getMsgHeaders(Headers headers) {
+            Map<String, String> headerMap = new HashMap<>();
+            for (Header header : headers) {
+                headerMap.put(header.key(), new String(header.value()));
+            }
+            return headerMap;
+        }
+
+        @Override
+        public void run() {
+            boolean hasPermit;
+            while (true) {
+                hasPermit = false;
+                try {
+                    if (context.getConfig().isStopConsume() || stopConsume) {
+                        TimeUnit.MILLISECONDS.sleep(50);
+                        continue;
+                    }
+
+                    if (sleepTime > 0) {
+                        TimeUnit.MILLISECONDS.sleep(sleepTime);
+                    }
+
+                    context.acquireRequestPermit();
+                    hasPermit = true;
+                    // fetch from kafka
+                    fetchFromKafka();
+                    // commit
+                    commitKafkaOffset();
+                } catch (Exception e) {
+                    context.getDefaultStateCounter().addFetchErrorTimes(1);
+                    LOGGER.error(e.getMessage(), e);

Review Comment:
   fixed, thx
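
A note on the msgOffset handle used in the diff above: getOffset() builds the string topic:partitionId:offset, and ack() splits it back into exactly three fields. Below is a minimal round-trip sketch of that encoding; OffsetCodecDemo and ParsedOffset are hypothetical illustration types, not part of the SDK.

```java
public class OffsetCodecDemo {

    static final class ParsedOffset {
        final String topic;
        final int partitionId;
        final long offset;

        ParsedOffset(String topic, int partitionId, long offset) {
            this.topic = topic;
            this.partitionId = partitionId;
            this.offset = offset;
        }
    }

    // mirrors Fetcher.getOffset(): build the handle handed out with each message
    static String encode(String topic, int partitionId, long offset) {
        return topic + ":" + partitionId + ":" + offset;
    }

    // mirrors ack(): reject anything that is not exactly three fields
    static ParsedOffset decode(String msgOffset) {
        String[] parts = msgOffset.split(":");
        if (parts.length != 3) {
            throw new IllegalArgumentException(
                    "illegal offset, expected topic:partitionId:offset, got " + msgOffset);
        }
        return new ParsedOffset(parts[0], Integer.parseInt(parts[1]), Long.parseLong(parts[2]));
    }

    public static void main(String[] args) {
        ParsedOffset p = decode(encode("topic1", 20, 1746839L));
        System.out.println(p.topic + " / " + p.partitionId + " / " + p.offset); // topic1 / 20 / 1746839
    }
}
```

The ":" delimiter is safe here because Kafka topic names are restricted to alphanumerics, '.', '_' and '-', so the topic field can never contain a colon.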




[GitHub] [inlong] vernedeng commented on a diff in pull request #5659: [INLONG-5623][SDK] Support kafka multi topics fetcher

Posted by GitBox <gi...@apache.org>.
vernedeng commented on code in PR #5659:
URL: https://github.com/apache/inlong/pull/5659#discussion_r953469404


##########
inlong-sdk/sort-sdk/src/main/java/org/apache/inlong/sdk/sort/fetcher/kafka/KafkaMultiTopicsFetcher.java:
##########
@@ -0,0 +1,388 @@
+    public class Fetcher implements Runnable {
+
+        private void commitKafkaOffset() {
+            prepareCommit();
+            if (consumer != null) {
+                try {
+                    consumer.commitAsync(commitOffsetMap, null);
+                    commitOffsetMap.clear();
+                } catch (Exception e) {
+                    LOGGER.error("commit kafka offset failed: {}", e.getMessage(), e);
+                }
+            }
+        }
+
+        /**
+         * pass the received messages to the onFinishedBatch callback
+         *
+         * @param messageRecords {@code List<MessageRecord>}
+         */
+        private void handleAndCallbackMsg(List<MessageRecord> messageRecords) {
+            long start = System.currentTimeMillis();
+            try {
+                context.getDefaultStateCounter().addCallbackTimes(1);
+                context.getConfig().getCallback().onFinishedBatch(messageRecords);
+                context.getDefaultStateCounter()
+                        .addCallbackTimeCost(System.currentTimeMillis() - start)
+                        .addCallbackDoneTimes(1);
+            } catch (Exception e) {
+                context.getDefaultStateCounter().addCallbackErrorTimes(1);
+                LOGGER.error("failed to callback: {}", e.getMessage(), e);

Review Comment:
   fixed, thx
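
The prepareCommit() scan quoted in this thread is the heart of the skiplist design from the PR description: per TopicPartition, offsets are kept in a ConcurrentSkipListMap<Long, Boolean>, and only the highest contiguous acked prefix is committed. A minimal single-partition sketch follows, relying on the map's ascending-key iteration order; WatermarkDemo and drainContiguousAcked are hypothetical names for illustration only.

```java
import java.util.Map;
import java.util.concurrent.ConcurrentSkipListMap;

public class WatermarkDemo {

    // returns -1 when the lowest offset is still un-acked
    static long drainContiguousAcked(ConcurrentSkipListMap<Long, Boolean> tpOffsetMap) {
        long commitOffset = -1;
        for (Map.Entry<Long, Boolean> e : tpOffsetMap.entrySet()) { // ascending key order
            if (!e.getValue()) {
                break; // first un-acked offset blocks everything above it
            }
            commitOffset = e.getKey();
        }
        if (commitOffset >= 0) {
            // drop everything up to and including the watermark
            tpOffsetMap.headMap(commitOffset, true).clear();
        }
        return commitOffset;
    }

    public static void main(String[] args) {
        ConcurrentSkipListMap<Long, Boolean> m = new ConcurrentSkipListMap<>();
        m.put(10L, true);
        m.put(11L, true);
        m.put(12L, false); // not acked yet
        m.put(13L, true);
        System.out.println(drainContiguousAcked(m)); // 11
        System.out.println(m.keySet());              // [12, 13]
    }
}
```

The headMap(..., true).clear() call plays the role of the removeOffsets list in the quoted code: both drop the committed prefix so the map only retains in-flight offsets.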


