Posted to commits@inlong.apache.org by GitBox <gi...@apache.org> on 2022/02/26 03:34:37 UTC

[GitHub] [incubator-inlong] zk1510 commented on a change in pull request #2725: [INLONG-2666][Agent] Agent supports collecting the data from Kafka

zk1510 commented on a change in pull request #2725:
URL: https://github.com/apache/incubator-inlong/pull/2725#discussion_r815261339



##########
File path: inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/reader/KafkaReader.java
##########
@@ -0,0 +1,296 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.agent.plugin.sources.reader;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.inlong.agent.conf.JobProfile;
+import org.apache.inlong.agent.message.DefaultMessage;
+import org.apache.inlong.agent.metrics.audit.AuditUtils;
+import org.apache.inlong.agent.plugin.Message;
+import org.apache.inlong.agent.plugin.Reader;
+import org.apache.inlong.agent.plugin.Validator;
+import org.apache.inlong.agent.plugin.metrics.PluginJmxMetric;
+import org.apache.inlong.agent.plugin.metrics.PluginMetric;
+import org.apache.inlong.agent.plugin.metrics.PluginPrometheusMetric;
+import org.apache.inlong.agent.plugin.validator.PatternValidator;
+import org.apache.inlong.agent.utils.AgentUtils;
+import org.apache.inlong.agent.utils.ConfigUtil;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.common.TopicPartition;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.charset.StandardCharsets;
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static org.apache.inlong.agent.constant.CommonConstants.DEFAULT_PROXY_INLONG_GROUP_ID;
+import static org.apache.inlong.agent.constant.CommonConstants.DEFAULT_PROXY_INLONG_STREAM_ID;
+import static org.apache.inlong.agent.constant.CommonConstants.PROXY_INLONG_GROUP_ID;
+import static org.apache.inlong.agent.constant.CommonConstants.PROXY_INLONG_STREAM_ID;
+import static org.apache.inlong.agent.constant.JobConstants.JOB_KAFKA_OFFSET;
+
+public class KafkaReader<K, V> implements Reader {
+    private static final Logger LOGGER = LoggerFactory.getLogger(KafkaReader.class);
+
+    KafkaConsumer<K, V> consumer;
+    private Iterator<ConsumerRecord<K, V>> iterator;
+    private List<Validator> validators = new ArrayList<>();
+    public static final int NEVER_STOP_SIGN = -1;
+    private long timeout;
+    private long waitTimeout = 1000;
+    private long lastTime = 0;
+    // metric
+    private static final String KAFKA_READER_TAG_NAME = "AgentKafkaMetric";
+    private final PluginMetric kafkaMetric;
+    // total read records
+    private static AtomicLong currentTotalReadRecords = new AtomicLong(0);
+
+    private static AtomicLong lastTotalReadRecords = new AtomicLong(0);
+    // total readBytes
+    private static AtomicLong currentTotalReadBytes = new AtomicLong(0);
+    private static AtomicLong lastTotalReadBytes = new AtomicLong(0);
+    long lastTimestamp;
+    // record speed limit in records/s
+    long recordSpeed;
+    // byte speed limit in bytes/s
+    long byteSpeed;
+    // flow control sleep interval in ms
+    long flowControlInterval;
+    private String inlongGroupId;
+    private String inlongStreamId;
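+    // offset of the most recently read record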
+    private String snapshot;
+    private static final String KAFKA_SOURCE_READ_RECORD_SPEED = "job.kafkajob.record.speed.limit";
+    private static final String KAFKA_SOURCE_READ_BYTE_SPEED = "job.kafkajob.byte.speed.limit";
+    private static final String KAFKA_SOURCE_READ_MIN_INTERVAL = "kafka.min.interval.limit";
+    private static final String JOB_KAFKAJOB_READ_TIMEOUT = "job.kafkajob.read.timeout";
+
+    /**
+     * Init the reader from a kafka consumer and the job parameters.
+     *
+     * @param consumer kafka consumer to read from
+     * @param paraMap job parameter map
+     */
+    public KafkaReader(KafkaConsumer<K, V> consumer, Map<String, String> paraMap) {
+        this.consumer = consumer;
+        // the read-record counter also serves as an instance id for a unique metric tag
+        if (ConfigUtil.isPrometheusEnabled()) {
+            kafkaMetric = new PluginPrometheusMetric(AgentUtils.getUniqId(
+                    KAFKA_READER_TAG_NAME, currentTotalReadRecords.incrementAndGet()));
+        } else {
+            kafkaMetric = new PluginJmxMetric(AgentUtils.getUniqId(
+                    KAFKA_READER_TAG_NAME, currentTotalReadRecords.incrementAndGet()));
+        }
+
+        this.recordSpeed = Long.parseLong(paraMap.getOrDefault(KAFKA_SOURCE_READ_RECORD_SPEED, "10000"));
+        this.byteSpeed = Long.parseLong(paraMap.getOrDefault(KAFKA_SOURCE_READ_BYTE_SPEED, String.valueOf(1024 * 1024)));
+        this.flowControlInterval = Long.parseLong(paraMap.getOrDefault(KAFKA_SOURCE_READ_MIN_INTERVAL, "1000"));
+        this.lastTimestamp = System.currentTimeMillis();
+
+        LOGGER.info("KAFKA_SOURCE_READ_RECORD_SPEED = {}", this.recordSpeed);
+        LOGGER.info("KAFKA_SOURCE_READ_BYTE_SPEED = {}", this.byteSpeed);
+    }
+
+    @Override
+    public Message read() {
+
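+        // return the next buffered record from the previous fetch, if any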
+        if (iterator != null && iterator.hasNext()) {
+            ConsumerRecord<K, V> record = iterator.next();
+            // body
+            String recordValue = record.value().toString();
+            if (validateMessage(recordValue)) {
+                AuditUtils.add(AuditUtils.AUDIT_ID_AGENT_READ_SUCCESS,
+                        inlongGroupId, inlongStreamId, System.currentTimeMillis());
+                // header
+                Map<String, String> headerMap = new HashMap<>();
+                headerMap.put("record.offset", String.valueOf(record.offset()));
+                headerMap.put("record.key", String.valueOf(record.key()));
+                // update read metrics
+                kafkaMetric.incReadNum();
+                // commit the offset asynchronously
+                consumer.commitAsync();
+                // record the current offset as the snapshot once the commit is issued
+                snapshot = String.valueOf(record.offset());
+                DefaultMessage message = new DefaultMessage(recordValue.getBytes(StandardCharsets.UTF_8), headerMap);
+                recordReadLimit(1L, message.getBody().length);
+                return message;
+            }
+        }
+        AgentUtils.silenceSleepInMs(waitTimeout);
+
+        return null;
+    }
+
+    @Override
+    public boolean isFinished() {
+        if (iterator == null) {
+            // first call: fetch data with a 5s poll timeout
+            fetchData(5000);
+            return false;
+        }
+        if (iterator.hasNext()) {
+            lastTime = 0;
+            return false;
+        }
+        // buffered records exhausted, try to fetch more before deciding
+        boolean fetchDataSuccess = fetchData(5000);
+        if (fetchDataSuccess && iterator.hasNext()) {
+            lastTime = 0;
+            return false;
+        } else {
+            if (lastTime == 0) {
+                lastTime = System.currentTimeMillis();
+            }
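+            // a timeout of NEVER_STOP_SIGN (-1) means the reader never finishes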
+            if (timeout == NEVER_STOP_SIGN) {
+                return false;
+            }
+            return System.currentTimeMillis() - lastTime > timeout;
+        }
+    }
+
+    @Override
+    public String getReadSource() {
+        Set<TopicPartition> assignment = consumer.assignment();
+        // each consumer is assigned one topic and one partition, so return the first entry
+        Iterator<TopicPartition> iterator = assignment.iterator();
+        while (iterator.hasNext()) {
+            TopicPartition topicPartition = iterator.next();
+            return topicPartition.topic() + "_" + topicPartition.partition();

Review comment:
       Each consumer here maps to one topic and one partition, so returning on the first assignment is sufficient.
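
       For context, a minimal sketch of the single-partition assignment this
       relies on (the broker address, group id and topic below are hypothetical,
       not from this PR):

           import java.util.Collections;
           import java.util.Properties;
           import org.apache.kafka.clients.consumer.KafkaConsumer;
           import org.apache.kafka.common.TopicPartition;

           public class SinglePartitionConsumerDemo {
               public static void main(String[] args) {
                   Properties props = new Properties();
                   props.put("bootstrap.servers", "localhost:9092"); // hypothetical broker
                   props.put("group.id", "inlong-agent-demo");       // hypothetical group id
                   props.put("key.deserializer",
                           "org.apache.kafka.common.serialization.StringDeserializer");
                   props.put("value.deserializer",
                           "org.apache.kafka.common.serialization.StringDeserializer");
                   KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
                   // manual assignment of exactly one partition of one topic:
                   // consumer.assignment() then holds a single TopicPartition, so
                   // getReadSource() can safely return the first entry
                   consumer.assign(Collections.singletonList(new TopicPartition("demo_topic", 0)));
               }
           }

       Note that assign() bypasses consumer-group rebalancing, so the
       one-consumer-one-partition mapping stays stable for the life of the job.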




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@inlong.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org