You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@inlong.apache.org by GitBox <gi...@apache.org> on 2022/02/25 11:09:25 UTC

[GitHub] [incubator-inlong] healchow commented on a change in pull request #2725: [INLONG-2666][Agent] Agent supports collecting the data from Kafka

healchow commented on a change in pull request #2725:
URL: https://github.com/apache/incubator-inlong/pull/2725#discussion_r814677031



##########
File path: inlong-agent/agent-plugins/pom.xml
##########
@@ -126,5 +126,17 @@
             <artifactId>agent-common</artifactId>
             <version>${project.version}</version>
         </dependency>
+
+        <dependency>
+            <groupId>org.apache.kafka</groupId>
+            <artifactId>kafka_${flink.scala.binary.version}</artifactId>
+            <version>${kafka.version}</version>
+        </dependency>
+
+        <dependency>

Review comment:
       Could you use `gson` or `jackson` instead of `fastjson`?

##########
File path: inlong-agent/agent-plugins/src/test/java/org/apache/inlong/agent/plugin/sources/TestKafkaReader.java
##########
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.agent.plugin.sources;
+
+import org.apache.inlong.agent.conf.JobProfile;
+import org.apache.inlong.agent.plugin.Message;
+import org.apache.inlong.agent.plugin.Reader;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.List;
+
+public class TestKafkaReader {
+    private static final Logger LOGGER = LoggerFactory.getLogger(TestKafkaReader.class);
+
+    @Test
+    public void testKafkaReader() {
+        KafkaSource kafkaSource = new KafkaSource();
+        JobProfile conf = JobProfile.parseJsonStr("{}");
+        conf.set("job.kafkajob.topic","test2");
+        conf.set("job.kafkajob.bootstrap.servers","10.91.78.107:9092");
+        conf.set("job.kafkajob.group.id","test_group1");
+//        conf.set("job.kafkajob.record.speed.limit","1");

Review comment:
       Please remove this unused, commented-out code.

##########
File path: inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/constant/JobConstants.java
##########
@@ -51,27 +51,27 @@
     public static final String JOB_DIR_FILTER_PATH = "job.filejob.dir.path";
 
     //Binlog job
-    private static final String JOB_DATABASE_USER = "job.binlogjob.user";
-    private static final String JOB_DATABASE_PASSWORD = "job.binlogjob.password";
-    private static final String JOB_DATABASE_HOSTNAME = "job.binlogjob.hostname";
-    private static final String JOB_DATABASE_WHITELIST = "job.binlogjob.tableWhiteList";
-    private static final String JOB_DATABASE_SERVER_TIME_ZONE = "job.binlogjob.database.serverTimezone";
-    private static final String JOB_DATABASE_STORE_OFFSET_INTERVAL_MS = "offset.binlogjob.offset.flush.interval.ms";
-    private static final String JOB_DATABASE_STORE_HISTORY_FILENAME = "job.binlogjob.database.history.file.filename";
-    private static final String JOB_DATABASE_SNAPSHOT_MODE = "job.binlogjob.database.snapshot.mode";
-    private static final  String JOB_DATABASE_OFFSET = "job.binlogjob.database.offset";
+    public static final String JOB_DATABASE_USER = "job.binlogjob.user";
+    public static final String JOB_DATABASE_PASSWORD = "job.binlogjob.password";
+    public static final String JOB_DATABASE_HOSTNAME = "job.binlogjob.hostname";
+    public static final String JOB_DATABASE_WHITELIST = "job.binlogjob.tableWhiteList";
+    public static final String JOB_DATABASE_SERVER_TIME_ZONE = "job.binlogjob.database.serverTimezone";
+    public static final String JOB_DATABASE_STORE_OFFSET_INTERVAL_MS = "offset.binlogjob.offset.flush.interval.ms";
+    public static final String JOB_DATABASE_STORE_HISTORY_FILENAME = "job.binlogjob.database.history.file.filename";
+    public static final String JOB_DATABASE_SNAPSHOT_MODE = "job.binlogjob.database.snapshot.mode";
+    public static final  String JOB_DATABASE_OFFSET = "job.binlogjob.database.offset";
 
     //Kafka job
-    private static final  String SOURCE_KAFKA_TOPIC = "job.kafkajob.topic";
-    private static final  String SOURCE_KAFKA_KEY_DESERIALIZER = "job.kafkajob.key.deserializer";
-    private static final  String SOURCE_KAFKA_VALUE_DESERIALIZER = "job.kafkajob.value.Deserializer";
-    private static final  String SOURCE_KAFKA_BOOTSTRAP_SERVERS = "job.kafkajob.bootstrap.servers";
-    private static final  String SOURCE_KAFKA_GROUP_ID = "job.kafkajob.group.Id";
-    private static final  String SOURCE_KAFKA_RECORD_SPEED = "job.kafkajob.record.speed";
-    private static final  String SOURCE_KAFKA_BYTE_SPEED_LIMIT = "job.kafkajob.byte.speed.limit";
-    private static final  String SOURCE_KAFKA_MIN_INTERVAL = "job.kafkajob.min.interval";
-    private static final  String SOURCE_KAFKA_OFFSET = "job.kafkajob.offset";
-    private static final  String SOURCE_KAFKA_READ_TIMEOUT = "job.kafkajob.read.timeout";
+    public static final  String JOB_KAFKA_TOPIC = "job.kafkajob.topic";

Review comment:
       It is recommended to use all lowercase or all uppercase.

##########
File path: inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/KafkaSource.java
##########
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.agent.plugin.sources;
+
+import com.alibaba.fastjson.JSON;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.inlong.agent.conf.JobProfile;
+import org.apache.inlong.agent.plugin.Reader;
+import org.apache.inlong.agent.plugin.Source;
+import org.apache.inlong.agent.plugin.metrics.SourceJmxMetric;
+import org.apache.inlong.agent.plugin.metrics.SourceMetrics;
+import org.apache.inlong.agent.plugin.metrics.SourcePrometheusMetrics;
+import org.apache.inlong.agent.plugin.sources.reader.KafkaReader;
+import org.apache.inlong.agent.utils.ConfigUtil;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.common.PartitionInfo;
+import org.apache.kafka.common.TopicPartition;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import static org.apache.inlong.agent.constant.JobConstants.DEFAULT_JOB_LINE_FILTER;
+import static org.apache.inlong.agent.constant.JobConstants.JOB_KAFKA_OFFSET;
+import static org.apache.inlong.agent.constant.JobConstants.JOB_KAFKA_PARTITION_OFFSET_DELIMITER;
+import static org.apache.inlong.agent.constant.JobConstants.JOB_LINE_FILTER_PATTERN;
+import static org.apache.inlong.agent.constant.JobConstants.JOB_OFFSET_DELIMITER;
+
+public class KafkaSource implements Source {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(KafkaSource.class);
+
+    private static final String KAFKA_SOURCE_TAG_NAME = "AgentKafkaSourceMetric";
+    private static final String JOB_KAFKAJOB_PARAM_PREFIX = "job.kafkajob.";
+    private static final String JOB_KAFKAJOB_TOPIC = "job.kafkajob.topic";
+    private static final String JOB_KAFKAJOB_BOOTSTRAP_SERVERS = "job.kafkajob.bootstrap.servers";
+    private static final String JOB_KAFKAJOB_GROUP_ID = "job.kafkajob.group.id";
+    private static final String JOB_KAFKAJOB_WAIT_TIMEOUT = "job.kafkajob.wait.timeout";
+    //private static final String JOB_KAFKAJOB_PARTITION_OFFSET = "job.kafkajob.topic.partition.offset";
+    private static final String KAFKA_COMMIT_AUTO = "enable.auto.commit";
+    private static final String KAFKA_DESERIALIZER_METHOD = "org.apache.kafka.common.serialization.StringDeserializer";
+    private static final String KAFKA_KEY_DESERIALIZER = "key.deserializer";
+    private static final String KAFKA_VALUE_DESERIALIZER = "value.deserializer";
+
+    private final SourceMetrics sourceMetrics;
+
+    public KafkaSource() {
+        if (ConfigUtil.isPrometheusEnabled()) {
+            this.sourceMetrics = new SourcePrometheusMetrics(KAFKA_SOURCE_TAG_NAME);
+        } else {
+            this.sourceMetrics = new SourceJmxMetric(KAFKA_SOURCE_TAG_NAME);
+        }
+
+    }
+
+    @Override
+    public List<Reader> split(JobProfile conf) {
+        List<Reader> result = new ArrayList<>();
+        String filterPattern = conf.get(JOB_LINE_FILTER_PATTERN, DEFAULT_JOB_LINE_FILTER);
+
+        Properties props = new Properties();
+        Map<String,String> map = (Map)JSON.parse(conf.toJsonStr());
+        Iterator<Map.Entry<String,String>> iterator = map.entrySet().iterator();
+        //begin build kafkaConsumer

Review comment:
       It's suggested to add a space after `//` at the beginning of your comment.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@inlong.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org