You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@seatunnel.apache.org by GitBox <gi...@apache.org> on 2022/09/28 04:14:54 UTC

[GitHub] [incubator-seatunnel] hailin0 commented on a diff in pull request #2889: [Improve][Connector-V2][Kafka Sink]custom partition

hailin0 commented on code in PR #2889:
URL: https://github.com/apache/incubator-seatunnel/pull/2889#discussion_r981918523


##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/serialize/DefaultSeaTunnelRowSerializer.java:
##########
@@ -33,8 +34,19 @@ public DefaultSeaTunnelRowSerializer(String topic, SeaTunnelRowType seaTunnelRow
         this.jsonSerializationSchema = new JsonSerializationSchema(seaTunnelRowType);
     }
 
+    public DefaultSeaTunnelRowSerializer(String topic, int partation, SeaTunnelRowType seaTunnelRowType) {
+        this.topic = topic;
+        this.partation = partation;
+        this.jsonSerializationSchema = new JsonSerializationSchema(seaTunnelRowType);

Review Comment:
   ```suggestion
           this(topic, seaTunnelRowType);
           this.partation = partation;
   ```



##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/CustomPartitioner.java:
##########
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kafka.sink;
+
+import org.apache.kafka.clients.producer.Partitioner;
+import org.apache.kafka.common.Cluster;
+import org.apache.kafka.common.PartitionInfo;
+
+import java.util.List;
+import java.util.Map;
+
+public class CustomPartitioner implements Partitioner {
+    List<String> assignPartitions = KafkaSinkWriter.getASSIGNPARTATIONS();
+    @Override
+    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
+        List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
+        int numPartitions = partitions.size();
+
+        int assignPartitionsSize = assignPartitions.size();
+        String message = new String(valueBytes);
+        for (int i = 0; i < assignPartitionsSize; i++) {
+            if (message.contains(assignPartitions.get(i))) {
+                return i;
+            }
+        }
+        //Choose one of the remaining partitions according to the hashcode.
+        return (Math.abs(message.hashCode()) % (numPartitions - assignPartitionsSize)) + assignPartitionsSize;

Review Comment:
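   Math.abs(message.hashCode()) ->  message.hashCode() & Integer.MAX_VALUE (Math.abs returns a negative value when hashCode() is Integer.MIN_VALUE, which could produce an invalid partition index)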



##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/CustomPartitioner.java:
##########
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kafka.sink;
+
+import org.apache.kafka.clients.producer.Partitioner;
+import org.apache.kafka.common.Cluster;
+import org.apache.kafka.common.PartitionInfo;
+
+import java.util.List;
+import java.util.Map;
+
+public class CustomPartitioner implements Partitioner {
+    List<String> assignPartitions = KafkaSinkWriter.getASSIGNPARTATIONS();
+    @Override
+    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
+        List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
+        int numPartitions = partitions.size();
+
+        int assignPartitionsSize = assignPartitions.size();
+        String message = new String(valueBytes);
+        for (int i = 0; i < assignPartitionsSize; i++) {
+            if (message.contains(assignPartitions.get(i))) {
+                return i;
+            }
+        }
+        //Choose one of the remaining partitions according to the hashcode.
+        return (Math.abs(message.hashCode()) % (numPartitions - assignPartitionsSize)) + assignPartitionsSize;

Review Comment:
   For reference, see:
   
   https://github.com/apache/incubator-seatunnel/pull/2921



##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkWriter.java:
##########
@@ -129,6 +143,9 @@ private Properties getKafkaProperties(Config pluginConfig) {
         kafkaConfig.entrySet().forEach(entry -> {
             kafkaProperties.put(entry.getKey(), entry.getValue().unwrapped());
         });
+        if (pluginConfig.hasPath(ASSIGN_PARTITIONS)) {
+            kafkaProperties.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, "org.apache.seatunnel.connectors.seatunnel.kafka.sink.CustomPartitioner");
+        }

Review Comment:
   Remove this code block; the partitioner class can be set directly in the sink config instead.
   
   
   
   Example:
   
   ```hocon
   sink {
     kafka {
        ...
         kafka.partitioner.class = org.apache.seatunnel.connectors.seatunnel.kafka.sink.CustomPartitioner
     }
   }
   ```
   
   You can add this example to the docs.



##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkWriter.java:
##########
@@ -51,11 +53,17 @@ public class KafkaSinkWriter implements SinkWriter<SeaTunnelRow, KafkaCommitInfo
 
     private String transactionPrefix;
     private long lastCheckpointId = 0;
+    private int partition;
 
     private final KafkaProduceSender<byte[], byte[]> kafkaProducerSender;
     private final SeaTunnelRowSerializer<byte[], byte[]> seaTunnelRowSerializer;
 
     private static final int PREFIX_RANGE = 10000;
+    private static List<String> ASSIGNPARTATIONS;
+
+    public static List<String> getASSIGNPARTATIONS() {
+        return ASSIGNPARTATIONS;
+    }

Review Comment:
   remove



##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/Config.java:
##########
@@ -50,4 +49,14 @@ public class Config {
      * The prefix of kafka's transactionId, make sure different job use different prefix.
      */
     public static final String TRANSACTION_PREFIX = "transaction_prefix";
+
+    /**
+     * Send information according to the specified partition.
+     */
+    public static final String PARTITION = "partition";
+
+    /**
+     * Determine the partition to send based on the content of the message.
+     */
+    public static final String ASSIGN_PARTITIONS = "assign_partitions";

Review Comment:
   remove



##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkWriter.java:
##########
@@ -71,6 +79,12 @@ public KafkaSinkWriter(
             List<KafkaSinkState> kafkaStates) {
         this.context = context;
         this.pluginConfig = pluginConfig;
+        if (pluginConfig.hasPath(PARTITION)) {
+            this.partition = pluginConfig.getInt(PARTITION);
+        }
+        if (pluginConfig.hasPath(ASSIGN_PARTITIONS)) {
+            ASSIGNPARTATIONS = pluginConfig.getStringList(ASSIGN_PARTITIONS);

Review Comment:
   ```suggestion
               CustomPartitioner.setAssignPartitions(pluginConfig.getStringList(ASSIGN_PARTITIONS));
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@seatunnel.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org