You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@seatunnel.apache.org by GitBox <gi...@apache.org> on 2022/09/28 04:14:54 UTC
[GitHub] [incubator-seatunnel] hailin0 commented on a diff in pull request #2889: [Improve][Connector-V2][Kafka Sink]custom partition
hailin0 commented on code in PR #2889:
URL: https://github.com/apache/incubator-seatunnel/pull/2889#discussion_r981918523
##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/serialize/DefaultSeaTunnelRowSerializer.java:
##########
@@ -33,8 +34,19 @@ public DefaultSeaTunnelRowSerializer(String topic, SeaTunnelRowType seaTunnelRow
this.jsonSerializationSchema = new JsonSerializationSchema(seaTunnelRowType);
}
+ public DefaultSeaTunnelRowSerializer(String topic, int partation, SeaTunnelRowType seaTunnelRowType) {
+ this.topic = topic;
+ this.partation = partation;
+ this.jsonSerializationSchema = new JsonSerializationSchema(seaTunnelRowType);
Review Comment:
```suggestion
this(topic, seaTunnelRowType);
this.partation = partation;
```
##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/CustomPartitioner.java:
##########
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kafka.sink;
+
+import org.apache.kafka.clients.producer.Partitioner;
+import org.apache.kafka.common.Cluster;
+import org.apache.kafka.common.PartitionInfo;
+
+import java.util.List;
+import java.util.Map;
+
+public class CustomPartitioner implements Partitioner {
+ List<String> assignPartitions = KafkaSinkWriter.getASSIGNPARTATIONS();
+ @Override
+ public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
+ List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
+ int numPartitions = partitions.size();
+
+ int assignPartitionsSize = assignPartitions.size();
+ String message = new String(valueBytes);
+ for (int i = 0; i < assignPartitionsSize; i++) {
+ if (message.contains(assignPartitions.get(i))) {
+ return i;
+ }
+ }
+ //Choose one of the remaining partitions according to the hashcode.
+ return (Math.abs(message.hashCode()) % (numPartitions - assignPartitionsSize)) + assignPartitionsSize;
Review Comment:
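Replace `Math.abs(message.hashCode())` with `message.hashCode() & Integer.MAX_VALUE`: `Math.abs(Integer.MIN_VALUE)` is still negative, so the current expression can produce a negative partition index.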
##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/CustomPartitioner.java:
##########
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kafka.sink;
+
+import org.apache.kafka.clients.producer.Partitioner;
+import org.apache.kafka.common.Cluster;
+import org.apache.kafka.common.PartitionInfo;
+
+import java.util.List;
+import java.util.Map;
+
+public class CustomPartitioner implements Partitioner {
+ List<String> assignPartitions = KafkaSinkWriter.getASSIGNPARTATIONS();
+ @Override
+ public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
+ List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
+ int numPartitions = partitions.size();
+
+ int assignPartitionsSize = assignPartitions.size();
+ String message = new String(valueBytes);
+ for (int i = 0; i < assignPartitionsSize; i++) {
+ if (message.contains(assignPartitions.get(i))) {
+ return i;
+ }
+ }
+ //Choose one of the remaining partitions according to the hashcode.
+ return (Math.abs(message.hashCode()) % (numPartitions - assignPartitionsSize)) + assignPartitionsSize;
Review Comment:
For reference, see:
https://github.com/apache/incubator-seatunnel/pull/2921
##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkWriter.java:
##########
@@ -129,6 +143,9 @@ private Properties getKafkaProperties(Config pluginConfig) {
kafkaConfig.entrySet().forEach(entry -> {
kafkaProperties.put(entry.getKey(), entry.getValue().unwrapped());
});
+ if (pluginConfig.hasPath(ASSIGN_PARTITIONS)) {
+ kafkaProperties.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, "org.apache.seatunnel.connectors.seatunnel.kafka.sink.CustomPartitioner");
+ }
Review Comment:
Remove this code block; the partitioner class can be set directly in the sink configuration instead.
Example:
```hocon
sink {
kafka {
...
kafka.partitioner.class = org.apache.seatunnel.connectors.seatunnel.kafka.sink.CustomPartitioner
}
}
```
You can add this example to the docs.
##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkWriter.java:
##########
@@ -51,11 +53,17 @@ public class KafkaSinkWriter implements SinkWriter<SeaTunnelRow, KafkaCommitInfo
private String transactionPrefix;
private long lastCheckpointId = 0;
+ private int partition;
private final KafkaProduceSender<byte[], byte[]> kafkaProducerSender;
private final SeaTunnelRowSerializer<byte[], byte[]> seaTunnelRowSerializer;
private static final int PREFIX_RANGE = 10000;
+ private static List<String> ASSIGNPARTATIONS;
+
+ public static List<String> getASSIGNPARTATIONS() {
+ return ASSIGNPARTATIONS;
+ }
Review Comment:
remove
##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/Config.java:
##########
@@ -50,4 +49,14 @@ public class Config {
* The prefix of kafka's transactionId, make sure different job use different prefix.
*/
public static final String TRANSACTION_PREFIX = "transaction_prefix";
+
+ /**
+ * Send information according to the specified partition.
+ */
+ public static final String PARTITION = "partition";
+
+ /**
+ * Determine the partition to send based on the content of the message.
+ */
+ public static final String ASSIGN_PARTITIONS = "assign_partitions";
Review Comment:
remove
##########
seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkWriter.java:
##########
@@ -71,6 +79,12 @@ public KafkaSinkWriter(
List<KafkaSinkState> kafkaStates) {
this.context = context;
this.pluginConfig = pluginConfig;
+ if (pluginConfig.hasPath(PARTITION)) {
+ this.partition = pluginConfig.getInt(PARTITION);
+ }
+ if (pluginConfig.hasPath(ASSIGN_PARTITIONS)) {
+ ASSIGNPARTATIONS = pluginConfig.getStringList(ASSIGN_PARTITIONS);
Review Comment:
```suggestion
CustomPartitioner.setAssignPartitions(pluginConfig.getStringList(ASSIGN_PARTITIONS));
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@seatunnel.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org