You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jira@kafka.apache.org by GitBox <gi...@apache.org> on 2021/10/12 20:31:17 UTC

[GitHub] [kafka] guozhangwang commented on a change in pull request #11368: KAFKA-13261: Add support for custom partitioners in foreign key joins

guozhangwang commented on a change in pull request #11368:
URL: https://github.com/apache/kafka/pull/11368#discussion_r727481457



##########
File path: streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyInnerJoinCustomPartitionerIntegrationTest.java
##########
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.streams.integration;
+
+import static java.time.Duration.ofSeconds;
+import static java.util.Arrays.asList;
+import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.startApplicationAndWaitUntilRunning;
+import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.kafka.common.serialization.StringSerializer;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.KafkaStreams;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
+import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
+import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.KTable;
+import org.apache.kafka.streams.kstream.Materialized;
+import org.apache.kafka.streams.kstream.Named;
+import org.apache.kafka.streams.kstream.Produced;
+import org.apache.kafka.streams.kstream.Repartitioned;
+import org.apache.kafka.streams.kstream.TableJoined;
+import org.apache.kafka.streams.kstream.ValueJoiner;
+import org.apache.kafka.streams.state.KeyValueStore;
+import org.apache.kafka.streams.utils.UniqueTopicSerdeScope;
+import org.apache.kafka.test.IntegrationTest;
+import org.apache.kafka.test.TestUtils;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import kafka.utils.MockTime;
+
+@Category({IntegrationTest.class})
+public class KTableKTableForeignKeyInnerJoinCustomPartitionerIntegrationTest {
+    private final static int NUM_BROKERS = 1;
+
+    public final static EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
+    private final static MockTime MOCK_TIME = CLUSTER.time;
+    private final static String TABLE_1 = "table1";
+    private final static String TABLE_2 = "table2";
+    private final static String OUTPUT = "output-";
+    private final Properties streamsConfig = getStreamsConfig();
+    private final Properties streamsConfigTwo = getStreamsConfig();
+    private final Properties streamsConfigThree = getStreamsConfig();
+    private KafkaStreams streams;
+    private KafkaStreams streamsTwo;
+    private KafkaStreams streamsThree;
+    private final static Properties CONSUMER_CONFIG = new Properties();
+
+    private final static Properties PRODUCER_CONFIG_1 = new Properties();
+    private final static Properties PRODUCER_CONFIG_2 = new Properties();
+
+    @BeforeClass
+    public static void startCluster() throws IOException, InterruptedException {
+        CLUSTER.start();
+        //Use multiple partitions to ensure distribution of keys.
+
+        CLUSTER.createTopic(TABLE_1, 4, 1);
+        CLUSTER.createTopic(TABLE_2, 4, 1);
+        CLUSTER.createTopic(OUTPUT, 4, 1);
+
+        PRODUCER_CONFIG_1.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
+        PRODUCER_CONFIG_1.put(ProducerConfig.ACKS_CONFIG, "all");
+        PRODUCER_CONFIG_1.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
+        PRODUCER_CONFIG_1.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
+
+        PRODUCER_CONFIG_2.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
+        PRODUCER_CONFIG_2.put(ProducerConfig.ACKS_CONFIG, "all");
+        PRODUCER_CONFIG_2.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
+        PRODUCER_CONFIG_2.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
+
+        final List<KeyValue<String, String>> table1 = asList(
+            new KeyValue<>("ID123-1", "ID123-A1"),
+            new KeyValue<>("ID123-2", "ID123-A2"),
+            new KeyValue<>("ID123-3", "ID123-A3"),
+            new KeyValue<>("ID123-4", "ID123-A4")
+        );
+
+        final List<KeyValue<String, String>> table2 = asList(
+            new KeyValue<>("ID123", "BBB")
+        );
+
+        IntegrationTestUtils.produceKeyValuesSynchronously(TABLE_1, table1, PRODUCER_CONFIG_1, MOCK_TIME);
+        IntegrationTestUtils.produceKeyValuesSynchronously(TABLE_2, table2, PRODUCER_CONFIG_2, MOCK_TIME);
+
+        CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
+        CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, "ktable-ktable-consumer");
+        CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
+        CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
+    }
+
+    @AfterClass
+    public static void closeCluster() {
+        CLUSTER.stop();
+    }
+
+    @Before
+    public void before() throws IOException {
+        final String stateDirBasePath = TestUtils.tempDirectory().getPath();
+        streamsConfig.put(StreamsConfig.STATE_DIR_CONFIG, stateDirBasePath + "-1");
+        streamsConfigTwo.put(StreamsConfig.STATE_DIR_CONFIG, stateDirBasePath + "-2");
+        streamsConfigThree.put(StreamsConfig.STATE_DIR_CONFIG, stateDirBasePath + "-3");
+    }
+
+    @After
+    public void after() throws IOException {
+        if (streams != null) {
+            streams.close();
+            streams = null;
+        }
+        if (streamsTwo != null) {
+            streamsTwo.close();
+            streamsTwo = null;
+        }
+        if (streamsThree != null) {
+            streamsThree.close();
+            streamsThree = null;
+        }
+        IntegrationTestUtils.purgeLocalStreamsState(asList(streamsConfig, streamsConfigTwo, streamsConfigThree));
+    }
+
+    @Test
+    public void shouldInnerJoinMultiPartitionQueryable() throws Exception {
+        final Set<KeyValue<String, String>> expectedOne = new HashSet<>();
+        expectedOne.add(new KeyValue<>("ID123-1", "value1=ID123-A1,value2=BBB"));
+        expectedOne.add(new KeyValue<>("ID123-2", "value1=ID123-A2,value2=BBB"));
+        expectedOne.add(new KeyValue<>("ID123-3", "value1=ID123-A3,value2=BBB"));
+        expectedOne.add(new KeyValue<>("ID123-4", "value1=ID123-A4,value2=BBB"));
+
+        verifyKTableKTableJoin(expectedOne);
+    }
+
+    private void verifyKTableKTableJoin(final Set<KeyValue<String, String>> expectedResult) throws Exception {
+        final String innerJoinType = "INNER";
+        final String queryableName = innerJoinType + "-store1";
+
+        streams = prepareTopology(queryableName, streamsConfig);

Review comment:
       Why create three identical streams app in the test?

##########
File path: streams/src/main/java/org/apache/kafka/streams/kstream/TableJoined.java
##########
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.kstream;
+
+import org.apache.kafka.streams.processor.StreamPartitioner;
+
+import java.util.function.Function;
+
+/**
+ * The {@code TableJoined} class represents optional params that can be passed to
+ * {@link KTable#join(KTable, Function, ValueJoiner, TableJoined) KTable#join(KTable,Function,...)} and
+ * {@link KTable#leftJoin(KTable, Function, ValueJoiner, TableJoined) KTable#leftJoin(KTable,Function,...)}
+ * operations, for foreign key joins.
+ * @param <K>   this key type ; key type for the left (primary) table
+ * @param <KO>  other key type ; key type for the right (foreign key) table
+ */
+public class TableJoined<K, KO> implements NamedOperation<TableJoined<K, KO>> {
+
+    protected final StreamPartitioner<K, Void> partitioner;
+    protected final StreamPartitioner<KO, Void> otherPartitioner;
+    protected final String name;
+
+    private TableJoined(final StreamPartitioner<K, Void> partitioner,
+                        final StreamPartitioner<KO, Void> otherPartitioner,
+                        final String name) {
+        this.partitioner = partitioner;
+        this.otherPartitioner = otherPartitioner;
+        this.name = name;
+    }
+
+    protected TableJoined(final TableJoined<K, KO> tableJoined) {
+        this(tableJoined.partitioner, tableJoined.otherPartitioner, tableJoined.name);
+    }
+
+    /**
+     * Create an instance of {@code TableJoined} with partitioner and otherPartitioner {@link StreamPartitioner} instances.

Review comment:
       I think we should also emphasize that the partitioners are aligned with the left and right tables' own partitioning schemes. That means if the left and right tables are originally partitioned that involves the message values, then we are still "doomed"..




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: jira-unsubscribe@kafka.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org