You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jira@kafka.apache.org by GitBox <gi...@apache.org> on 2021/03/03 19:15:27 UTC

[GitHub] [kafka] mjsax commented on a change in pull request #10150: KAFKA-3745: Add access to read-only key in value joiner

mjsax commented on a change in pull request #10150:
URL: https://github.com/apache/kafka/pull/10150#discussion_r586703617



##########
File path: streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java
##########
@@ -792,12 +802,24 @@ public void shouldNotAllowNullValueJoinerOnJoinWithStreamJoined() {
             NullPointerException.class,
             () -> testStream.join(
                 testStream,
-                null,
+                    (ValueJoiner<? super String, ? super String, ?>) null,

Review comment:
       nit: fix indentation

##########
File path: streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java
##########
@@ -916,12 +946,25 @@ public void shouldNotAllowNullValueJoinerOnLeftJoinWithStreamJoined() {
             NullPointerException.class,
             () -> testStream.leftJoin(
                 testStream,
-                null,
+                    (ValueJoiner<? super String, ? super String, ?>) null,

Review comment:
       nit: fix indentation

##########
File path: streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java
##########
@@ -1040,12 +1091,24 @@ public void shouldNotAllowNullValueJoinerOnOuterJoinWithStreamJoined() {
             NullPointerException.class,
             () -> testStream.outerJoin(
                 testStream,
-                null,
+                    (ValueJoiner<? super String, ? super String, ?>) null,

Review comment:
       indentation

##########
File path: streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java
##########
@@ -1315,15 +1315,15 @@ void to(final TopicNameExtractor<K, V> topicExtractor,
                                  final JoinWindows windows);
 
     /**
-     * Join records of this stream with another {@code KStream}'s records using windowed inner equi join using the

Review comment:
       Oh dear... This diff is confusing... I guess you actually inserted this as a copy from above with only slight modifications.
   
   It might be helpful if you could highlight the actual changes to the JavaDocs. The two below are clear. Are there others? Or are the same changes applied to all other methods?

##########
File path: streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java
##########
@@ -1225,25 +1320,45 @@ public void shouldNotAllowNullMapperOnJoinWithGlobalTableWithNamed() {
     }
 
     @Test
-    public void shouldNotAllowNullJoinerOnJoinWithGlobalTable() {
+    public void shouldNotAllowNullValueJoinerOnJoinWithGlobalTable() {
+        final NullPointerException exception = assertThrows(
+            NullPointerException.class,
+            () -> testStream.join(testGlobalTable, MockMapper.selectValueMapper(), (ValueJoiner<? super String, ? super String, ?>) null));
+        assertThat(exception.getMessage(), equalTo("joiner can't be null"));
+    }
+
+    @Test
+    public void shouldNotAllowNullValueJoinerWithKeyOnJoinWithGlobalTable() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
-            () -> testStream.join(testGlobalTable, MockMapper.selectValueMapper(), null));
+            () -> testStream.join(testGlobalTable, MockMapper.selectValueMapper(), (ValueJoinerWithKey<? super String, ? super String, ? super String, ?>) null));
         assertThat(exception.getMessage(), equalTo("joiner can't be null"));
     }
 
     @Test
-    public void shouldNotAllowNullJoinerOnJoinWithGlobalTableWithNamed() {
+    public void shouldNotAllowNullValueJoinerOnJoinWithGlobalTableWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
             () -> testStream.join(
                 testGlobalTable,
                 MockMapper.selectValueMapper(),
-                null,
+                    (ValueJoiner<? super String, ? super String, ?>) null,

Review comment:
       indentation

##########
File path: streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplValueJoinerWithKeyTest.java
##########
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.kstream.internals;
+
+import org.apache.kafka.common.serialization.IntegerSerializer;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.kafka.common.serialization.StringSerializer;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.TestInputTopic;
+import org.apache.kafka.streams.TestOutputTopic;
+import org.apache.kafka.streams.TopologyTestDriver;
+import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.GlobalKTable;
+import org.apache.kafka.streams.kstream.JoinWindows;
+import org.apache.kafka.streams.kstream.Joined;
+import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.KTable;
+import org.apache.kafka.streams.kstream.KeyValueMapper;
+import org.apache.kafka.streams.kstream.Produced;
+import org.apache.kafka.streams.kstream.StreamJoined;
+import org.apache.kafka.streams.kstream.ValueJoinerWithKey;
+import org.apache.kafka.test.StreamsTestUtils;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Properties;
+
+import static java.time.Duration.ofMillis;
+import static org.junit.Assert.assertEquals;
+
+public class KStreamImplValueJoinerWithKeyTest {
+
+    private KStream<String, Integer> leftStream;
+    private KStream<String, Integer> rightStream;
+    private KTable<String, Integer> ktable;
+    private GlobalKTable<String, Integer> globalKTable;
+    private StreamsBuilder builder;
+
+    private final Properties props = StreamsTestUtils.getStreamsConfig(Serdes.String(), Serdes.String());
+    private final String leftTopic = "left";
+    private final String rightTopic = "right";
+    private final String ktableTopic = "ktableTopic";
+    private final String globalTopic = "globalTopic";
+    private final String outputTopic = "joined-result";
+
+    private final ValueJoinerWithKey<String, Integer, Integer, String> valueJoinerWithKey =
+        (key, lv, rv) -> key + ":" + (lv + (rv == null ? 0 : rv));
+    private final JoinWindows joinWindows = JoinWindows.of(ofMillis(100));
+    private final StreamJoined<String, Integer, Integer> streamJoined =
+            StreamJoined.with(Serdes.String(), Serdes.Integer(), Serdes.Integer());
+    private final Joined<String, Integer, Integer> joined =
+            Joined.with(Serdes.String(), Serdes.Integer(), Serdes.Integer());
+    private final KeyValueMapper<String, Integer, String> keyValueMapper =
+        (k, v) -> k;
+
+    @Before
+    public void setup() {
+        builder = new StreamsBuilder();
+        leftStream = builder.stream(leftTopic, Consumed.with(Serdes.String(), Serdes.Integer()));
+        rightStream = builder.stream(rightTopic, Consumed.with(Serdes.String(), Serdes.Integer()));
+        ktable = builder.table(ktableTopic, Consumed.with(Serdes.String(), Serdes.Integer()));
+        globalKTable = builder.globalTable(globalTopic, Consumed.with(Serdes.String(), Serdes.Integer()));
+    }
+
+    @Test
+    public void shouldIncludeKeyInStreamSteamJoinResults() {
+        leftStream.join(
+                rightStream,
+                valueJoinerWithKey,
+                joinWindows,
+                streamJoined
+        ).to(outputTopic, Produced.with(Serdes.String(), Serdes.String()));
+        // Left KV A, 3, Right KV A, 5
+        runJoinTopology(builder,
+                Collections.singletonList(KeyValue.pair("A", "A:5")),
+                false,
+                rightTopic
+        );
+    }
+
+    @Test
+    public void shouldIncludeKeyInStreamLeftJoinResults() {
+        leftStream.leftJoin(
+                rightStream,
+                valueJoinerWithKey,
+                joinWindows,
+                streamJoined
+        ).to(outputTopic, Produced.with(Serdes.String(), Serdes.String()));
+        // Left KV A, 3, Right KV A, 5
+        // TTD pipes records to left stream first, then right
+        // with TTD there's no caching, so join emits immediately with "A, 3, null" then "A, 3, 5"
+        final List<KeyValue<String, String>> expectedResults = Arrays.asList(KeyValue.pair("A", "A:3"), KeyValue.pair("A", "A:5"));
+        runJoinTopology(builder,
+                expectedResults,
+                false,
+                rightTopic
+        );
+    }
+
+    @Test
+    public void shouldIncludeKeyInStreamOuterJoinResults() {
+        leftStream.outerJoin(
+                rightStream,
+                valueJoinerWithKey,
+                joinWindows,
+                streamJoined
+        ).to(outputTopic, Produced.with(Serdes.String(), Serdes.String()));
+
+        // Left KV A, 3, Right KV A, 5

Review comment:
       To actually test the outer join, should we not send a second right record with a different key (i.e., `B`)? (I assume you copied an existing test, so maybe we can improve both in a follow-up PR?)

##########
File path: streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java
##########
@@ -1315,15 +1315,15 @@ void to(final TopicNameExtractor<K, V> topicExtractor,
                                  final JoinWindows windows);
 
     /**
-     * Join records of this stream with another {@code KStream}'s records using windowed inner equi join using the
-     * {@link Joined} instance for configuration of the {@link Serde key serde}, {@link Serde this stream's value serde},
-     * and {@link Serde the other stream's value serde}.
+     * Join records of this stream with another {@code KStream}'s records using windowed inner equi join with default
+     * serializers and deserializers.
      * The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
      * Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
      * {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
      * <p>
-     * For each pair of records meeting both join predicates the provided {@link ValueJoiner} will be called to compute
+     * For each pair of records meeting both join predicates the provided {@link ValueJoinerWithKey} will be called to compute
      * a value (with arbitrary type) for the result record.
+     * Note that the key is read-only and should not be modified, as this can lead to undefined behaviour.

Review comment:
       And this seems to be an actual change?

##########
File path: streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java
##########
@@ -1315,15 +1315,15 @@ void to(final TopicNameExtractor<K, V> topicExtractor,
                                  final JoinWindows windows);
 
     /**
-     * Join records of this stream with another {@code KStream}'s records using windowed inner equi join using the
-     * {@link Joined} instance for configuration of the {@link Serde key serde}, {@link Serde this stream's value serde},
-     * and {@link Serde the other stream's value serde}.
+     * Join records of this stream with another {@code KStream}'s records using windowed inner equi join with default
+     * serializers and deserializers.
      * The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
      * Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
      * {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
      * <p>
-     * For each pair of records meeting both join predicates the provided {@link ValueJoiner} will be called to compute
+     * For each pair of records meeting both join predicates the provided {@link ValueJoinerWithKey} will be called to compute

Review comment:
       This seems to be an actual change?

##########
File path: streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java
##########
@@ -1285,25 +1400,45 @@ public void shouldNotAllowNullMapperOnLeftJoinWithGlobalTableWithNamed() {
     }
 
     @Test
-    public void shouldNotAllowNullJoinerOnLeftJoinWithGlobalTable() {
+    public void shouldNotAllowNullValueJoinerOnLeftJoinWithGlobalTable() {
+        final NullPointerException exception = assertThrows(
+            NullPointerException.class,
+            () -> testStream.leftJoin(testGlobalTable, MockMapper.selectValueMapper(), (ValueJoiner<? super String, ? super String, ?>) null));
+        assertThat(exception.getMessage(), equalTo("joiner can't be null"));
+    }
+
+    @Test
+    public void shouldNotAllowNullValueJoinerWithKeyOnLeftJoinWithGlobalTable() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
-            () -> testStream.leftJoin(testGlobalTable, MockMapper.selectValueMapper(), null));
+            () -> testStream.leftJoin(testGlobalTable, MockMapper.selectValueMapper(), (ValueJoinerWithKey<? super String, ? super String, ? super String, ?>) null));
         assertThat(exception.getMessage(), equalTo("joiner can't be null"));
     }
 
     @Test
-    public void shouldNotAllowNullJoinerOnLeftJoinWithGlobalTableWithNamed() {
+    public void shouldNotAllowNullValueJoinerOnLeftJoinWithGlobalTableWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
             () -> testStream.leftJoin(
                 testGlobalTable,
                 MockMapper.selectValueMapper(),
-                null,
+                    (ValueJoiner<? super String, ? super String, ?>) null,

Review comment:
       indentation

##########
File path: streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplValueJoinerWithKeyTest.java
##########
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.kstream.internals;
+
+import org.apache.kafka.common.serialization.IntegerSerializer;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.kafka.common.serialization.StringSerializer;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.TestInputTopic;
+import org.apache.kafka.streams.TestOutputTopic;
+import org.apache.kafka.streams.TopologyTestDriver;
+import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.GlobalKTable;
+import org.apache.kafka.streams.kstream.JoinWindows;
+import org.apache.kafka.streams.kstream.Joined;
+import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.KTable;
+import org.apache.kafka.streams.kstream.KeyValueMapper;
+import org.apache.kafka.streams.kstream.Produced;
+import org.apache.kafka.streams.kstream.StreamJoined;
+import org.apache.kafka.streams.kstream.ValueJoinerWithKey;
+import org.apache.kafka.test.StreamsTestUtils;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Properties;
+
+import static java.time.Duration.ofMillis;
+import static org.junit.Assert.assertEquals;
+
+public class KStreamImplValueJoinerWithKeyTest {
+
+    private KStream<String, Integer> leftStream;
+    private KStream<String, Integer> rightStream;
+    private KTable<String, Integer> ktable;
+    private GlobalKTable<String, Integer> globalKTable;
+    private StreamsBuilder builder;
+
+    private final Properties props = StreamsTestUtils.getStreamsConfig(Serdes.String(), Serdes.String());
+    private final String leftTopic = "left";
+    private final String rightTopic = "right";
+    private final String ktableTopic = "ktableTopic";
+    private final String globalTopic = "globalTopic";
+    private final String outputTopic = "joined-result";
+
+    private final ValueJoinerWithKey<String, Integer, Integer, String> valueJoinerWithKey =
+        (key, lv, rv) -> key + ":" + (lv + (rv == null ? 0 : rv));
+    private final JoinWindows joinWindows = JoinWindows.of(ofMillis(100));
+    private final StreamJoined<String, Integer, Integer> streamJoined =
+            StreamJoined.with(Serdes.String(), Serdes.Integer(), Serdes.Integer());
+    private final Joined<String, Integer, Integer> joined =
+            Joined.with(Serdes.String(), Serdes.Integer(), Serdes.Integer());
+    private final KeyValueMapper<String, Integer, String> keyValueMapper =
+        (k, v) -> k;
+
+    @Before
+    public void setup() {
+        builder = new StreamsBuilder();
+        leftStream = builder.stream(leftTopic, Consumed.with(Serdes.String(), Serdes.Integer()));
+        rightStream = builder.stream(rightTopic, Consumed.with(Serdes.String(), Serdes.Integer()));
+        ktable = builder.table(ktableTopic, Consumed.with(Serdes.String(), Serdes.Integer()));
+        globalKTable = builder.globalTable(globalTopic, Consumed.with(Serdes.String(), Serdes.Integer()));
+    }
+
+    @Test
+    public void shouldIncludeKeyInStreamSteamJoinResults() {
+        leftStream.join(
+                rightStream,
+                valueJoinerWithKey,
+                joinWindows,
+                streamJoined
+        ).to(outputTopic, Produced.with(Serdes.String(), Serdes.String()));
+        // Left KV A, 3, Right KV A, 5
+        runJoinTopology(builder,
+                Collections.singletonList(KeyValue.pair("A", "A:5")),
+                false,
+                rightTopic
+        );
+    }
+
+    @Test
+    public void shouldIncludeKeyInStreamLeftJoinResults() {
+        leftStream.leftJoin(
+                rightStream,
+                valueJoinerWithKey,
+                joinWindows,
+                streamJoined
+        ).to(outputTopic, Produced.with(Serdes.String(), Serdes.String()));
+        // Left KV A, 3, Right KV A, 5
+        // TTD pipes records to left stream first, then right
+        // with TTD there's no caching, so join emits immediately with "A, 3, null" then "A, 3, 5"
+        final List<KeyValue<String, String>> expectedResults = Arrays.asList(KeyValue.pair("A", "A:3"), KeyValue.pair("A", "A:5"));
+        runJoinTopology(builder,
+                expectedResults,
+                false,
+                rightTopic
+        );
+    }
+
+    @Test
+    public void shouldIncludeKeyInStreamOuterJoinResults() {
+        leftStream.outerJoin(
+                rightStream,
+                valueJoinerWithKey,
+                joinWindows,
+                streamJoined
+        ).to(outputTopic, Produced.with(Serdes.String(), Serdes.String()));
+
+        // Left KV A, 3, Right KV A, 5
+        // TTD pipes records to left stream first, then right
+        // with TTD there's no caching, so join emits immediately with "A, 3, null" then "A, 3, 5"
+        final List<KeyValue<String, String>> expectedResults = Arrays.asList(KeyValue.pair("A", "A:3"), KeyValue.pair("A", "A:5"));
+        runJoinTopology(builder,
+                expectedResults,
+                false,
+                rightTopic
+        );
+    }
+
+    @Test
+    public void shouldIncludeKeyInStreamTableJoinResults() {
+        leftStream.join(
+            ktable,
+            valueJoinerWithKey,
+            joined
+        ).to(outputTopic, Produced.with(Serdes.String(), Serdes.String()));
+        // Left KV A, 3, Table KV A, 5
+        runJoinTopology(builder,
+                Collections.singletonList(KeyValue.pair("A", "A:5")),
+                true,
+                ktableTopic
+        );
+    }
+
+    @Test
+    public void shouldIncludeKeyInStreamTableLeftJoinResults() {
+        leftStream.leftJoin(
+            ktable,
+            valueJoinerWithKey,
+            joined
+        ).to(outputTopic, Produced.with(Serdes.String(), Serdes.String()));
+        // Left KV A, 3, Table KV A, 5

Review comment:
       Similar to above: should we send one more left record with key `C`?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org