You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@beam.apache.org by GitBox <gi...@apache.org> on 2021/11/10 19:44:13 UTC

[GitHub] [beam] lukecwik commented on a change in pull request #15549: [BEAM-11997] Changed RedisIO implementation to SDF

lukecwik commented on a change in pull request #15549:
URL: https://github.com/apache/beam/pull/15549#discussion_r746893559



##########
File path: sdks/java/io/redis/src/main/java/org/apache/beam/sdk/io/redis/RedisCursor.java
##########
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.redis;
+
+import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.ByteBuffer;
+import java.util.Objects;
+import javax.annotation.Nonnull;
+import org.apache.beam.sdk.coders.BigEndianLongCoder;
+import org.apache.beam.sdk.io.range.ByteKey;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+public class RedisCursor implements Comparable<RedisCursor>, Serializable {
+
+  public static final RedisCursor ZERO_CURSOR = RedisCursor.of("0", 8);
+
+  private final String cursor;
+  private final ByteKey byteCursor;
+  private final long dbSize;
+  private final int nBits;
+
+  public static RedisCursor of(String cursor, long dbSize) {
+    return new RedisCursor(cursor, dbSize);
+  }
+
+  public static RedisCursor of(ByteKey byteCursor, long dbSize) {
+    return new RedisCursor(byteCursor, dbSize);
+  }
+
+  private RedisCursor(ByteKey byteCursor, long dbSize) {
+    this.byteCursor = byteCursor;
+    this.dbSize = dbSize;
+    this.nBits = getTablePow(dbSize);
+    this.cursor = byteKeyToString(byteCursor, nBits);
+  }
+
+  private RedisCursor(String cursor, long dbSize) {
+    this.cursor = cursor;
+    this.dbSize = dbSize;
+    this.nBits = getTablePow(dbSize);
+    this.byteCursor = stringCursorToByteKey(cursor, this.nBits);
+  }
+
+  /**
+   * {@link RedisCursor} implements {@link Comparable Comparable&lt;RedisCursor&gt;} by transforming
+   * the cursors to an index of the Redis table.
+   */
+  @Override
+  public int compareTo(@Nonnull RedisCursor other) {
+    checkNotNull(other, "other");
+    return Long.compare(Long.parseLong(cursor), Long.parseLong(other.cursor));
+  }
+
+  @Override
+  public boolean equals(@Nullable Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    RedisCursor that = (RedisCursor) o;
+    return dbSize == that.dbSize
+        && nBits == that.nBits
+        && Objects.equals(cursor, that.cursor)
+        && Objects.equals(byteCursor, that.byteCursor);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(cursor, byteCursor, dbSize, nBits);
+  }
+
+  public String getCursor() {
+    return cursor;
+  }
+
+  public ByteKey getByteCursor() {
+    return byteCursor;
+  }
+
+  public long getDbSize() {
+    return dbSize;
+  }
+
+  @VisibleForTesting
+  static ByteKey stringCursorToByteKey(String cursor, int nBits) {
+    long cursorLong = Long.parseLong(cursor);
+    long reversed = shiftBits(cursorLong, nBits);
+    BigEndianLongCoder coder = BigEndianLongCoder.of();
+    ByteArrayOutputStream os = new ByteArrayOutputStream();
+    try {
+      coder.encode(reversed, os);
+    } catch (IOException e) {
+      throw new IllegalArgumentException("invalid redis cursor " + cursor);
+    }
+    byte[] byteArray = os.toByteArray();
+    return ByteKey.copyFrom(byteArray);
+  }
+
+  @VisibleForTesting
+  static long shiftBits(long a, int nBits) {
+    long b = 0;
+    for (int i = 0; i < nBits; ++i) {
+      b <<= 1;
+      b |= (a & 1);
+      a >>= 1;
+    }
+    return b;
+  }
+
+  @VisibleForTesting
+  static int getTablePow(long nKeys) {
+    return 64 - Long.numberOfLeadingZeros(nKeys - 1);
+  }
+
+  @VisibleForTesting
+  static String byteKeyToString(ByteKey byteKeyStart, int nBites) {
+    ByteBuffer bb = ByteBuffer.wrap(byteKeyStart.getBytes());
+    if (bb.capacity() < nBites) {
+      int rem = nBites - bb.capacity();
+      byte[] padding = new byte[rem];
+      bb = ByteBuffer.allocate(nBites).put(padding).put(bb.array());
+      bb.position(0);
+    }
+    long l = bb.getLong();
+    return Long.toString(l);
+  }

Review comment:
       ```suggestion
     static String byteKeyToString(ByteKey byteKeyStart, int nBits) {
       ByteBuffer bb = ByteBuffer.wrap(byteKeyStart.getBytes());
       if (bb.capacity() < nBits) {
         int rem = nBits - bb.capacity();
         byte[] padding = new byte[rem];
         bb = ByteBuffer.allocate(nBits).put(padding).put(bb.array());
         bb.position(0);
       }
       long l = bb.getLong();
       return Long.toString(l);
     }
   ```

##########
File path: sdks/java/io/redis/src/main/java/org/apache/beam/sdk/io/redis/RedisIO.java
##########
@@ -334,94 +373,47 @@ public void setup() {
     public void teardown() {
       jedis.close();
     }
-  }
 
-  private static class ReadKeysWithPattern extends BaseReadFn<String> {
-
-    ReadKeysWithPattern(RedisConnectionConfiguration connectionConfiguration) {
-      super(connectionConfiguration);
+    @GetInitialRestriction
+    public RedisCursorRange getInitialRestriction() {
+      long dbSize = jedis.dbSize();
+      return RedisCursorRange.of(RedisCursor.of("0", dbSize), RedisCursor.of("0", dbSize));
     }
 
     @ProcessElement
-    public void processElement(ProcessContext c) {
+    public ProcessContinuation processElement(
+        ProcessContext c, RestrictionTracker<RedisCursorRange, RedisCursor> tracker) {
+      RedisCursor cursor = tracker.currentRestriction().getStartPosition();
       ScanParams scanParams = new ScanParams();
       scanParams.match(c.element());
-
-      String cursor = ScanParams.SCAN_POINTER_START;
-      boolean finished = false;
-      while (!finished) {
-        ScanResult<String> scanResult = jedis.scan(cursor, scanParams);
-        List<String> keys = scanResult.getResult();
-        for (String k : keys) {
-          c.output(k);
+      while (tracker.tryClaim(cursor)) {
+        ScanResult<String> scanResult = jedis.scan(cursor.getCursor(), scanParams);
+        List<String> keys = new ArrayList<>();
+        for (String k : scanResult.getResult()) {
+          keys.add(k);
         }
-        cursor = scanResult.getCursor();
-        if (cursor.equals(ScanParams.SCAN_POINTER_START)) {
-          finished = true;
-        }
-      }
-    }
-  }
-
-  /** A {@link DoFn} requesting Redis server to get key/value pairs. */
-  private static class ReadFn extends BaseReadFn<KV<String, String>> {
-    transient @Nullable Multimap<BoundedWindow, String> bundles = null;
-    @Nullable AtomicInteger batchCount = null;
-    private final int batchSize;
-
-    ReadFn(RedisConnectionConfiguration connectionConfiguration, int batchSize) {
-      super(connectionConfiguration);
-      this.batchSize = batchSize;
-    }
-
-    @StartBundle
-    public void startBundle() {
-      bundles = ArrayListMultimap.create();
-      batchCount = new AtomicInteger();
-    }
-
-    @ProcessElement
-    public void processElement(ProcessContext c, BoundedWindow window) {
-      String key = c.element();
-      bundles.put(window, key);
-      if (batchCount.incrementAndGet() > getBatchSize()) {
-        Multimap<BoundedWindow, KV<String, String>> kvs = fetchAndFlush();
-        for (BoundedWindow w : kvs.keySet()) {
-          for (KV<String, String> kv : kvs.get(w)) {
+        if (keys.size() > 0) {
+          for (KV<String, String> kv : fetchAndFlush(keys)) {
             c.output(kv);
           }
         }
-      }
-    }
-
-    @FinishBundle
-    public void finishBundle(FinishBundleContext context) {
-      Multimap<BoundedWindow, KV<String, String>> kvs = fetchAndFlush();
-      for (BoundedWindow w : kvs.keySet()) {
-        for (KV<String, String> kv : kvs.get(w)) {
-          context.output(kv, w.maxTimestamp(), w);
+        if (RedisCursor.ZERO_CURSOR.getCursor().equals(scanResult.getCursor())) {
+          break;
         }

Review comment:
       I would expect the next tryClaim to fail, so you shouldn't need this exit condition here.
   
   You want the RedisCursor to differentiate `0` as the start or end by embedding a boolean like `isStart`.
   
   Then, in the comparison, `0 && isStart` compares before everything and `0 && !isStart` compares after everything.

##########
File path: sdks/java/io/redis/src/main/java/org/apache/beam/sdk/io/redis/RedisCursor.java
##########
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.redis;
+
+import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.ByteBuffer;
+import java.util.Objects;
+import javax.annotation.Nonnull;
+import org.apache.beam.sdk.coders.BigEndianLongCoder;
+import org.apache.beam.sdk.io.range.ByteKey;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+public class RedisCursor implements Comparable<RedisCursor>, Serializable {

Review comment:
       It makes more sense to treat this as a simple container class that contains
   ```
   String cursor
   long dbSize
   boolean isStart
   ```

##########
File path: sdks/java/io/redis/src/main/java/org/apache/beam/sdk/io/redis/RedisCursor.java
##########
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.redis;
+
+import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.ByteBuffer;
+import java.util.Objects;
+import javax.annotation.Nonnull;
+import org.apache.beam.sdk.coders.BigEndianLongCoder;
+import org.apache.beam.sdk.io.range.ByteKey;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+public class RedisCursor implements Comparable<RedisCursor>, Serializable {
+
+  public static final RedisCursor ZERO_CURSOR = RedisCursor.of("0", 8);
+
+  private final String cursor;
+  private final ByteKey byteCursor;
+  private final long dbSize;
+  private final int nBits;
+
+  public static RedisCursor of(String cursor, long dbSize) {
+    return new RedisCursor(cursor, dbSize);
+  }
+
+  public static RedisCursor of(ByteKey byteCursor, long dbSize) {
+    return new RedisCursor(byteCursor, dbSize);
+  }
+
+  private RedisCursor(ByteKey byteCursor, long dbSize) {
+    this.byteCursor = byteCursor;
+    this.dbSize = dbSize;
+    this.nBits = getTablePow(dbSize);
+    this.cursor = byteKeyToString(byteCursor, nBits);
+  }
+
+  private RedisCursor(String cursor, long dbSize) {
+    this.cursor = cursor;
+    this.dbSize = dbSize;
+    this.nBits = getTablePow(dbSize);
+    this.byteCursor = stringCursorToByteKey(cursor, this.nBits);
+  }
+
+  /**
+   * {@link RedisCursor} implements {@link Comparable Comparable&lt;RedisCursor&gt;} by transforming
+   * the cursors to an index of the Redis table.
+   */
+  @Override
+  public int compareTo(@Nonnull RedisCursor other) {
+    checkNotNull(other, "other");
+    return Long.compare(Long.parseLong(cursor), Long.parseLong(other.cursor));
+  }
+
+  @Override
+  public boolean equals(@Nullable Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    RedisCursor that = (RedisCursor) o;
+    return dbSize == that.dbSize
+        && nBits == that.nBits
+        && Objects.equals(cursor, that.cursor)
+        && Objects.equals(byteCursor, that.byteCursor);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(cursor, byteCursor, dbSize, nBits);
+  }
+
+  public String getCursor() {
+    return cursor;
+  }
+
+  public ByteKey getByteCursor() {
+    return byteCursor;
+  }
+
+  public long getDbSize() {
+    return dbSize;
+  }
+
+  @VisibleForTesting
+  static ByteKey stringCursorToByteKey(String cursor, int nBits) {
+    long cursorLong = Long.parseLong(cursor);
+    long reversed = shiftBits(cursorLong, nBits);
+    BigEndianLongCoder coder = BigEndianLongCoder.of();
+    ByteArrayOutputStream os = new ByteArrayOutputStream();
+    try {
+      coder.encode(reversed, os);
+    } catch (IOException e) {
+      throw new IllegalArgumentException("invalid redis cursor " + cursor);
+    }
+    byte[] byteArray = os.toByteArray();
+    return ByteKey.copyFrom(byteArray);
+  }
+
+  @VisibleForTesting
+  static long shiftBits(long a, int nBits) {
+    long b = 0;

Review comment:
       Can you simplify this with Long.reverse?
   
   It will also help with readability.

##########
File path: sdks/java/io/redis/src/main/java/org/apache/beam/sdk/io/redis/RedisIO.java
##########
@@ -334,94 +373,47 @@ public void setup() {
     public void teardown() {
       jedis.close();
     }
-  }
 
-  private static class ReadKeysWithPattern extends BaseReadFn<String> {
-
-    ReadKeysWithPattern(RedisConnectionConfiguration connectionConfiguration) {
-      super(connectionConfiguration);
+    @GetInitialRestriction
+    public RedisCursorRange getInitialRestriction() {
+      long dbSize = jedis.dbSize();
+      return RedisCursorRange.of(RedisCursor.of("0", dbSize), RedisCursor.of("0", dbSize));
     }
 
     @ProcessElement
-    public void processElement(ProcessContext c) {
+    public ProcessContinuation processElement(
+        ProcessContext c, RestrictionTracker<RedisCursorRange, RedisCursor> tracker) {
+      RedisCursor cursor = tracker.currentRestriction().getStartPosition();
       ScanParams scanParams = new ScanParams();
       scanParams.match(c.element());
-
-      String cursor = ScanParams.SCAN_POINTER_START;
-      boolean finished = false;
-      while (!finished) {
-        ScanResult<String> scanResult = jedis.scan(cursor, scanParams);
-        List<String> keys = scanResult.getResult();
-        for (String k : keys) {
-          c.output(k);
+      while (tracker.tryClaim(cursor)) {
+        ScanResult<String> scanResult = jedis.scan(cursor.getCursor(), scanParams);
+        List<String> keys = new ArrayList<>();
+        for (String k : scanResult.getResult()) {
+          keys.add(k);
         }

Review comment:
       Why add them to another list when `scanResult.getResult()` is already a list?

##########
File path: sdks/java/io/redis/src/main/java/org/apache/beam/sdk/io/redis/RedisCursorRangeTracker.java
##########
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.redis;
+
+import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
+import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
+import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState;
+
+import org.apache.beam.sdk.io.range.ByteKey;
+import org.apache.beam.sdk.io.range.ByteKeyRange;
+import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
+import org.apache.beam.sdk.transforms.splittabledofn.SplitResult;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.MoreObjects;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.Bytes;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+@SuppressWarnings({
+  "nullness" // TODO(https://issues.apache.org/jira/browse/BEAM-10402)
+})
+public class RedisCursorRangeTracker extends RestrictionTracker<RedisCursorRange, RedisCursor>
+    implements RestrictionTracker.HasProgress {
+
+  private RedisCursorRange range;

Review comment:
       I think you can use a ByteKeyRangeTracker as a delegate object and perform RedisCursor -> ByteKey conversions and ByteKey -> RedisCursor conversions here if you had a better way to represent `0` as start and `0` as end.
   
   `0 && isStart -> ByteKey.of(0x00)`
   `0 && !isStart -> ByteKey.EMPTY`
   
   All the other mappings will fall in between.

##########
File path: sdks/java/io/redis/src/main/java/org/apache/beam/sdk/io/redis/RedisIO.java
##########
@@ -334,94 +373,47 @@ public void setup() {
     public void teardown() {
       jedis.close();
     }
-  }
 
-  private static class ReadKeysWithPattern extends BaseReadFn<String> {
-
-    ReadKeysWithPattern(RedisConnectionConfiguration connectionConfiguration) {
-      super(connectionConfiguration);
+    @GetInitialRestriction
+    public RedisCursorRange getInitialRestriction() {
+      long dbSize = jedis.dbSize();
+      return RedisCursorRange.of(RedisCursor.of("0", dbSize), RedisCursor.of("0", dbSize));
     }
 
     @ProcessElement
-    public void processElement(ProcessContext c) {
+    public ProcessContinuation processElement(
+        ProcessContext c, RestrictionTracker<RedisCursorRange, RedisCursor> tracker) {
+      RedisCursor cursor = tracker.currentRestriction().getStartPosition();
       ScanParams scanParams = new ScanParams();
       scanParams.match(c.element());
-
-      String cursor = ScanParams.SCAN_POINTER_START;
-      boolean finished = false;
-      while (!finished) {
-        ScanResult<String> scanResult = jedis.scan(cursor, scanParams);
-        List<String> keys = scanResult.getResult();
-        for (String k : keys) {
-          c.output(k);
+      while (tracker.tryClaim(cursor)) {
+        ScanResult<String> scanResult = jedis.scan(cursor.getCursor(), scanParams);
+        List<String> keys = new ArrayList<>();
+        for (String k : scanResult.getResult()) {
+          keys.add(k);
         }
-        cursor = scanResult.getCursor();
-        if (cursor.equals(ScanParams.SCAN_POINTER_START)) {
-          finished = true;
-        }
-      }
-    }
-  }
-
-  /** A {@link DoFn} requesting Redis server to get key/value pairs. */
-  private static class ReadFn extends BaseReadFn<KV<String, String>> {
-    transient @Nullable Multimap<BoundedWindow, String> bundles = null;
-    @Nullable AtomicInteger batchCount = null;
-    private final int batchSize;
-
-    ReadFn(RedisConnectionConfiguration connectionConfiguration, int batchSize) {
-      super(connectionConfiguration);
-      this.batchSize = batchSize;
-    }
-
-    @StartBundle
-    public void startBundle() {
-      bundles = ArrayListMultimap.create();
-      batchCount = new AtomicInteger();
-    }
-
-    @ProcessElement
-    public void processElement(ProcessContext c, BoundedWindow window) {
-      String key = c.element();
-      bundles.put(window, key);
-      if (batchCount.incrementAndGet() > getBatchSize()) {
-        Multimap<BoundedWindow, KV<String, String>> kvs = fetchAndFlush();
-        for (BoundedWindow w : kvs.keySet()) {
-          for (KV<String, String> kv : kvs.get(w)) {
+        if (keys.size() > 0) {
+          for (KV<String, String> kv : fetchAndFlush(keys)) {
             c.output(kv);
           }
         }
-      }
-    }
-
-    @FinishBundle
-    public void finishBundle(FinishBundleContext context) {
-      Multimap<BoundedWindow, KV<String, String>> kvs = fetchAndFlush();
-      for (BoundedWindow w : kvs.keySet()) {
-        for (KV<String, String> kv : kvs.get(w)) {
-          context.output(kv, w.maxTimestamp(), w);
+        if (RedisCursor.ZERO_CURSOR.getCursor().equals(scanResult.getCursor())) {
+          break;
         }
+        cursor = RedisCursor.of(scanResult.getCursor(), jedis.dbSize());
       }
+      return ProcessContinuation.stop();
     }
 
-    private int getBatchSize() {
-      return batchSize;
-    }
-
-    private Multimap<BoundedWindow, KV<String, String>> fetchAndFlush() {
-      Multimap<BoundedWindow, KV<String, String>> kvs = ArrayListMultimap.create();
-      for (BoundedWindow w : bundles.keySet()) {
-        String[] keys = new String[bundles.get(w).size()];
-        bundles.get(w).toArray(keys);
-        List<String> results = jedis.mget(keys);
-        for (int i = 0; i < results.size(); i++) {
-          if (results.get(i) != null) {
-            kvs.put(w, KV.of(keys[i], results.get(i)));
-          }
+    private List<KV<String, String>> fetchAndFlush(List<String> bundle) {

Review comment:
       Can you move this code into the `@ProcessElement` method so we can output each key and value as it appears instead of creating another list of all the results?
   
   (Alternatively, pass in the `ProcessContext` and output directly from within this method.)

##########
File path: sdks/java/io/redis/src/main/java/org/apache/beam/sdk/io/redis/RedisCursorRangeTracker.java
##########
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.redis;
+
+import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
+import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull;
+import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState;
+
+import org.apache.beam.sdk.io.range.ByteKey;
+import org.apache.beam.sdk.io.range.ByteKeyRange;
+import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
+import org.apache.beam.sdk.transforms.splittabledofn.SplitResult;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.MoreObjects;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.Bytes;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+@SuppressWarnings({
+  "nullness" // TODO(https://issues.apache.org/jira/browse/BEAM-10402)
+})
+public class RedisCursorRangeTracker extends RestrictionTracker<RedisCursorRange, RedisCursor>
+    implements RestrictionTracker.HasProgress {
+
+  private RedisCursorRange range;
+  private @Nullable ByteKey lastClaimedKey = null;
+  private @Nullable ByteKey lastAttemptedKey = null;
+
+  /* An empty range which contains no keys. */
+  @VisibleForTesting
+  static final RedisCursorRange NO_KEYS =
+      RedisCursorRange.of(RedisCursor.ZERO_CURSOR, RedisCursor.ZERO_CURSOR);

Review comment:
       Isn't `[0, 0)` also equivalent to the range that contains all keys?

##########
File path: sdks/java/io/redis/src/main/java/org/apache/beam/sdk/io/redis/RedisIO.java
##########
@@ -122,6 +126,38 @@ public static Read read() {
         .build();
   }
 
+  public static ByteKey cursorToByteKey(String cursor, long dbSize) throws IOException {
+    long cursorLong = Long.parseLong(cursor);
+    long reversed = shiftBits(cursorLong, dbSize);
+    BigEndianLongCoder coder = BigEndianLongCoder.of();
+    ByteArrayOutputStream os = new ByteArrayOutputStream();
+    coder.encode(reversed, os);
+    byte[] byteArray = os.toByteArray();
+    return ByteKey.copyFrom(byteArray);
+  }
+
+  public static String byteKeyToString(ByteKey byteKeyStart) {
+    ByteBuffer bb = ByteBuffer.wrap(byteKeyStart.getBytes());
+    if (bb.capacity() < 8) {
+      int rem = 8 - bb.capacity();
+      byte[] padding = new byte[rem];
+      bb = ByteBuffer.allocate(8).put(padding).put(bb.array());
+      bb.position(0);
+    }
+    long l = bb.getLong();
+    return Long.toString(l);
+  }
+
+  public static long shiftBits(long a, long dbSize) {
+    long b = 0;
+    for (long i = 0; i < dbSize; ++i) {
+      b <<= 1;
+      b |= (a & 1);
+      a >>= 1;
+    }
+    return b;
+  }

Review comment:
       Duplicate code?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@beam.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org