You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by ro...@apache.org on 2022/10/13 07:03:04 UTC

[druid] branch master updated: Composite approach for checking in-filter values set in column dictionary (#13133)

This is an automated email from the ASF dual-hosted git repository.

rohangarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 45dfd679e9 Composite approach for checking in-filter values set in column dictionary (#13133)
45dfd679e9 is described below

commit 45dfd679e92e668172c470c37615e7447b601af1
Author: Rohan Garg <77...@users.noreply.github.com>
AuthorDate: Thu Oct 13 12:32:48 2022 +0530

    Composite approach for checking in-filter values set in column dictionary (#13133)
---
 ...tionaryEncodedStringIndexSupplierBenchmark.java | 156 +++++++++++++++
 .../apache/druid/benchmark/InFilterBenchmark.java  |  18 +-
 .../DictionaryEncodedStringIndexSupplier.java      |  89 ++++++++-
 .../DictionaryEncodedStringIndexSupplierTest.java  | 222 +++++++++++++++++++++
 4 files changed, 476 insertions(+), 9 deletions(-)

diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java
new file mode 100644
index 0000000000..acacce00e5
--- /dev/null
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.benchmark;
+
+import com.google.common.collect.FluentIterable;
+import org.apache.druid.collections.bitmap.BitmapFactory;
+import org.apache.druid.collections.bitmap.ImmutableBitmap;
+import org.apache.druid.collections.bitmap.MutableBitmap;
+import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.segment.column.BitmapColumnIndex;
+import org.apache.druid.segment.column.StringValueSetIndex;
+import org.apache.druid.segment.data.BitmapSerdeFactory;
+import org.apache.druid.segment.data.GenericIndexed;
+import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
+import org.apache.druid.segment.serde.DictionaryEncodedStringIndexSupplier;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+import javax.annotation.Nullable;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+import java.util.TreeSet;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.IntStream;
+
+@State(Scope.Benchmark)
+@Fork(value = 1)
+@Warmup(iterations = 10)
+@Measurement(iterations = 10)
+public class DictionaryEncodedStringIndexSupplierBenchmark
+{
+  static {
+    NullHandling.initializeForTests();
+  }
+
+  @State(Scope.Benchmark)
+  public static class BenchmarkState
+  {
+    @Nullable
+    private DictionaryEncodedStringIndexSupplier.GenericIndexedDictionaryEncodedStringValueSetIndex stringValueSetIndex;
+    private final TreeSet<ByteBuffer> values = new TreeSet<>();
+    private static final int START_INT = 10_000_000;
+
+    // cardinality of the dictionary. it will contain this many ints (as strings, of course), starting at START_INT,
+    // even numbers only.
+    @Param({"1000000"})
+    int dictionarySize;
+
+    @Param({"1", "2", "5", "10", "15", "20", "30", "50", "100"})
+    int filterToDictionaryPercentage;
+
+    @Param({"10", "100"})
+    int selectivityPercentage;
+
+    @Setup(Level.Trial)
+    public void setup()
+    {
+      final BitmapFactory bitmapFactory = new RoaringBitmapFactory();
+      final BitmapSerdeFactory serdeFactory = new RoaringBitmapSerdeFactory(null);
+      final Iterable<Integer> ints = intGenerator();
+      final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(
+          FluentIterable.from(ints)
+                        .transform(Object::toString),
+          GenericIndexed.STRING_STRATEGY
+      );
+      final GenericIndexed<ByteBuffer> dictionaryUtf8 = GenericIndexed.fromIterable(
+          FluentIterable.from(ints)
+                        .transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))),
+          GenericIndexed.BYTE_BUFFER_STRATEGY
+      );
+      final GenericIndexed<ImmutableBitmap> bitmaps = GenericIndexed.fromIterable(
+          () -> IntStream.range(0, dictionarySize)
+                         .mapToObj(
+                             i -> {
+                               final MutableBitmap mutableBitmap = bitmapFactory.makeEmptyMutableBitmap();
+                               mutableBitmap.add(i);
+                               return bitmapFactory.makeImmutableBitmap(mutableBitmap);
+                             }
+                         )
+                         .iterator(),
+          serdeFactory.getObjectStrategy()
+      );
+      DictionaryEncodedStringIndexSupplier dictionaryEncodedStringIndexSupplier =
+          new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary, dictionaryUtf8, bitmaps, null);
+      stringValueSetIndex =
+          (DictionaryEncodedStringIndexSupplier.GenericIndexedDictionaryEncodedStringValueSetIndex)
+              dictionaryEncodedStringIndexSupplier.as(StringValueSetIndex.class);
+      List<Integer> filterValues = new ArrayList<>();
+      List<Integer> nonFilterValues = new ArrayList<>();
+      for (int i = 0; i < dictionarySize; i++) {
+        filterValues.add(START_INT + i * 2);
+        nonFilterValues.add(START_INT + i * 2 + 1);
+      }
+      Random r = new Random(9001);
+      Collections.shuffle(filterValues);
+      Collections.shuffle(nonFilterValues);
+      values.clear();
+      for (int i = 0; i < filterToDictionaryPercentage * dictionarySize / 100; i++) {
+        if (r.nextInt(100) < selectivityPercentage) {
+          values.add(ByteBuffer.wrap((filterValues.get(i).toString()).getBytes(StandardCharsets.UTF_8)));
+        } else {
+          values.add(ByteBuffer.wrap((nonFilterValues.get(i).toString()).getBytes(StandardCharsets.UTF_8)));
+        }
+      }
+    }
+
+    private Iterable<Integer> intGenerator()
+    {
+      // i * 2 => half of these values will be present in the inFilter, half won't.
+      return () -> IntStream.range(0, dictionarySize).map(i -> START_INT + i * 2).boxed().iterator();
+    }
+  }
+
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void doValueSetCheck(Blackhole blackhole, BenchmarkState state)
+  {
+    BitmapColumnIndex bitmapIndex = state.stringValueSetIndex.forSortedValuesUtf8(state.values);
+    bitmapIndex.estimateSelectivity(10_000_000);
+  }
+}
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java
index 9556be4cb9..09c3253af0 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java
@@ -64,6 +64,7 @@ public class InFilterBenchmark
   private static final int START_INT = 10_000_000;
 
   private InDimFilter inFilter;
+  private InDimFilter endInDimFilter;
 
   // cardinality of the dictionary. it will contain this many ints (as strings, of course), starting at START_INT,
   // even numbers only.
@@ -71,7 +72,7 @@ public class InFilterBenchmark
   int dictionarySize;
 
   // cardinality of the "in" filter. half of its values will be in the dictionary, half will not.
-  @Param({"10000"})
+  @Param({"1", "10", "100", "1000", "10000"})
   int filterSize;
 
   // selector will contain a "dictionarySize" number of bitmaps; each one contains a single int.
@@ -114,6 +115,12 @@ public class InFilterBenchmark
         "dummy",
         IntStream.range(START_INT, START_INT + filterSize).mapToObj(String::valueOf).collect(Collectors.toSet())
     );
+    endInDimFilter = new InDimFilter(
+        "dummy",
+        IntStream.range(START_INT + dictionarySize * 2, START_INT + dictionarySize * 2 + 1)
+                 .mapToObj(String::valueOf)
+                 .collect(Collectors.toSet())
+    );
   }
 
   @Benchmark
@@ -125,6 +132,15 @@ public class InFilterBenchmark
     blackhole.consume(bitmapIndex);
   }
 
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void doFilterAtEnd(Blackhole blackhole)
+  {
+    final ImmutableBitmap bitmapIndex = Filters.computeDefaultBitmapResults(endInDimFilter, selector);
+    blackhole.consume(bitmapIndex);
+  }
+
   private Iterable<Integer> intGenerator()
   {
     // i * 2 => half of these values will be present in the inFilter, half won't.
diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java
index b0dc0072cf..5c1d2bc08b 100644
--- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java
+++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java
@@ -21,6 +21,8 @@ package org.apache.druid.segment.serde;
 
 import com.google.common.base.Predicate;
 import com.google.common.collect.Iterables;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.PeekingIterator;
 import it.unimi.dsi.fastutil.ints.IntIntImmutablePair;
 import it.unimi.dsi.fastutil.ints.IntIntPair;
 import it.unimi.dsi.fastutil.ints.IntIterator;
@@ -175,6 +177,13 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
       extends BaseGenericIndexedDictionaryEncodedIndex<ByteBuffer> implements StringValueSetIndex, Utf8ValueSetIndex
   {
     private static final int SIZE_WORTH_CHECKING_MIN = 8;
+    // This determines the cut-off point to swtich the merging algorithm from doing binary-search per element in the value
+    // set to doing a sorted merge algorithm between value set and dictionary. The ratio here represents the ratio b/w
+    // the number of elements in value set and the number of elements in the dictionary. The number has been derived
+    // using benchmark in https://github.com/apache/druid/pull/13133. If the ratio is higher than the threshold, we use
+    // sorted merge instead of binary-search based algorithm.
+    private static final double SORTED_MERGE_RATIO_THRESHOLD = 0.12D;
+    private final GenericIndexed<ByteBuffer> genericIndexedDictionary;
 
     public GenericIndexedDictionaryEncodedStringValueSetIndex(
         BitmapFactory bitmapFactory,
@@ -183,6 +192,7 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
     )
     {
       super(bitmapFactory, dictionary, bitmaps);
+      this.genericIndexedDictionary = dictionary;
     }
 
     @Override
@@ -219,7 +229,8 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
           Iterables.transform(
               values,
               input -> ByteBuffer.wrap(StringUtils.toUtf8(input))
-          )
+          ),
+          values.size()
       );
     }
 
@@ -235,24 +246,86 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
         tailSet = valuesUtf8;
       }
 
-      return getBitmapColumnIndexForSortedIterableUtf8(tailSet);
+      return getBitmapColumnIndexForSortedIterableUtf8(tailSet, tailSet.size());
     }
 
     /**
      * Helper used by {@link #forSortedValues} and {@link #forSortedValuesUtf8}.
      */
-    private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8(Iterable<ByteBuffer> valuesUtf8)
+    private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8(Iterable<ByteBuffer> valuesUtf8, int size)
     {
+      // for large number of in-filter values in comparison to the dictionary size, use the sorted merge algorithm.
+      if (size > SORTED_MERGE_RATIO_THRESHOLD * dictionary.size()) {
+        return new SimpleImmutableBitmapIterableIndex()
+        {
+          @Override
+          public Iterable<ImmutableBitmap> getBitmapIterable()
+          {
+            return () -> new Iterator<ImmutableBitmap>()
+            {
+              final PeekingIterator<ByteBuffer> valuesIterator = Iterators.peekingIterator(valuesUtf8.iterator());
+              final PeekingIterator<ByteBuffer> dictionaryIterator =
+                  Iterators.peekingIterator(genericIndexedDictionary.iterator());
+              int next = -1;
+              int idx = 0;
+
+              @Override
+              public boolean hasNext()
+              {
+                if (next < 0) {
+                  findNext();
+                }
+                return next >= 0;
+              }
+
+              @Override
+              public ImmutableBitmap next()
+              {
+                if (next < 0) {
+                  findNext();
+                  if (next < 0) {
+                    throw new NoSuchElementException();
+                  }
+                }
+                final int swap = next;
+                next = -1;
+                return getBitmap(swap);
+              }
+
+              private void findNext()
+              {
+                while (next < 0 && valuesIterator.hasNext() && dictionaryIterator.hasNext()) {
+                  ByteBuffer nextValue = valuesIterator.peek();
+                  ByteBuffer nextDictionaryKey = dictionaryIterator.peek();
+                  int comparison = GenericIndexed.BYTE_BUFFER_STRATEGY.compare(nextValue, nextDictionaryKey);
+                  if (comparison == 0) {
+                    next = idx;
+                    valuesIterator.next();
+                    break;
+                  } else if (comparison < 0) {
+                    valuesIterator.next();
+                  } else {
+                    dictionaryIterator.next();
+                    idx++;
+                  }
+                }
+              }
+            };
+          }
+        };
+      }
+
+      // if the size of in-filter values is less than the threshold percentage of dictionary size, then use binary search
+      // based lookup per value. The algorithm works well for smaller number of values.
       return new SimpleImmutableBitmapIterableIndex()
       {
         @Override
         public Iterable<ImmutableBitmap> getBitmapIterable()
         {
-          final int dictionarySize = dictionary.size();
-
           return () -> new Iterator<ImmutableBitmap>()
           {
-            final Iterator<ByteBuffer> iterator = valuesUtf8.iterator();
+            final PeekingIterator<ByteBuffer> valuesIterator = Iterators.peekingIterator(valuesUtf8.iterator());
+            final int dictionarySize = dictionary.size();
             int next = -1;
 
             @Override
@@ -280,8 +353,8 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
 
             private void findNext()
             {
-              while (next < 0 && iterator.hasNext()) {
-                ByteBuffer nextValue = iterator.next();
+              while (next < 0 && valuesIterator.hasNext()) {
+                ByteBuffer nextValue = valuesIterator.next();
                 next = dictionary.indexOf(nextValue);
 
                 if (next == -dictionarySize - 1) {
diff --git a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java
new file mode 100644
index 0000000000..36d5ba76a0
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.serde;
+
+import com.google.common.collect.ImmutableSet;
+import org.apache.druid.collections.bitmap.ImmutableBitmap;
+import org.apache.druid.collections.bitmap.MutableBitmap;
+import org.apache.druid.query.BitmapResultFactory;
+import org.apache.druid.query.DefaultBitmapResultFactory;
+import org.apache.druid.segment.column.BitmapColumnIndex;
+import org.apache.druid.segment.column.StringValueSetIndex;
+import org.apache.druid.segment.data.BitmapSerdeFactory;
+import org.apache.druid.segment.data.GenericIndexed;
+import org.apache.druid.segment.data.GenericIndexedWriter;
+import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
+import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium;
+import org.apache.druid.testing.InitializedNullHandlingTest;
+import org.junit.Assert;
+import org.junit.Test;
+import org.roaringbitmap.IntIterator;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.WritableByteChannel;
+import java.nio.charset.StandardCharsets;
+import java.util.TreeSet;
+
+public class DictionaryEncodedStringIndexSupplierTest extends InitializedNullHandlingTest
+{
+  BitmapSerdeFactory roaringFactory = new RoaringBitmapSerdeFactory(null);
+  BitmapResultFactory<ImmutableBitmap> bitmapResultFactory = new DefaultBitmapResultFactory(
+      roaringFactory.getBitmapFactory()
+  );
+
+  @Test
+  public void testStringColumnWithNullValueSetIndex() throws IOException
+  {
+    DictionaryEncodedStringIndexSupplier indexSupplier = makeStringWithNullsSupplier();
+    StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class);
+    Assert.assertNotNull(valueSetIndex);
+
+    // 10 rows
+    // dictionary: [null, b, foo, fooo, z]
+    // column: [foo, null, fooo, b, z, fooo, z, null, null, foo]
+
+    BitmapColumnIndex columnIndex = valueSetIndex.forValue("b");
+    Assert.assertNotNull(columnIndex);
+    Assert.assertEquals(0.1, columnIndex.estimateSelectivity(10), 0.0);
+    ImmutableBitmap bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap, 3);
+
+    // non-existent in local column
+    columnIndex = valueSetIndex.forValue("fo");
+    Assert.assertNotNull(columnIndex);
+    Assert.assertEquals(0.0, columnIndex.estimateSelectivity(10), 0.0);
+    bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap);
+
+    // set index
+    columnIndex = valueSetIndex.forSortedValues(new TreeSet<>(ImmutableSet.of("b", "fooo", "z")));
+    Assert.assertNotNull(columnIndex);
+    Assert.assertEquals(0.5, columnIndex.estimateSelectivity(10), 0.0);
+    bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap, 2, 3, 4, 5, 6);
+
+    // set index with single value in middle
+    columnIndex = valueSetIndex.forSortedValues(new TreeSet<>(ImmutableSet.of("foo")));
+    Assert.assertNotNull(columnIndex);
+    Assert.assertEquals(0.2, columnIndex.estimateSelectivity(10), 0.0);
+    bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap, 0, 9);
+
+    // set index with no element in column and all elements less than lowest non-null value
+    columnIndex = valueSetIndex.forSortedValues(new TreeSet<>(ImmutableSet.of("a", "aa", "aaa")));
+    Assert.assertNotNull(columnIndex);
+    Assert.assertEquals(0.0, columnIndex.estimateSelectivity(10), 0.0);
+    bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap);
+
+    // set index with no element in column and all elements greater than highest non-null value
+    columnIndex = valueSetIndex.forSortedValues(new TreeSet<>(ImmutableSet.of("zz", "zzz", "zzzz")));
+    Assert.assertNotNull(columnIndex);
+    Assert.assertEquals(0.0, columnIndex.estimateSelectivity(10), 0.0);
+    bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
+    checkBitmap(bitmap);
+  }
+
+  private DictionaryEncodedStringIndexSupplier makeStringWithNullsSupplier() throws IOException
+  {
+    ByteBuffer stringBuffer = ByteBuffer.allocate(1 << 12);
+    ByteBuffer byteBuffer = ByteBuffer.allocate(1 << 12);
+
+    GenericIndexedWriter<String> stringWriter = new GenericIndexedWriter<>(
+        new OnHeapMemorySegmentWriteOutMedium(),
+        "strings",
+        GenericIndexed.STRING_STRATEGY
+    );
+    GenericIndexedWriter<ByteBuffer> byteBufferWriter = new GenericIndexedWriter<>(
+        new OnHeapMemorySegmentWriteOutMedium(),
+        "byteBuffers",
+        GenericIndexed.BYTE_BUFFER_STRATEGY
+    );
+
+    stringWriter.open();
+    byteBufferWriter.open();
+
+    ByteBuffer bitmapsBuffer = ByteBuffer.allocate(1 << 12);
+
+    GenericIndexedWriter<ImmutableBitmap> bitmapWriter = new GenericIndexedWriter<>(
+        new OnHeapMemorySegmentWriteOutMedium(),
+        "bitmaps",
+        roaringFactory.getObjectStrategy()
+    );
+    bitmapWriter.setObjectsNotSorted();
+    bitmapWriter.open();
+    // 10 rows
+    // dictionary: [null, b, fo, foo, fooo, z]
+    // column: [foo, null, fooo, b, z, fooo, z, null, null, foo]
+
+    // null
+    stringWriter.write(null);
+    byteBufferWriter.write(null);
+    bitmapWriter.write(fillBitmap(1, 7, 8));
+
+    // b
+    stringWriter.write("b");
+    byteBufferWriter.write(ByteBuffer.wrap("b".getBytes(StandardCharsets.UTF_8)));
+    bitmapWriter.write(fillBitmap(3));
+
+    // fo
+    stringWriter.write("foo");
+    byteBufferWriter.write(ByteBuffer.wrap("foo".getBytes(StandardCharsets.UTF_8)));
+    bitmapWriter.write(fillBitmap(0, 9));
+
+    // fooo
+    stringWriter.write("fooo");
+    byteBufferWriter.write(ByteBuffer.wrap("fooo".getBytes(StandardCharsets.UTF_8)));
+    bitmapWriter.write(fillBitmap(2, 5));
+
+    // z
+    stringWriter.write("z");
+    byteBufferWriter.write(ByteBuffer.wrap("z".getBytes(StandardCharsets.UTF_8)));
+    bitmapWriter.write(fillBitmap(4, 6));
+
+    writeToBuffer(stringBuffer, stringWriter);
+    writeToBuffer(byteBuffer, stringWriter);
+    writeToBuffer(bitmapsBuffer, bitmapWriter);
+
+    GenericIndexed<ImmutableBitmap> bitmaps = GenericIndexed.read(bitmapsBuffer, roaringFactory.getObjectStrategy());
+    return new DictionaryEncodedStringIndexSupplier(
+        roaringFactory.getBitmapFactory(),
+        GenericIndexed.read(stringBuffer, GenericIndexed.STRING_STRATEGY),
+        GenericIndexed.read(byteBuffer, GenericIndexed.BYTE_BUFFER_STRATEGY),
+        bitmaps,
+        null
+    );
+  }
+
+  static void writeToBuffer(ByteBuffer buffer, Serializer serializer) throws IOException
+  {
+    WritableByteChannel channel = new WritableByteChannel()
+    {
+      @Override
+      public int write(ByteBuffer src)
+      {
+        int size = src.remaining();
+        buffer.put(src);
+        return size;
+      }
+
+      @Override
+      public boolean isOpen()
+      {
+        return true;
+      }
+
+      @Override
+      public void close()
+      {
+      }
+    };
+
+    serializer.writeTo(channel, null);
+    buffer.position(0);
+  }
+
+  private ImmutableBitmap fillBitmap(int... rows)
+  {
+    MutableBitmap bitmap = roaringFactory.getBitmapFactory().makeEmptyMutableBitmap();
+    for (int i : rows) {
+      bitmap.add(i);
+    }
+    return roaringFactory.getBitmapFactory().makeImmutableBitmap(bitmap);
+  }
+
+  private void checkBitmap(ImmutableBitmap bitmap, int... expectedRows)
+  {
+    IntIterator iterator = bitmap.iterator();
+    for (int i : expectedRows) {
+      Assert.assertTrue(iterator.hasNext());
+      Assert.assertEquals(i, iterator.next());
+    }
+    Assert.assertFalse(iterator.hasNext());
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org