You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by jo...@apache.org on 2020/04/25 20:47:26 UTC

[druid] branch master updated: Adjust string comparators used for ingestion (#9742)

This is an automated email from the ASF dual-hosted git repository.

jonwei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new fe000a9  Adjust string comparators used for ingestion (#9742)
fe000a9 is described below

commit fe000a9e4bf1d2681760f72e9c593bd2aa5a1416
Author: Jonathan Wei <jo...@users.noreply.github.com>
AuthorDate: Sat Apr 25 13:47:07 2020 -0700

    Adjust string comparators used for ingestion (#9742)
    
    * Adjust string comparators used for ingestion
    
    * Small tweak
    
    * Fix inspection, more javadocs
    
    * Address PR comment
    
    * Add rollup comment
    
    * Add ordering test
    
    * Fix IncrementaIndexRowCompTest
---
 .../org/apache/druid/segment/DimensionHandler.java |   4 +
 .../org/apache/druid/segment/DimensionIndexer.java |   4 +
 .../druid/segment/StringDimensionHandler.java      |  71 ++--
 .../druid/segment/StringDimensionIndexer.java      |  30 +-
 .../druid/segment/IndexMergerNullHandlingTest.java |  28 +-
 .../apache/druid/segment/IndexMergerTestBase.java  | 432 ++++++++++++++++++++-
 .../incremental/IncrementalIndexRowCompTest.java   |   4 +-
 7 files changed, 519 insertions(+), 54 deletions(-)

diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandler.java
index e1ea334..56bfed2 100644
--- a/processing/src/main/java/org/apache/druid/segment/DimensionHandler.java
+++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandler.java
@@ -122,6 +122,10 @@ public interface DimensionHandler
    * Returns a comparator that knows how to compare {@link ColumnValueSelector} of the assumed dimension type,
    * corresponding to this DimensionHandler. E. g. {@link StringDimensionHandler} returns a comparator, that compares
    * {@link ColumnValueSelector}s as {@link DimensionSelector}s.
+   *
+   * The comparison rules used by this method should match the rules used by
+   * {@link DimensionIndexer#compareUnsortedEncodedKeyComponents}, otherwise incorrect ordering/merging of rows
+   * can occur during ingestion, causing issues such as imperfect rollup.
    */
   Comparator<ColumnValueSelector> getEncodedValueSelectorComparator();
 
diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
index 06c27f7..99921eb 100644
--- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
@@ -241,6 +241,10 @@ public interface DimensionIndexer
    *
    * Refer to StringDimensionIndexer.compareUnsortedEncodedKeyComponents() for a reference implementation.
    *
+   * The comparison rules used by this method should match the rules used by
+   * {@link DimensionHandler#getEncodedValueSelectorComparator()}, otherwise incorrect ordering/merging of rows
+   * can occur during ingestion, causing issues such as imperfect rollup.
+   *
    * @param lhs dimension value array from a Row key
    * @param rhs dimension value array from a Row key
    * @return comparison of the two arrays
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
index c14bd31..65ff7f3 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
@@ -33,61 +33,46 @@ import java.util.Comparator;
 
 public class StringDimensionHandler implements DimensionHandler<Integer, int[], String>
 {
-
   /**
-   * Compares {@link IndexedInts} lexicographically, with the exception that if a row contains only zeros (that's the
-   * index of null) at all positions, it is considered "null" as a whole and is "less" than any "non-null" row. Empty
-   * row (size is zero) is also considered "null".
-   *
-   * The implementation is a bit complicated because it tries to check each position of both rows only once.
+   * This comparator uses the following rules:
+   * - Compare the two value arrays up to the length of the shorter array
+   * - If the two arrays match so far, then compare the array lengths, the shorter array is considered smaller
+   * - Comparing null and the empty list is a special case: these are considered equal
    */
   private static final Comparator<ColumnValueSelector> DIMENSION_SELECTOR_COMPARATOR = (s1, s2) -> {
     IndexedInts row1 = getRow(s1);
     IndexedInts row2 = getRow(s2);
     int len1 = row1.size();
     int len2 = row2.size();
-    boolean row1IsNull = true;
-    boolean row2IsNull = true;
-    for (int i = 0; i < Math.min(len1, len2); i++) {
-      int v1 = row1.get(i);
-      row1IsNull &= v1 == 0;
-      int v2 = row2.get(i);
-      row2IsNull &= v2 == 0;
-      int valueDiff = Integer.compare(v1, v2);
-      if (valueDiff != 0) {
-        return valueDiff;
-      }
-    }
-    //noinspection SubtractionInCompareTo -- substraction is safe here, because lengths or rows are small numbers.
-    int lenDiff = len1 - len2;
-    if (lenDiff == 0) {
-      return 0;
-    } else {
-      if (!row1IsNull || !row2IsNull) {
-        return lenDiff;
-      } else {
-        return compareRestNulls(row1, len1, row2, len2);
+    int lenCompareResult = Integer.compare(len1, len2);
+    int valsIndex = 0;
+
+    if (lenCompareResult != 0) {
+      // if the values don't have the same length, check if we're comparing [] and [null], which are equivalent
+      if (len1 + len2 == 1) {
+        IndexedInts longerRow = len2 > len1 ? row2 : row1;
+        if (longerRow.get(0) == 0) {
+          return 0;
+        } else {
+          //noinspection ObjectEquality -- longerRow is explicitly set to only row1 or row2
+          return longerRow == row1 ? 1 : -1;
+        }
       }
     }
-  };
 
-  private static int compareRestNulls(IndexedInts row1, int len1, IndexedInts row2, int len2)
-  {
-    if (len1 < len2) {
-      for (int i = len1; i < len2; i++) {
-        if (row2.get(i) != 0) {
-          return -1;
-        }
-      }
-    } else {
-      for (int i = len2; i < len1; i++) {
-        if (row1.get(i) != 0) {
-          return 1;
-        }
+    int lenToCompare = Math.min(len1, len2);
+    while (valsIndex < lenToCompare) {
+      int v1 = row1.get(valsIndex);
+      int v2 = row2.get(valsIndex);
+      int valueCompareResult = Integer.compare(v1, v2);
+      if (valueCompareResult != 0) {
+        return valueCompareResult;
       }
+      ++valsIndex;
     }
-    return 0;
-  }
+
+    return lenCompareResult;
+  };
 
   /**
    * Value for absent column, i. e. {@link NilColumnValueSelector}, should be equivalent to [null] during index merging.
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
index f08a495..ada146d 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
@@ -399,23 +399,42 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
     int lhsLen = lhs.length;
     int rhsLen = rhs.length;
 
-    int retVal = Ints.compare(lhsLen, rhsLen);
+    int lenCompareResult = Ints.compare(lhsLen, rhsLen);
+    if (lenCompareResult != 0) {
+      // if the values don't have the same length, check if we're comparing [] and [null], which are equivalent
+      if (lhsLen + rhsLen == 1) {
+        int[] longerVal = rhsLen > lhsLen ? rhs : lhs;
+        if (longerVal[0] == dimLookup.idForNull) {
+          return 0;
+        } else {
+          //noinspection ArrayEquality -- longerVal is explicitly set to only lhs or rhs
+          return longerVal == lhs ? 1 : -1;
+        }
+      }
+    }
+
     int valsIndex = 0;
-    while (retVal == 0 && valsIndex < lhsLen) {
+    int lenToCompare = Math.min(lhsLen, rhsLen);
+    while (valsIndex < lenToCompare) {
       int lhsVal = lhs[valsIndex];
       int rhsVal = rhs[valsIndex];
       if (lhsVal != rhsVal) {
         final String lhsValActual = getActualValue(lhsVal, false);
         final String rhsValActual = getActualValue(rhsVal, false);
+        int valueCompareResult = 0;
         if (lhsValActual != null && rhsValActual != null) {
-          retVal = lhsValActual.compareTo(rhsValActual);
+          valueCompareResult = lhsValActual.compareTo(rhsValActual);
         } else if (lhsValActual == null ^ rhsValActual == null) {
-          retVal = lhsValActual == null ? -1 : 1;
+          valueCompareResult = lhsValActual == null ? -1 : 1;
+        }
+        if (valueCompareResult != 0) {
+          return valueCompareResult;
         }
       }
       ++valsIndex;
     }
-    return retVal;
+
+    return lenCompareResult;
   }
 
   @Override
@@ -796,6 +815,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
     return sortedLookup == null ? sortedLookup = dimLookup.sort() : sortedLookup;
   }
 
+  @Nullable
   private String getActualValue(int intermediateValue, boolean idSorted)
   {
     if (idSorted) {
diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java
index f2f1cf0..a9b63b4 100644
--- a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java
@@ -33,6 +33,7 @@ import org.apache.druid.segment.column.BitmapIndex;
 import org.apache.druid.segment.column.ColumnHolder;
 import org.apache.druid.segment.column.DictionaryEncodedColumn;
 import org.apache.druid.segment.data.IncrementalIndexTest;
+import org.apache.druid.segment.data.IndexedInts;
 import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
 import org.junit.Assert;
@@ -55,6 +56,10 @@ import java.util.stream.IntStream;
 
 public class IndexMergerNullHandlingTest
 {
+  static {
+    NullHandling.initializeForTests();
+  }
+
   private IndexMerger indexMerger;
   private IndexIO indexIO;
   private IndexSpec indexSpec;
@@ -221,8 +226,9 @@ public class IndexMergerNullHandlingTest
       retVal.add(NullHandling.emptyToNullIfNeeded(((String) value)));
     } else if (value instanceof List) {
       final List<String> list = (List<String>) value;
-      if (list.isEmpty() && !hasMultipleValues) {
+      if (list.isEmpty()) {
         // empty lists become nulls in single valued columns
+        // they sometimes also become nulls in multi-valued columns (see comments in getRow())
         retVal.add(NullHandling.emptyToNullIfNeeded(null));
       } else {
         retVal.addAll(list.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toList()));
@@ -242,7 +248,25 @@ public class IndexMergerNullHandlingTest
     final List<String> retVal = new ArrayList<>();
 
     if (column.hasMultipleValues()) {
-      column.getMultiValueRow(rowNumber).forEach(i -> retVal.add(column.lookupName(i)));
+      IndexedInts rowVals = column.getMultiValueRow(rowNumber);
+      if (rowVals.size() == 0) {
+        // This is a sort of test hack:
+        // - If we ingest the subset [{d=[]}, {d=[a, b]}], we get an IndexedInts with 0 size for the nully row,
+        //   representing the empty list
+        // - If we ingest the subset [{}, {d=[]}, {d=[a, b]}], we instead get an IndexedInts with 1 size,
+        //   representing a row with a single null value
+        // This occurs because the dimension value comparator used during ingestion considers null and the empty list
+        // to be the same.
+        // - In the first subset, we only see the empty list and a non-empty list, so the key used in the
+        //   incremental index fact table for the nully row is the empty list.
+        // - In the second subset, the fact table initially gets an entry for d=null. When the row with the
+        //   empty list value is added, it is treated as identical to the first d=null row, so it gets rolled up.
+        //   The resulting persisted segment will have [null] instead of [] because of this rollup.
+        // To simplify this test class, we always normalize the empty list into null here.
+        retVal.add(null);
+      } else {
+        rowVals.forEach(i -> retVal.add(column.lookupName(i)));
+      }
     } else {
       retVal.add(column.lookupName(column.getSingleValueRow(rowNumber)));
     }
diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java
index 1783f43..b253614 100644
--- a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java
+++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java
@@ -78,6 +78,7 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.EnumSet;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
@@ -2076,11 +2077,11 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest
 
     Assert.assertEquals(2, rowList.size());
     Assert.assertEquals(
-        Arrays.asList(Arrays.asList("a", "b", "x"), Arrays.asList("a", "b", "x")),
+        Arrays.asList(Arrays.asList("a", "a", "b", "x"), Arrays.asList("a", "b", "x", "x")),
         rowList.get(0).dimensionValues()
     );
     Assert.assertEquals(
-        Arrays.asList(Arrays.asList("a", "a", "b", "x"), Arrays.asList("a", "b", "x", "x")),
+        Arrays.asList(Arrays.asList("a", "b", "x"), Arrays.asList("a", "b", "x")),
         rowList.get(1).dimensionValues()
     );
 
@@ -2210,6 +2211,433 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest
     );
   }
 
+  @Test
+  public void testMultivalDim_mergeAcrossSegments_rollupWorks() throws Exception
+  {
+    List<String> dims = Arrays.asList(
+        "dimA",
+        "dimMultiVal"
+    );
+
+    IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
+        .withDimensionsSpec(
+            new DimensionsSpec(
+                ImmutableList.of(
+                    new StringDimensionSchema("dimA", MultiValueHandling.SORTED_ARRAY, true),
+                    new StringDimensionSchema("dimMultiVal", MultiValueHandling.SORTED_ARRAY, true)
+                )
+            )
+        )
+        .withMetrics(
+            new LongSumAggregatorFactory("sumCount", "sumCount")
+        )
+        .withRollup(true)
+        .build();
+
+    IncrementalIndex toPersistA = new IncrementalIndex.Builder()
+        .setIndexSchema(indexSchema)
+        .setMaxRowCount(1000)
+        .buildOnheap();
+
+    Map<String, Object> event1 = new HashMap<>();
+    event1.put("dimA", "leek");
+    event1.put("dimMultiVal", ImmutableList.of("1", "2", "4"));
+    event1.put("sumCount", 1L);
+
+    Map<String, Object> event2 = new HashMap<>();
+    event2.put("dimA", "leek");
+    event2.put("dimMultiVal", ImmutableList.of("1", "2", "3", "5"));
+    event2.put("sumCount", 1L);
+
+    toPersistA.add(new MapBasedInputRow(1, dims, event1));
+    toPersistA.add(new MapBasedInputRow(1, dims, event2));
+
+    IncrementalIndex toPersistB = new IncrementalIndex.Builder()
+        .setIndexSchema(indexSchema)
+        .setMaxRowCount(1000)
+        .buildOnheap();
+
+    Map<String, Object> event3 = new HashMap<>();
+    event3.put("dimA", "leek");
+    event3.put("dimMultiVal", ImmutableList.of("1", "2", "4"));
+    event3.put("sumCount", 1L);
+
+    Map<String, Object> event4 = new HashMap<>();
+    event4.put("dimA", "potato");
+    event4.put("dimMultiVal", ImmutableList.of("0", "1", "4"));
+    event4.put("sumCount", 1L);
+
+    toPersistB.add(new MapBasedInputRow(1, dims, event3));
+    toPersistB.add(new MapBasedInputRow(1, dims, event4));
+
+    final File tmpDirA = temporaryFolder.newFolder();
+    final File tmpDirB = temporaryFolder.newFolder();
+    final File tmpDirMerged = temporaryFolder.newFolder();
+
+    QueryableIndex indexA = closer.closeLater(
+        indexIO.loadIndex(indexMerger.persist(toPersistA, tmpDirA, indexSpec, null))
+    );
+
+    QueryableIndex indexB = closer.closeLater(
+        indexIO.loadIndex(indexMerger.persist(toPersistB, tmpDirB, indexSpec, null))
+    );
+
+    final QueryableIndex merged = closer.closeLater(
+        indexIO.loadIndex(
+            indexMerger.mergeQueryableIndex(
+                Arrays.asList(indexA, indexB),
+                true,
+                new AggregatorFactory[]{
+                    new LongSumAggregatorFactory("sumCount", "sumCount")
+                },
+                tmpDirMerged,
+                indexSpec,
+                null
+            )
+        )
+    );
+
+    final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
+    final List<DebugRow> rowList = RowIteratorHelper.toList(adapter.getRows());
+
+    Assert.assertEquals(
+        ImmutableList.of("dimA", "dimMultiVal"),
+        ImmutableList.copyOf(adapter.getDimensionNames())
+    );
+
+    Assert.assertEquals(3, rowList.size());
+    Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3", "5")), rowList.get(0).dimensionValues());
+    Assert.assertEquals(1L, rowList.get(0).metricValues().get(0));
+    Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "4")), rowList.get(1).dimensionValues());
+    Assert.assertEquals(2L, rowList.get(1).metricValues().get(0));
+    Assert.assertEquals(Arrays.asList("potato", Arrays.asList("0", "1", "4")), rowList.get(2).dimensionValues());
+    Assert.assertEquals(1L, rowList.get(2).metricValues().get(0));
+
+    checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimA", "leek"));
+    checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimA", "potato"));
+
+    checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimMultiVal", "0"));
+    checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", "1"));
+    checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimMultiVal", "2"));
+    checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "3"));
+    checkBitmapIndex(Arrays.asList(1, 2), adapter.getBitmapIndex("dimMultiVal", "4"));
+    checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "5"));
+  }
+
+
+  @Test
+  public void testMultivalDim_persistAndMerge_dimensionValueOrderingRules() throws Exception
+  {
+    List<String> dims = Arrays.asList(
+        "dimA",
+        "dimMultiVal"
+    );
+
+    IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
+        .withDimensionsSpec(
+            new DimensionsSpec(
+                ImmutableList.of(
+                    new StringDimensionSchema("dimA", MultiValueHandling.SORTED_ARRAY, true),
+                    new StringDimensionSchema("dimMultiVal", MultiValueHandling.SORTED_ARRAY, true)
+                )
+            )
+        )
+        .withMetrics(
+            new LongSumAggregatorFactory("sumCount", "sumCount")
+        )
+        .withRollup(true)
+        .build();
+
+    Map<String, Object> nullEvent = new HashMap<>();
+    nullEvent.put("dimA", "leek");
+    nullEvent.put("sumCount", 1L);
+
+    Map<String, Object> nullEvent2 = new HashMap<>();
+    nullEvent2.put("dimA", "leek");
+    nullEvent2.put("dimMultiVal", null);
+    nullEvent2.put("sumCount", 1L);
+
+    Map<String, Object> emptyListEvent = new HashMap<>();
+    emptyListEvent.put("dimA", "leek");
+    emptyListEvent.put("dimMultiVal", ImmutableList.of());
+    emptyListEvent.put("sumCount", 1L);
+
+    List<String> listWithNull = new ArrayList<>();
+    listWithNull.add(null);
+    Map<String, Object> listWithNullEvent = new HashMap<>();
+    listWithNullEvent.put("dimA", "leek");
+    listWithNullEvent.put("dimMultiVal", listWithNull);
+    listWithNullEvent.put("sumCount", 1L);
+
+    Map<String, Object> emptyStringEvent = new HashMap<>();
+    emptyStringEvent.put("dimA", "leek");
+    emptyStringEvent.put("dimMultiVal", "");
+    emptyStringEvent.put("sumCount", 1L);
+
+    Map<String, Object> listWithEmptyStringEvent = new HashMap<>();
+    listWithEmptyStringEvent.put("dimA", "leek");
+    listWithEmptyStringEvent.put("dimMultiVal", ImmutableList.of(""));
+    listWithEmptyStringEvent.put("sumCount", 1L);
+
+    Map<String, Object> singleValEvent = new HashMap<>();
+    singleValEvent.put("dimA", "leek");
+    singleValEvent.put("dimMultiVal", "1");
+    singleValEvent.put("sumCount", 1L);
+
+    Map<String, Object> singleValEvent2 = new HashMap<>();
+    singleValEvent2.put("dimA", "leek");
+    singleValEvent2.put("dimMultiVal", "2");
+    singleValEvent2.put("sumCount", 1L);
+
+    Map<String, Object> singleValEvent3 = new HashMap<>();
+    singleValEvent3.put("dimA", "potato");
+    singleValEvent3.put("dimMultiVal", "2");
+    singleValEvent3.put("sumCount", 1L);
+
+    Map<String, Object> listWithSingleValEvent = new HashMap<>();
+    listWithSingleValEvent.put("dimA", "leek");
+    listWithSingleValEvent.put("dimMultiVal", ImmutableList.of("1"));
+    listWithSingleValEvent.put("sumCount", 1L);
+
+    Map<String, Object> listWithSingleValEvent2 = new HashMap<>();
+    listWithSingleValEvent2.put("dimA", "leek");
+    listWithSingleValEvent2.put("dimMultiVal", ImmutableList.of("2"));
+    listWithSingleValEvent2.put("sumCount", 1L);
+
+    Map<String, Object> listWithSingleValEvent3 = new HashMap<>();
+    listWithSingleValEvent3.put("dimA", "potato");
+    listWithSingleValEvent3.put("dimMultiVal", ImmutableList.of("2"));
+    listWithSingleValEvent3.put("sumCount", 1L);
+
+    Map<String, Object> multivalEvent = new HashMap<>();
+    multivalEvent.put("dimA", "leek");
+    multivalEvent.put("dimMultiVal", ImmutableList.of("1", "3"));
+    multivalEvent.put("sumCount", 1L);
+
+    Map<String, Object> multivalEvent2 = new HashMap<>();
+    multivalEvent2.put("dimA", "leek");
+    multivalEvent2.put("dimMultiVal", ImmutableList.of("1", "4"));
+    multivalEvent2.put("sumCount", 1L);
+
+    Map<String, Object> multivalEvent3 = new HashMap<>();
+    multivalEvent3.put("dimA", "leek");
+    multivalEvent3.put("dimMultiVal", ImmutableList.of("1", "3", "5"));
+    multivalEvent3.put("sumCount", 1L);
+
+    Map<String, Object> multivalEvent4 = new HashMap<>();
+    multivalEvent4.put("dimA", "leek");
+    multivalEvent4.put("dimMultiVal", ImmutableList.of("1", "2", "3"));
+    multivalEvent4.put("sumCount", 1L);
+
+    List<String> multivalEvent5List = Arrays.asList("1", "2", "3", null);
+    Map<String, Object> multivalEvent5 = new HashMap<>();
+    multivalEvent5.put("dimA", "leek");
+    multivalEvent5.put("dimMultiVal", multivalEvent5List);
+    multivalEvent5.put("sumCount", 1L);
+
+    List<String> multivalEvent6List = Arrays.asList(null, "3");
+    Map<String, Object> multivalEvent6 = new HashMap<>();
+    multivalEvent6.put("dimA", "leek");
+    multivalEvent6.put("dimMultiVal", multivalEvent6List);
+    multivalEvent6.put("sumCount", 1L);
+
+    Map<String, Object> multivalEvent7 = new HashMap<>();
+    multivalEvent7.put("dimA", "leek");
+    multivalEvent7.put("dimMultiVal", ImmutableList.of("1", "2", "3", ""));
+    multivalEvent7.put("sumCount", 1L);
+
+    Map<String, Object> multivalEvent8 = new HashMap<>();
+    multivalEvent8.put("dimA", "leek");
+    multivalEvent8.put("dimMultiVal", ImmutableList.of("", "3"));
+    multivalEvent8.put("sumCount", 1L);
+
+    Map<String, Object> multivalEvent9 = new HashMap<>();
+    multivalEvent9.put("dimA", "potato");
+    multivalEvent9.put("dimMultiVal", ImmutableList.of("1", "3"));
+    multivalEvent9.put("sumCount", 1L);
+
+    List<Map<String, Object>> events = ImmutableList.of(
+        nullEvent,
+        nullEvent2,
+        emptyListEvent,
+        listWithNullEvent,
+        emptyStringEvent,
+        listWithEmptyStringEvent,
+        singleValEvent,
+        singleValEvent2,
+        singleValEvent3,
+        listWithSingleValEvent,
+        listWithSingleValEvent2,
+        listWithSingleValEvent3,
+        multivalEvent,
+        multivalEvent2,
+        multivalEvent3,
+        multivalEvent4,
+        multivalEvent5,
+        multivalEvent6,
+        multivalEvent7,
+        multivalEvent8,
+        multivalEvent9
+    );
+
+    IncrementalIndex toPersistA = new IncrementalIndex.Builder()
+        .setIndexSchema(indexSchema)
+        .setMaxRowCount(1000)
+        .buildOnheap();
+
+    for (Map<String, Object> event : events) {
+      toPersistA.add(new MapBasedInputRow(1, dims, event));
+    }
+
+    final File tmpDirA = temporaryFolder.newFolder();
+    QueryableIndex indexA = closer.closeLater(
+        indexIO.loadIndex(indexMerger.persist(toPersistA, tmpDirA, indexSpec, null))
+    );
+
+    List<QueryableIndex> singleEventIndexes = new ArrayList<>();
+    for (Map<String, Object> event : events) {
+      IncrementalIndex toPersist = new IncrementalIndex.Builder()
+          .setIndexSchema(indexSchema)
+          .setMaxRowCount(1000)
+          .buildOnheap();
+
+      toPersist.add(new MapBasedInputRow(1, dims, event));
+      final File tmpDir = temporaryFolder.newFolder();
+      QueryableIndex queryableIndex = closer.closeLater(
+          indexIO.loadIndex(indexMerger.persist(toPersist, tmpDir, indexSpec, null))
+      );
+      singleEventIndexes.add(queryableIndex);
+    }
+    singleEventIndexes.add(indexA);
+
+    final File tmpDirMerged = temporaryFolder.newFolder();
+    final QueryableIndex merged = closer.closeLater(
+        indexIO.loadIndex(
+            indexMerger.mergeQueryableIndex(
+                singleEventIndexes,
+                true,
+                new AggregatorFactory[]{
+                    new LongSumAggregatorFactory("sumCount", "sumCount")
+                },
+                tmpDirMerged,
+                indexSpec,
+                null
+            )
+        )
+    );
+
+    final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
+    final List<DebugRow> rowList = RowIteratorHelper.toList(adapter.getRows());
+
+    Assert.assertEquals(
+        ImmutableList.of("dimA", "dimMultiVal"),
+        ImmutableList.copyOf(adapter.getDimensionNames())
+    );
+
+    if (NullHandling.replaceWithDefault()) {
+      Assert.assertEquals(11, rowList.size());
+
+      Assert.assertEquals(Arrays.asList("leek", null), rowList.get(0).dimensionValues());
+      Assert.assertEquals(12L, rowList.get(0).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "1", "2", "3")), rowList.get(1).dimensionValues());
+      Assert.assertEquals(4L, rowList.get(1).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "3")), rowList.get(2).dimensionValues());
+      Assert.assertEquals(4L, rowList.get(2).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", "1"), rowList.get(3).dimensionValues());
+      Assert.assertEquals(4L, rowList.get(3).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3")), rowList.get(4).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(4).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3")), rowList.get(5).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(5).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3", "5")), rowList.get(6).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(6).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "4")), rowList.get(7).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(7).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", "2"), rowList.get(8).dimensionValues());
+      Assert.assertEquals(4L, rowList.get(8).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("potato", Arrays.asList("1", "3")), rowList.get(9).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(9).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("potato", "2"), rowList.get(10).dimensionValues());
+      Assert.assertEquals(4L, rowList.get(10).metricValues().get(0));
+
+      checkBitmapIndex(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8), adapter.getBitmapIndex("dimA", "leek"));
+      checkBitmapIndex(Arrays.asList(9, 10), adapter.getBitmapIndex("dimA", "potato"));
+
+      checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", null));
+      checkBitmapIndex(ImmutableList.of(), adapter.getBitmapIndex("dimMultiVal", ""));
+      checkBitmapIndex(Arrays.asList(1, 3, 4, 5, 6, 7, 9), adapter.getBitmapIndex("dimMultiVal", "1"));
+      checkBitmapIndex(Arrays.asList(1, 4, 8, 10), adapter.getBitmapIndex("dimMultiVal", "2"));
+      checkBitmapIndex(Arrays.asList(1, 2, 4, 5, 6, 9), adapter.getBitmapIndex("dimMultiVal", "3"));
+      checkBitmapIndex(Collections.singletonList(7), adapter.getBitmapIndex("dimMultiVal", "4"));
+      checkBitmapIndex(Collections.singletonList(6), adapter.getBitmapIndex("dimMultiVal", "5"));
+    } else {
+      Assert.assertEquals(14, rowList.size());
+
+      Assert.assertEquals(Arrays.asList("leek", null), rowList.get(0).dimensionValues());
+      Assert.assertEquals(8L, rowList.get(0).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "1", "2", "3")), rowList.get(1).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(1).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "3")), rowList.get(2).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(2).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", ""), rowList.get(3).dimensionValues());
+      Assert.assertEquals(4L, rowList.get(3).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList("", "1", "2", "3")), rowList.get(4).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(4).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList("", "3")), rowList.get(5).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(5).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", "1"), rowList.get(6).dimensionValues());
+      Assert.assertEquals(4L, rowList.get(6).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3")), rowList.get(7).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(7).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3")), rowList.get(8).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(8).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3", "5")), rowList.get(9).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(9).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "4")), rowList.get(10).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(10).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("leek", "2"), rowList.get(11).dimensionValues());
+      Assert.assertEquals(4L, rowList.get(11).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("potato", Arrays.asList("1", "3")), rowList.get(12).dimensionValues());
+      Assert.assertEquals(2L, rowList.get(12).metricValues().get(0));
+
+      Assert.assertEquals(Arrays.asList("potato", "2"), rowList.get(13).dimensionValues());
+      Assert.assertEquals(4L, rowList.get(13).metricValues().get(0));
+
+      checkBitmapIndex(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), adapter.getBitmapIndex("dimA", "leek"));
+      checkBitmapIndex(Arrays.asList(12, 13), adapter.getBitmapIndex("dimA", "potato"));
+
+      checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", null));
+      checkBitmapIndex(ImmutableList.of(3, 4, 5), adapter.getBitmapIndex("dimMultiVal", ""));
+      checkBitmapIndex(Arrays.asList(1, 4, 6, 7, 8, 9, 10, 12), adapter.getBitmapIndex("dimMultiVal", "1"));
+      checkBitmapIndex(Arrays.asList(1, 4, 7, 11, 13), adapter.getBitmapIndex("dimMultiVal", "2"));
+      checkBitmapIndex(Arrays.asList(1, 2, 4, 5, 7, 8, 9, 12), adapter.getBitmapIndex("dimMultiVal", "3"));
+      checkBitmapIndex(Collections.singletonList(10), adapter.getBitmapIndex("dimMultiVal", "4"));
+      checkBitmapIndex(Collections.singletonList(9), adapter.getBitmapIndex("dimMultiVal", "5"));
+    }
+  }
+
   private QueryableIndex persistAndLoad(List<DimensionSchema> schema, InputRow... rows) throws IOException
   {
     IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null, new DimensionsSpec(schema, null, null));
diff --git a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowCompTest.java b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowCompTest.java
index 94e17c6..f6f95e0 100644
--- a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowCompTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowCompTest.java
@@ -71,8 +71,8 @@ public class IncrementalIndexRowCompTest extends InitializedNullHandlingTest
 
     Assert.assertTrue(comparator.compare(ir4, ir6) > 0);
     Assert.assertTrue(comparator.compare(ir5, ir6) > 0);
-    Assert.assertTrue(comparator.compare(ir4, ir5) < 0);
-    Assert.assertTrue(comparator.compare(ir5, ir4) > 0);
+    Assert.assertTrue(comparator.compare(ir5, ir4) < 0);
+    Assert.assertTrue(comparator.compare(ir4, ir5) > 0);
   }
 
   private MapBasedInputRow toMapRow(long time, Object... dimAndVal)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org