You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by jo...@apache.org on 2020/04/25 20:47:26 UTC
[druid] branch master updated: Adjust string comparators used for
ingestion (#9742)
This is an automated email from the ASF dual-hosted git repository.
jonwei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new fe000a9 Adjust string comparators used for ingestion (#9742)
fe000a9 is described below
commit fe000a9e4bf1d2681760f72e9c593bd2aa5a1416
Author: Jonathan Wei <jo...@users.noreply.github.com>
AuthorDate: Sat Apr 25 13:47:07 2020 -0700
Adjust string comparators used for ingestion (#9742)
* Adjust string comparators used for ingestion
* Small tweak
* Fix inspection, more javadocs
* Address PR comment
* Add rollup comment
* Add ordering test
* Fix IncrementaIndexRowCompTest
---
.../org/apache/druid/segment/DimensionHandler.java | 4 +
.../org/apache/druid/segment/DimensionIndexer.java | 4 +
.../druid/segment/StringDimensionHandler.java | 71 ++--
.../druid/segment/StringDimensionIndexer.java | 30 +-
.../druid/segment/IndexMergerNullHandlingTest.java | 28 +-
.../apache/druid/segment/IndexMergerTestBase.java | 432 ++++++++++++++++++++-
.../incremental/IncrementalIndexRowCompTest.java | 4 +-
7 files changed, 519 insertions(+), 54 deletions(-)
diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandler.java
index e1ea334..56bfed2 100644
--- a/processing/src/main/java/org/apache/druid/segment/DimensionHandler.java
+++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandler.java
@@ -122,6 +122,10 @@ public interface DimensionHandler
* Returns a comparator that knows how to compare {@link ColumnValueSelector} of the assumed dimension type,
* corresponding to this DimensionHandler. E. g. {@link StringDimensionHandler} returns a comparator, that compares
* {@link ColumnValueSelector}s as {@link DimensionSelector}s.
+ *
+ * The comparison rules used by this method should match the rules used by
+ * {@link DimensionIndexer#compareUnsortedEncodedKeyComponents}, otherwise incorrect ordering/merging of rows
+ * can occur during ingestion, causing issues such as imperfect rollup.
*/
Comparator<ColumnValueSelector> getEncodedValueSelectorComparator();
diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
index 06c27f7..99921eb 100644
--- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
@@ -241,6 +241,10 @@ public interface DimensionIndexer
*
* Refer to StringDimensionIndexer.compareUnsortedEncodedKeyComponents() for a reference implementation.
*
+ * The comparison rules used by this method should match the rules used by
+ * {@link DimensionHandler#getEncodedValueSelectorComparator()}, otherwise incorrect ordering/merging of rows
+ * can occur during ingestion, causing issues such as imperfect rollup.
+ *
* @param lhs dimension value array from a Row key
* @param rhs dimension value array from a Row key
* @return comparison of the two arrays
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
index c14bd31..65ff7f3 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
@@ -33,61 +33,46 @@ import java.util.Comparator;
public class StringDimensionHandler implements DimensionHandler<Integer, int[], String>
{
-
/**
- * Compares {@link IndexedInts} lexicographically, with the exception that if a row contains only zeros (that's the
- * index of null) at all positions, it is considered "null" as a whole and is "less" than any "non-null" row. Empty
- * row (size is zero) is also considered "null".
- *
- * The implementation is a bit complicated because it tries to check each position of both rows only once.
+ * This comparator uses the following rules:
+ * - Compare the two value arrays up to the length of the shorter array
+ * - If the two arrays match so far, then compare the array lengths, the shorter array is considered smaller
+ * - Comparing null and the empty list is a special case: these are considered equal
*/
private static final Comparator<ColumnValueSelector> DIMENSION_SELECTOR_COMPARATOR = (s1, s2) -> {
IndexedInts row1 = getRow(s1);
IndexedInts row2 = getRow(s2);
int len1 = row1.size();
int len2 = row2.size();
- boolean row1IsNull = true;
- boolean row2IsNull = true;
- for (int i = 0; i < Math.min(len1, len2); i++) {
- int v1 = row1.get(i);
- row1IsNull &= v1 == 0;
- int v2 = row2.get(i);
- row2IsNull &= v2 == 0;
- int valueDiff = Integer.compare(v1, v2);
- if (valueDiff != 0) {
- return valueDiff;
- }
- }
- //noinspection SubtractionInCompareTo -- substraction is safe here, because lengths or rows are small numbers.
- int lenDiff = len1 - len2;
- if (lenDiff == 0) {
- return 0;
- } else {
- if (!row1IsNull || !row2IsNull) {
- return lenDiff;
- } else {
- return compareRestNulls(row1, len1, row2, len2);
+ int lenCompareResult = Integer.compare(len1, len2);
+ int valsIndex = 0;
+
+ if (lenCompareResult != 0) {
+ // if the values don't have the same length, check if we're comparing [] and [null], which are equivalent
+ if (len1 + len2 == 1) {
+ IndexedInts longerRow = len2 > len1 ? row2 : row1;
+ if (longerRow.get(0) == 0) {
+ return 0;
+ } else {
+ //noinspection ObjectEquality -- longerRow is explicitly set to only row1 or row2
+ return longerRow == row1 ? 1 : -1;
+ }
}
}
- };
- private static int compareRestNulls(IndexedInts row1, int len1, IndexedInts row2, int len2)
- {
- if (len1 < len2) {
- for (int i = len1; i < len2; i++) {
- if (row2.get(i) != 0) {
- return -1;
- }
- }
- } else {
- for (int i = len2; i < len1; i++) {
- if (row1.get(i) != 0) {
- return 1;
- }
+ int lenToCompare = Math.min(len1, len2);
+ while (valsIndex < lenToCompare) {
+ int v1 = row1.get(valsIndex);
+ int v2 = row2.get(valsIndex);
+ int valueCompareResult = Integer.compare(v1, v2);
+ if (valueCompareResult != 0) {
+ return valueCompareResult;
}
+ ++valsIndex;
}
- return 0;
- }
+
+ return lenCompareResult;
+ };
/**
* Value for absent column, i. e. {@link NilColumnValueSelector}, should be equivalent to [null] during index merging.
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
index f08a495..ada146d 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
@@ -399,23 +399,42 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
int lhsLen = lhs.length;
int rhsLen = rhs.length;
- int retVal = Ints.compare(lhsLen, rhsLen);
+ int lenCompareResult = Ints.compare(lhsLen, rhsLen);
+ if (lenCompareResult != 0) {
+ // if the values don't have the same length, check if we're comparing [] and [null], which are equivalent
+ if (lhsLen + rhsLen == 1) {
+ int[] longerVal = rhsLen > lhsLen ? rhs : lhs;
+ if (longerVal[0] == dimLookup.idForNull) {
+ return 0;
+ } else {
+ //noinspection ArrayEquality -- longerVal is explicitly set to only lhs or rhs
+ return longerVal == lhs ? 1 : -1;
+ }
+ }
+ }
+
int valsIndex = 0;
- while (retVal == 0 && valsIndex < lhsLen) {
+ int lenToCompare = Math.min(lhsLen, rhsLen);
+ while (valsIndex < lenToCompare) {
int lhsVal = lhs[valsIndex];
int rhsVal = rhs[valsIndex];
if (lhsVal != rhsVal) {
final String lhsValActual = getActualValue(lhsVal, false);
final String rhsValActual = getActualValue(rhsVal, false);
+ int valueCompareResult = 0;
if (lhsValActual != null && rhsValActual != null) {
- retVal = lhsValActual.compareTo(rhsValActual);
+ valueCompareResult = lhsValActual.compareTo(rhsValActual);
} else if (lhsValActual == null ^ rhsValActual == null) {
- retVal = lhsValActual == null ? -1 : 1;
+ valueCompareResult = lhsValActual == null ? -1 : 1;
+ }
+ if (valueCompareResult != 0) {
+ return valueCompareResult;
}
}
++valsIndex;
}
- return retVal;
+
+ return lenCompareResult;
}
@Override
@@ -796,6 +815,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
return sortedLookup == null ? sortedLookup = dimLookup.sort() : sortedLookup;
}
+ @Nullable
private String getActualValue(int intermediateValue, boolean idSorted)
{
if (idSorted) {
diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java
index f2f1cf0..a9b63b4 100644
--- a/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerNullHandlingTest.java
@@ -33,6 +33,7 @@ import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.data.IncrementalIndexTest;
+import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
import org.junit.Assert;
@@ -55,6 +56,10 @@ import java.util.stream.IntStream;
public class IndexMergerNullHandlingTest
{
+ static {
+ NullHandling.initializeForTests();
+ }
+
private IndexMerger indexMerger;
private IndexIO indexIO;
private IndexSpec indexSpec;
@@ -221,8 +226,9 @@ public class IndexMergerNullHandlingTest
retVal.add(NullHandling.emptyToNullIfNeeded(((String) value)));
} else if (value instanceof List) {
final List<String> list = (List<String>) value;
- if (list.isEmpty() && !hasMultipleValues) {
+ if (list.isEmpty()) {
// empty lists become nulls in single valued columns
+ // they sometimes also become nulls in multi-valued columns (see comments in getRow())
retVal.add(NullHandling.emptyToNullIfNeeded(null));
} else {
retVal.addAll(list.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toList()));
@@ -242,7 +248,25 @@ public class IndexMergerNullHandlingTest
final List<String> retVal = new ArrayList<>();
if (column.hasMultipleValues()) {
- column.getMultiValueRow(rowNumber).forEach(i -> retVal.add(column.lookupName(i)));
+ IndexedInts rowVals = column.getMultiValueRow(rowNumber);
+ if (rowVals.size() == 0) {
+ // This is a sort of test hack:
+ // - If we ingest the subset [{d=[]}, {d=[a, b]}], we get an IndexedInts with 0 size for the nully row,
+ // representing the empty list
+ // - If we ingest the subset [{}, {d=[]}, {d=[a, b]}], we instead get an IndexedInts with 1 size,
+ // representing a row with a single null value
+ // This occurs because the dimension value comparator used during ingestion considers null and the empty list
+ // to be the same.
+ // - In the first subset, we only see the empty list and a non-empty list, so the key used in the
+ // incremental index fact table for the nully row is the empty list.
+ // - In the second subset, the fact table initially gets an entry for d=null. When the row with the
+ // empty list value is added, it is treated as identical to the first d=null row, so it gets rolled up.
+ // The resulting persisted segment will have [null] instead of [] because of this rollup.
+ // To simplify this test class, we always normalize the empty list into null here.
+ retVal.add(null);
+ } else {
+ rowVals.forEach(i -> retVal.add(column.lookupName(i)));
+ }
} else {
retVal.add(column.lookupName(column.getSingleValueRow(rowNumber)));
}
diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java
index 1783f43..b253614 100644
--- a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java
+++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java
@@ -78,6 +78,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@@ -2076,11 +2077,11 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest
Assert.assertEquals(2, rowList.size());
Assert.assertEquals(
- Arrays.asList(Arrays.asList("a", "b", "x"), Arrays.asList("a", "b", "x")),
+ Arrays.asList(Arrays.asList("a", "a", "b", "x"), Arrays.asList("a", "b", "x", "x")),
rowList.get(0).dimensionValues()
);
Assert.assertEquals(
- Arrays.asList(Arrays.asList("a", "a", "b", "x"), Arrays.asList("a", "b", "x", "x")),
+ Arrays.asList(Arrays.asList("a", "b", "x"), Arrays.asList("a", "b", "x")),
rowList.get(1).dimensionValues()
);
@@ -2210,6 +2211,433 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest
);
}
+ @Test
+ public void testMultivalDim_mergeAcrossSegments_rollupWorks() throws Exception
+ {
+ List<String> dims = Arrays.asList(
+ "dimA",
+ "dimMultiVal"
+ );
+
+ IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
+ .withDimensionsSpec(
+ new DimensionsSpec(
+ ImmutableList.of(
+ new StringDimensionSchema("dimA", MultiValueHandling.SORTED_ARRAY, true),
+ new StringDimensionSchema("dimMultiVal", MultiValueHandling.SORTED_ARRAY, true)
+ )
+ )
+ )
+ .withMetrics(
+ new LongSumAggregatorFactory("sumCount", "sumCount")
+ )
+ .withRollup(true)
+ .build();
+
+ IncrementalIndex toPersistA = new IncrementalIndex.Builder()
+ .setIndexSchema(indexSchema)
+ .setMaxRowCount(1000)
+ .buildOnheap();
+
+ Map<String, Object> event1 = new HashMap<>();
+ event1.put("dimA", "leek");
+ event1.put("dimMultiVal", ImmutableList.of("1", "2", "4"));
+ event1.put("sumCount", 1L);
+
+ Map<String, Object> event2 = new HashMap<>();
+ event2.put("dimA", "leek");
+ event2.put("dimMultiVal", ImmutableList.of("1", "2", "3", "5"));
+ event2.put("sumCount", 1L);
+
+ toPersistA.add(new MapBasedInputRow(1, dims, event1));
+ toPersistA.add(new MapBasedInputRow(1, dims, event2));
+
+ IncrementalIndex toPersistB = new IncrementalIndex.Builder()
+ .setIndexSchema(indexSchema)
+ .setMaxRowCount(1000)
+ .buildOnheap();
+
+ Map<String, Object> event3 = new HashMap<>();
+ event3.put("dimA", "leek");
+ event3.put("dimMultiVal", ImmutableList.of("1", "2", "4"));
+ event3.put("sumCount", 1L);
+
+ Map<String, Object> event4 = new HashMap<>();
+ event4.put("dimA", "potato");
+ event4.put("dimMultiVal", ImmutableList.of("0", "1", "4"));
+ event4.put("sumCount", 1L);
+
+ toPersistB.add(new MapBasedInputRow(1, dims, event3));
+ toPersistB.add(new MapBasedInputRow(1, dims, event4));
+
+ final File tmpDirA = temporaryFolder.newFolder();
+ final File tmpDirB = temporaryFolder.newFolder();
+ final File tmpDirMerged = temporaryFolder.newFolder();
+
+ QueryableIndex indexA = closer.closeLater(
+ indexIO.loadIndex(indexMerger.persist(toPersistA, tmpDirA, indexSpec, null))
+ );
+
+ QueryableIndex indexB = closer.closeLater(
+ indexIO.loadIndex(indexMerger.persist(toPersistB, tmpDirB, indexSpec, null))
+ );
+
+ final QueryableIndex merged = closer.closeLater(
+ indexIO.loadIndex(
+ indexMerger.mergeQueryableIndex(
+ Arrays.asList(indexA, indexB),
+ true,
+ new AggregatorFactory[]{
+ new LongSumAggregatorFactory("sumCount", "sumCount")
+ },
+ tmpDirMerged,
+ indexSpec,
+ null
+ )
+ )
+ );
+
+ final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
+ final List<DebugRow> rowList = RowIteratorHelper.toList(adapter.getRows());
+
+ Assert.assertEquals(
+ ImmutableList.of("dimA", "dimMultiVal"),
+ ImmutableList.copyOf(adapter.getDimensionNames())
+ );
+
+ Assert.assertEquals(3, rowList.size());
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3", "5")), rowList.get(0).dimensionValues());
+ Assert.assertEquals(1L, rowList.get(0).metricValues().get(0));
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "4")), rowList.get(1).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(1).metricValues().get(0));
+ Assert.assertEquals(Arrays.asList("potato", Arrays.asList("0", "1", "4")), rowList.get(2).dimensionValues());
+ Assert.assertEquals(1L, rowList.get(2).metricValues().get(0));
+
+ checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimA", "leek"));
+ checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimA", "potato"));
+
+ checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimMultiVal", "0"));
+ checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", "1"));
+ checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimMultiVal", "2"));
+ checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "3"));
+ checkBitmapIndex(Arrays.asList(1, 2), adapter.getBitmapIndex("dimMultiVal", "4"));
+ checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "5"));
+ }
+
+
+ @Test
+ public void testMultivalDim_persistAndMerge_dimensionValueOrderingRules() throws Exception
+ {
+ List<String> dims = Arrays.asList(
+ "dimA",
+ "dimMultiVal"
+ );
+
+ IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
+ .withDimensionsSpec(
+ new DimensionsSpec(
+ ImmutableList.of(
+ new StringDimensionSchema("dimA", MultiValueHandling.SORTED_ARRAY, true),
+ new StringDimensionSchema("dimMultiVal", MultiValueHandling.SORTED_ARRAY, true)
+ )
+ )
+ )
+ .withMetrics(
+ new LongSumAggregatorFactory("sumCount", "sumCount")
+ )
+ .withRollup(true)
+ .build();
+
+ Map<String, Object> nullEvent = new HashMap<>();
+ nullEvent.put("dimA", "leek");
+ nullEvent.put("sumCount", 1L);
+
+ Map<String, Object> nullEvent2 = new HashMap<>();
+ nullEvent2.put("dimA", "leek");
+ nullEvent2.put("dimMultiVal", null);
+ nullEvent2.put("sumCount", 1L);
+
+ Map<String, Object> emptyListEvent = new HashMap<>();
+ emptyListEvent.put("dimA", "leek");
+ emptyListEvent.put("dimMultiVal", ImmutableList.of());
+ emptyListEvent.put("sumCount", 1L);
+
+ List<String> listWithNull = new ArrayList<>();
+ listWithNull.add(null);
+ Map<String, Object> listWithNullEvent = new HashMap<>();
+ listWithNullEvent.put("dimA", "leek");
+ listWithNullEvent.put("dimMultiVal", listWithNull);
+ listWithNullEvent.put("sumCount", 1L);
+
+ Map<String, Object> emptyStringEvent = new HashMap<>();
+ emptyStringEvent.put("dimA", "leek");
+ emptyStringEvent.put("dimMultiVal", "");
+ emptyStringEvent.put("sumCount", 1L);
+
+ Map<String, Object> listWithEmptyStringEvent = new HashMap<>();
+ listWithEmptyStringEvent.put("dimA", "leek");
+ listWithEmptyStringEvent.put("dimMultiVal", ImmutableList.of(""));
+ listWithEmptyStringEvent.put("sumCount", 1L);
+
+ Map<String, Object> singleValEvent = new HashMap<>();
+ singleValEvent.put("dimA", "leek");
+ singleValEvent.put("dimMultiVal", "1");
+ singleValEvent.put("sumCount", 1L);
+
+ Map<String, Object> singleValEvent2 = new HashMap<>();
+ singleValEvent2.put("dimA", "leek");
+ singleValEvent2.put("dimMultiVal", "2");
+ singleValEvent2.put("sumCount", 1L);
+
+ Map<String, Object> singleValEvent3 = new HashMap<>();
+ singleValEvent3.put("dimA", "potato");
+ singleValEvent3.put("dimMultiVal", "2");
+ singleValEvent3.put("sumCount", 1L);
+
+ Map<String, Object> listWithSingleValEvent = new HashMap<>();
+ listWithSingleValEvent.put("dimA", "leek");
+ listWithSingleValEvent.put("dimMultiVal", ImmutableList.of("1"));
+ listWithSingleValEvent.put("sumCount", 1L);
+
+ Map<String, Object> listWithSingleValEvent2 = new HashMap<>();
+ listWithSingleValEvent2.put("dimA", "leek");
+ listWithSingleValEvent2.put("dimMultiVal", ImmutableList.of("2"));
+ listWithSingleValEvent2.put("sumCount", 1L);
+
+ Map<String, Object> listWithSingleValEvent3 = new HashMap<>();
+ listWithSingleValEvent3.put("dimA", "potato");
+ listWithSingleValEvent3.put("dimMultiVal", ImmutableList.of("2"));
+ listWithSingleValEvent3.put("sumCount", 1L);
+
+ Map<String, Object> multivalEvent = new HashMap<>();
+ multivalEvent.put("dimA", "leek");
+ multivalEvent.put("dimMultiVal", ImmutableList.of("1", "3"));
+ multivalEvent.put("sumCount", 1L);
+
+ Map<String, Object> multivalEvent2 = new HashMap<>();
+ multivalEvent2.put("dimA", "leek");
+ multivalEvent2.put("dimMultiVal", ImmutableList.of("1", "4"));
+ multivalEvent2.put("sumCount", 1L);
+
+ Map<String, Object> multivalEvent3 = new HashMap<>();
+ multivalEvent3.put("dimA", "leek");
+ multivalEvent3.put("dimMultiVal", ImmutableList.of("1", "3", "5"));
+ multivalEvent3.put("sumCount", 1L);
+
+ Map<String, Object> multivalEvent4 = new HashMap<>();
+ multivalEvent4.put("dimA", "leek");
+ multivalEvent4.put("dimMultiVal", ImmutableList.of("1", "2", "3"));
+ multivalEvent4.put("sumCount", 1L);
+
+ List<String> multivalEvent5List = Arrays.asList("1", "2", "3", null);
+ Map<String, Object> multivalEvent5 = new HashMap<>();
+ multivalEvent5.put("dimA", "leek");
+ multivalEvent5.put("dimMultiVal", multivalEvent5List);
+ multivalEvent5.put("sumCount", 1L);
+
+ List<String> multivalEvent6List = Arrays.asList(null, "3");
+ Map<String, Object> multivalEvent6 = new HashMap<>();
+ multivalEvent6.put("dimA", "leek");
+ multivalEvent6.put("dimMultiVal", multivalEvent6List);
+ multivalEvent6.put("sumCount", 1L);
+
+ Map<String, Object> multivalEvent7 = new HashMap<>();
+ multivalEvent7.put("dimA", "leek");
+ multivalEvent7.put("dimMultiVal", ImmutableList.of("1", "2", "3", ""));
+ multivalEvent7.put("sumCount", 1L);
+
+ Map<String, Object> multivalEvent8 = new HashMap<>();
+ multivalEvent8.put("dimA", "leek");
+ multivalEvent8.put("dimMultiVal", ImmutableList.of("", "3"));
+ multivalEvent8.put("sumCount", 1L);
+
+ Map<String, Object> multivalEvent9 = new HashMap<>();
+ multivalEvent9.put("dimA", "potato");
+ multivalEvent9.put("dimMultiVal", ImmutableList.of("1", "3"));
+ multivalEvent9.put("sumCount", 1L);
+
+ List<Map<String, Object>> events = ImmutableList.of(
+ nullEvent,
+ nullEvent2,
+ emptyListEvent,
+ listWithNullEvent,
+ emptyStringEvent,
+ listWithEmptyStringEvent,
+ singleValEvent,
+ singleValEvent2,
+ singleValEvent3,
+ listWithSingleValEvent,
+ listWithSingleValEvent2,
+ listWithSingleValEvent3,
+ multivalEvent,
+ multivalEvent2,
+ multivalEvent3,
+ multivalEvent4,
+ multivalEvent5,
+ multivalEvent6,
+ multivalEvent7,
+ multivalEvent8,
+ multivalEvent9
+ );
+
+ IncrementalIndex toPersistA = new IncrementalIndex.Builder()
+ .setIndexSchema(indexSchema)
+ .setMaxRowCount(1000)
+ .buildOnheap();
+
+ for (Map<String, Object> event : events) {
+ toPersistA.add(new MapBasedInputRow(1, dims, event));
+ }
+
+ final File tmpDirA = temporaryFolder.newFolder();
+ QueryableIndex indexA = closer.closeLater(
+ indexIO.loadIndex(indexMerger.persist(toPersistA, tmpDirA, indexSpec, null))
+ );
+
+ List<QueryableIndex> singleEventIndexes = new ArrayList<>();
+ for (Map<String, Object> event : events) {
+ IncrementalIndex toPersist = new IncrementalIndex.Builder()
+ .setIndexSchema(indexSchema)
+ .setMaxRowCount(1000)
+ .buildOnheap();
+
+ toPersist.add(new MapBasedInputRow(1, dims, event));
+ final File tmpDir = temporaryFolder.newFolder();
+ QueryableIndex queryableIndex = closer.closeLater(
+ indexIO.loadIndex(indexMerger.persist(toPersist, tmpDir, indexSpec, null))
+ );
+ singleEventIndexes.add(queryableIndex);
+ }
+ singleEventIndexes.add(indexA);
+
+ final File tmpDirMerged = temporaryFolder.newFolder();
+ final QueryableIndex merged = closer.closeLater(
+ indexIO.loadIndex(
+ indexMerger.mergeQueryableIndex(
+ singleEventIndexes,
+ true,
+ new AggregatorFactory[]{
+ new LongSumAggregatorFactory("sumCount", "sumCount")
+ },
+ tmpDirMerged,
+ indexSpec,
+ null
+ )
+ )
+ );
+
+ final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
+ final List<DebugRow> rowList = RowIteratorHelper.toList(adapter.getRows());
+
+ Assert.assertEquals(
+ ImmutableList.of("dimA", "dimMultiVal"),
+ ImmutableList.copyOf(adapter.getDimensionNames())
+ );
+
+ if (NullHandling.replaceWithDefault()) {
+ Assert.assertEquals(11, rowList.size());
+
+ Assert.assertEquals(Arrays.asList("leek", null), rowList.get(0).dimensionValues());
+ Assert.assertEquals(12L, rowList.get(0).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "1", "2", "3")), rowList.get(1).dimensionValues());
+ Assert.assertEquals(4L, rowList.get(1).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "3")), rowList.get(2).dimensionValues());
+ Assert.assertEquals(4L, rowList.get(2).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", "1"), rowList.get(3).dimensionValues());
+ Assert.assertEquals(4L, rowList.get(3).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3")), rowList.get(4).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(4).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3")), rowList.get(5).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(5).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3", "5")), rowList.get(6).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(6).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "4")), rowList.get(7).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(7).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", "2"), rowList.get(8).dimensionValues());
+ Assert.assertEquals(4L, rowList.get(8).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("potato", Arrays.asList("1", "3")), rowList.get(9).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(9).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("potato", "2"), rowList.get(10).dimensionValues());
+ Assert.assertEquals(4L, rowList.get(10).metricValues().get(0));
+
+ checkBitmapIndex(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8), adapter.getBitmapIndex("dimA", "leek"));
+ checkBitmapIndex(Arrays.asList(9, 10), adapter.getBitmapIndex("dimA", "potato"));
+
+ checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", null));
+ checkBitmapIndex(ImmutableList.of(), adapter.getBitmapIndex("dimMultiVal", ""));
+ checkBitmapIndex(Arrays.asList(1, 3, 4, 5, 6, 7, 9), adapter.getBitmapIndex("dimMultiVal", "1"));
+ checkBitmapIndex(Arrays.asList(1, 4, 8, 10), adapter.getBitmapIndex("dimMultiVal", "2"));
+ checkBitmapIndex(Arrays.asList(1, 2, 4, 5, 6, 9), adapter.getBitmapIndex("dimMultiVal", "3"));
+ checkBitmapIndex(Collections.singletonList(7), adapter.getBitmapIndex("dimMultiVal", "4"));
+ checkBitmapIndex(Collections.singletonList(6), adapter.getBitmapIndex("dimMultiVal", "5"));
+ } else {
+ Assert.assertEquals(14, rowList.size());
+
+ Assert.assertEquals(Arrays.asList("leek", null), rowList.get(0).dimensionValues());
+ Assert.assertEquals(8L, rowList.get(0).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "1", "2", "3")), rowList.get(1).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(1).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList(null, "3")), rowList.get(2).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(2).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", ""), rowList.get(3).dimensionValues());
+ Assert.assertEquals(4L, rowList.get(3).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList("", "1", "2", "3")), rowList.get(4).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(4).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList("", "3")), rowList.get(5).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(5).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", "1"), rowList.get(6).dimensionValues());
+ Assert.assertEquals(4L, rowList.get(6).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3")), rowList.get(7).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(7).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3")), rowList.get(8).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(8).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "3", "5")), rowList.get(9).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(9).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "4")), rowList.get(10).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(10).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("leek", "2"), rowList.get(11).dimensionValues());
+ Assert.assertEquals(4L, rowList.get(11).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("potato", Arrays.asList("1", "3")), rowList.get(12).dimensionValues());
+ Assert.assertEquals(2L, rowList.get(12).metricValues().get(0));
+
+ Assert.assertEquals(Arrays.asList("potato", "2"), rowList.get(13).dimensionValues());
+ Assert.assertEquals(4L, rowList.get(13).metricValues().get(0));
+
+ checkBitmapIndex(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), adapter.getBitmapIndex("dimA", "leek"));
+ checkBitmapIndex(Arrays.asList(12, 13), adapter.getBitmapIndex("dimA", "potato"));
+
+ checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", null));
+ checkBitmapIndex(ImmutableList.of(3, 4, 5), adapter.getBitmapIndex("dimMultiVal", ""));
+ checkBitmapIndex(Arrays.asList(1, 4, 6, 7, 8, 9, 10, 12), adapter.getBitmapIndex("dimMultiVal", "1"));
+ checkBitmapIndex(Arrays.asList(1, 4, 7, 11, 13), adapter.getBitmapIndex("dimMultiVal", "2"));
+ checkBitmapIndex(Arrays.asList(1, 2, 4, 5, 7, 8, 9, 12), adapter.getBitmapIndex("dimMultiVal", "3"));
+ checkBitmapIndex(Collections.singletonList(10), adapter.getBitmapIndex("dimMultiVal", "4"));
+ checkBitmapIndex(Collections.singletonList(9), adapter.getBitmapIndex("dimMultiVal", "5"));
+ }
+ }
+
private QueryableIndex persistAndLoad(List<DimensionSchema> schema, InputRow... rows) throws IOException
{
IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null, new DimensionsSpec(schema, null, null));
diff --git a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowCompTest.java b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowCompTest.java
index 94e17c6..f6f95e0 100644
--- a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowCompTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowCompTest.java
@@ -71,8 +71,8 @@ public class IncrementalIndexRowCompTest extends InitializedNullHandlingTest
Assert.assertTrue(comparator.compare(ir4, ir6) > 0);
Assert.assertTrue(comparator.compare(ir5, ir6) > 0);
- Assert.assertTrue(comparator.compare(ir4, ir5) < 0);
- Assert.assertTrue(comparator.compare(ir5, ir4) > 0);
+ Assert.assertTrue(comparator.compare(ir5, ir4) < 0);
+ Assert.assertTrue(comparator.compare(ir4, ir5) > 0);
}
private MapBasedInputRow toMapRow(long time, Object... dimAndVal)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org