You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by gi...@apache.org on 2020/08/11 18:07:38 UTC
[druid] branch master updated: fix bug with expressions on sparse
string realtime columns without explicit null valued rows (#10248)
This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new c72f96a fix bug with expressions on sparse string realtime columns without explicit null valued rows (#10248)
c72f96a is described below
commit c72f96a4babdf5055912bb0fb5eb2236cfe0ef23
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Tue Aug 11 11:07:17 2020 -0700
fix bug with expressions on sparse string realtime columns without explicit null valued rows (#10248)
* fix bug with realtime expressions on sparse string columns
* fix test
* add comment back
* push capabilities for dimensions to dimension indexers since they know things
* style
* style
* fixes
* getting a bit carried away
* missed one
* fix it
* benchmark build fix
* review stuffs
* javadoc and comments
* add comment
* more strict check
* fix missed usage of impl instead of interface
---
.../indexing/StringDimensionIndexerBenchmark.java | 2 +-
.../apache/druid/indexer/IndexGeneratorJob.java | 4 +-
.../epinephelinae/GroupByQueryEngineV2.java | 2 +-
.../druid/query/metadata/SegmentAnalyzer.java | 2 +-
.../apache/druid/query/topn/TopNQueryEngine.java | 2 +-
.../druid/segment/DimensionHandlerUtils.java | 12 +--
.../org/apache/druid/segment/DimensionIndexer.java | 2 +
.../druid/segment/DoubleDimensionIndexer.java | 12 +++
.../druid/segment/FloatDimensionIndexer.java | 12 +++
.../org/apache/druid/segment/IndexMergerV9.java | 70 +++++++++++++-
.../apache/druid/segment/LongDimensionIndexer.java | 13 +++
.../druid/segment/StringDimensionHandler.java | 6 +-
.../druid/segment/StringDimensionIndexer.java | 56 ++++++++++-
.../druid/segment/column/ColumnCapabilities.java | 85 ++++++++++++++++-
.../segment/column/ColumnCapabilitiesImpl.java | 104 ++++++++++-----------
.../druid/segment/filter/ExpressionFilter.java | 2 +-
.../org/apache/druid/segment/filter/Filters.java | 2 +-
.../segment/incremental/IncrementalIndex.java | 95 ++++++++++---------
.../IncrementalIndexStorageAdapter.java | 63 ++++++++++++-
.../QueryableIndexVectorColumnSelectorFactory.java | 6 +-
.../druid/segment/virtual/ExpressionSelectors.java | 6 +-
.../druid/query/lookup/LookupSegmentTest.java | 4 +-
.../apache/druid/segment/IndexMergerTestBase.java | 27 ------
.../QueryableIndexColumnCapabilitiesTest.java | 18 ++--
.../segment/RowBasedColumnSelectorFactoryTest.java | 12 +--
.../segment/column/ColumnCapabilitiesImplTest.java | 6 +-
.../join/HashJoinSegmentStorageAdapterTest.java | 4 +-
.../join/table/IndexedTableJoinableTest.java | 4 +-
...ectorTest.java => ExpressionSelectorsTest.java} | 103 +++++++++++++++++++-
.../virtual/ExpressionVirtualColumnTest.java | 4 +-
.../druid/segment/realtime/plumber/Sink.java | 6 +-
31 files changed, 551 insertions(+), 195 deletions(-)
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java
index 52fc851..2e4490b 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java
@@ -59,7 +59,7 @@ public class StringDimensionIndexerBenchmark
@Setup
public void setup()
{
- indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true);
+ indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true, false);
for (int i = 0; i < cardinality; i++) {
indexer.processRowValsToUnsortedEncodedKeyComponent("abcd-" + i, true);
diff --git a/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java b/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java
index 5dfdcac..72bbaee 100644
--- a/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java
+++ b/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java
@@ -47,7 +47,7 @@ import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.BaseProgressIndicator;
import org.apache.druid.segment.ProgressIndicator;
import org.apache.druid.segment.QueryableIndex;
-import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.indexing.TuningConfigs;
@@ -289,7 +289,7 @@ public class IndexGeneratorJob implements Jobby
AggregatorFactory[] aggs,
HadoopDruidIndexerConfig config,
Iterable<String> oldDimOrder,
- Map<String, ColumnCapabilitiesImpl> oldCapabilities
+ Map<String, ColumnCapabilities> oldCapabilities
)
{
final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
index d3aaa4f..b4cda9b 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
@@ -340,7 +340,7 @@ public class GroupByQueryEngineV2
// Now check column capabilities.
final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension());
- return (columnCapabilities != null && !columnCapabilities.hasMultipleValues().isMaybeTrue()) ||
+ return (columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse()) ||
(missingMeansNonExistent && columnCapabilities == null);
});
}
diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
index 659b55b..15a081d 100644
--- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
+++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
@@ -227,7 +227,7 @@ public class SegmentAnalyzer
min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0));
max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1));
}
- } else if (capabilities.isDictionaryEncoded()) {
+ } else if (capabilities.isDictionaryEncoded().isTrue()) {
// fallback if no bitmap index
DictionaryEncodedColumn<String> theColumn = (DictionaryEncodedColumn<String>) columnHolder.getColumn();
cardinality = theColumn.getCardinality();
diff --git a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java
index eb22dc9..daad69f 100644
--- a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java
+++ b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java
@@ -194,7 +194,7 @@ public class TopNQueryEngine
}
if (capabilities != null && capabilities.getType() == ValueType.STRING) {
// string columns must use the on heap algorithm unless they have the following capabilites
- return capabilities.isDictionaryEncoded() && capabilities.areDictionaryValuesUnique().isTrue();
+ return capabilities.isDictionaryEncoded().isTrue() && capabilities.areDictionaryValuesUnique().isTrue();
} else {
// non-strings are not eligible to use the pooled algorithm, and should use a heap algorithm
return false;
diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java
index 9e5a129..f5b7e9f 100644
--- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java
+++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java
@@ -73,16 +73,16 @@ public final class DimensionHandlerUtils
)
{
if (capabilities == null) {
- return new StringDimensionHandler(dimensionName, multiValueHandling, true);
+ return new StringDimensionHandler(dimensionName, multiValueHandling, true, false);
}
multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
if (capabilities.getType() == ValueType.STRING) {
- if (!capabilities.isDictionaryEncoded()) {
+ if (!capabilities.isDictionaryEncoded().isTrue()) {
throw new IAE("String column must have dictionary encoding.");
}
- return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes());
+ return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes(), capabilities.hasSpatialIndexes());
}
if (capabilities.getType() == ValueType.LONG) {
@@ -98,7 +98,7 @@ public final class DimensionHandlerUtils
}
// Return a StringDimensionHandler by default (null columns will be treated as String typed)
- return new StringDimensionHandler(dimensionName, multiValueHandling, true);
+ return new StringDimensionHandler(dimensionName, multiValueHandling, true, false);
}
public static List<ValueType> getValueTypesFromDimensionSpecs(List<DimensionSpec> dimSpecs)
@@ -226,11 +226,11 @@ public final class DimensionHandlerUtils
capabilities = ColumnCapabilitiesImpl.copyOf(capabilities)
.setType(ValueType.STRING)
.setDictionaryValuesUnique(
- capabilities.isDictionaryEncoded() &&
+ capabilities.isDictionaryEncoded().isTrue() &&
fn.getExtractionType() == ExtractionFn.ExtractionType.ONE_TO_ONE
)
.setDictionaryValuesSorted(
- capabilities.isDictionaryEncoded() && fn.preservesOrdering()
+ capabilities.isDictionaryEncoded().isTrue() && fn.preservesOrdering()
);
}
diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
index cf7631d..277deb9 100644
--- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
@@ -22,6 +22,7 @@ package org.apache.druid.segment;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.MutableBitmap;
import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
@@ -236,6 +237,7 @@ public interface DimensionIndexer
IncrementalIndex.DimensionDesc desc
);
+ ColumnCapabilities getColumnCapabilities();
/**
* Compares the row values for this DimensionIndexer's dimension from a Row key.
*
diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java
index b802f75..677ed41 100644
--- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java
@@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
@@ -38,6 +41,9 @@ public class DoubleDimensionIndexer implements DimensionIndexer<Double, Double,
{
public static final Comparator<Double> DOUBLE_COMPARATOR = Comparators.naturalNullsFirst();
+ private final ColumnCapabilitiesImpl capabilities =
+ ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE);
+
@Override
public Double processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
{
@@ -90,6 +96,12 @@ public class DoubleDimensionIndexer implements DimensionIndexer<Double, Double,
}
@Override
+ public ColumnCapabilities getColumnCapabilities()
+ {
+ return capabilities;
+ }
+
+ @Override
public DimensionSelector makeDimensionSelector(
DimensionSpec spec,
IncrementalIndexRowHolder currEntry,
diff --git a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java
index dce58a2..2db5b59 100644
--- a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java
@@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
@@ -38,6 +41,9 @@ public class FloatDimensionIndexer implements DimensionIndexer<Float, Float, Flo
{
public static final Comparator<Float> FLOAT_COMPARATOR = Comparators.naturalNullsFirst();
+ private final ColumnCapabilitiesImpl capabilities =
+ ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT);
+
@Override
public Float processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
{
@@ -91,6 +97,12 @@ public class FloatDimensionIndexer implements DimensionIndexer<Float, Float, Flo
}
@Override
+ public ColumnCapabilities getColumnCapabilities()
+ {
+ return capabilities;
+ }
+
+ @Override
public DimensionSelector makeDimensionSelector(
DimensionSpec spec,
IncrementalIndexRowHolder currEntry,
diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
index 066b8dc..19ce745 100644
--- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
+++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
@@ -88,6 +88,63 @@ public class IndexMergerV9 implements IndexMerger
{
private static final Logger log = new Logger(IndexMergerV9.class);
+ // merge logic describing the state that capabilities will be in after the incremental index is persisted
+ public static final ColumnCapabilities.CoercionLogic DIMENSION_CAPABILITY_MERGE_LOGIC =
+ new ColumnCapabilities.CoercionLogic()
+ {
+ @Override
+ public boolean dictionaryEncoded()
+ {
+ return true;
+ }
+
+ @Override
+ public boolean dictionaryValuesSorted()
+ {
+ return true;
+ }
+
+ @Override
+ public boolean dictionaryValuesUnique()
+ {
+ return true;
+ }
+
+ @Override
+ public boolean multipleValues()
+ {
+ return false;
+ }
+ };
+
+ public static final ColumnCapabilities.CoercionLogic METRIC_CAPABILITY_MERGE_LOGIC =
+ new ColumnCapabilities.CoercionLogic()
+ {
+ @Override
+ public boolean dictionaryEncoded()
+ {
+ return false;
+ }
+
+ @Override
+ public boolean dictionaryValuesSorted()
+ {
+ return false;
+ }
+
+ @Override
+ public boolean dictionaryValuesUnique()
+ {
+ return false;
+ }
+
+ @Override
+ public boolean multipleValues()
+ {
+ return false;
+ }
+ };
+
private final ObjectMapper mapper;
private final IndexIO indexIO;
private final SegmentWriteOutMediumFactory defaultSegmentWriteOutMediumFactory;
@@ -724,14 +781,14 @@ public class IndexMergerV9 implements IndexMerger
for (String dimension : adapter.getDimensionNames()) {
ColumnCapabilities capabilities = adapter.getCapabilities(dimension);
capabilitiesMap.compute(dimension, (d, existingCapabilities) ->
- ColumnCapabilitiesImpl.snapshot(capabilities)
- .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities)));
+ ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, DIMENSION_CAPABILITY_MERGE_LOGIC)
+ );
}
for (String metric : adapter.getMetricNames()) {
ColumnCapabilities capabilities = adapter.getCapabilities(metric);
capabilitiesMap.compute(metric, (m, existingCapabilities) ->
- ColumnCapabilitiesImpl.snapshot(capabilities)
- .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities)));
+ ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, METRIC_CAPABILITY_MERGE_LOGIC)
+ );
metricsValueTypes.put(metric, capabilities.getType());
metricTypeNames.put(metric, adapter.getMetricType(metric));
}
@@ -1011,7 +1068,10 @@ public class IndexMergerV9 implements IndexMerger
{
Map<String, DimensionHandler> handlers = new LinkedHashMap<>();
for (int i = 0; i < mergedDimensions.size(); i++) {
- ColumnCapabilities capabilities = dimCapabilities.get(i);
+ ColumnCapabilities capabilities = ColumnCapabilitiesImpl.snapshot(
+ dimCapabilities.get(i),
+ DIMENSION_CAPABILITY_MERGE_LOGIC
+ );
String dimName = mergedDimensions.get(i);
DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimName, capabilities, null);
handlers.put(dimName, handler);
diff --git a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java
index f2a9127..2664055 100644
--- a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java
@@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
@@ -38,6 +41,10 @@ public class LongDimensionIndexer implements DimensionIndexer<Long, Long, Long>
{
public static final Comparator LONG_COMPARATOR = Comparators.<Long>naturalNullsFirst();
+ private final ColumnCapabilitiesImpl capabilities =
+ ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG);
+
+
@Override
public Long processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
{
@@ -91,6 +98,12 @@ public class LongDimensionIndexer implements DimensionIndexer<Long, Long, Long>
}
@Override
+ public ColumnCapabilities getColumnCapabilities()
+ {
+ return capabilities;
+ }
+
+ @Override
public DimensionSelector makeDimensionSelector(
DimensionSpec spec,
IncrementalIndexRowHolder currEntry,
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
index 65ff7f3..b7fdad1 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
@@ -98,12 +98,14 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
private final String dimensionName;
private final MultiValueHandling multiValueHandling;
private final boolean hasBitmapIndexes;
+ private final boolean hasSpatialIndexes;
- public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes)
+ public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes, boolean hasSpatialIndexes)
{
this.dimensionName = dimensionName;
this.multiValueHandling = multiValueHandling;
this.hasBitmapIndexes = hasBitmapIndexes;
+ this.hasSpatialIndexes = hasSpatialIndexes;
}
@Override
@@ -139,7 +141,7 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
@Override
public DimensionIndexer<Integer, int[], String> makeIndexer()
{
- return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes);
+ return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes);
}
@Override
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
index c0200e1..bca0a5c 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
@@ -40,6 +40,9 @@ import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.ArrayBasedIndexedInts;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.data.IndexedInts;
@@ -74,7 +77,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
private String minValue = null;
@Nullable
private String maxValue = null;
- private int idForNull = ABSENT_VALUE_ID;
+ private volatile int idForNull = ABSENT_VALUE_ID;
private final Object2IntMap<String> valueToId = new Object2IntOpenHashMap<>();
@@ -233,17 +236,19 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
private final DimensionDictionary dimLookup;
private final MultiValueHandling multiValueHandling;
private final boolean hasBitmapIndexes;
+ private final boolean hasSpatialIndexes;
private volatile boolean hasMultipleValues = false;
private volatile boolean isSparse = false;
@Nullable
private SortedDimensionDictionary sortedLookup;
- public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes)
+ public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes, boolean hasSpatialIndexes)
{
this.dimLookup = new DimensionDictionary();
this.multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
this.hasBitmapIndexes = hasBitmapIndexes;
+ this.hasSpatialIndexes = hasSpatialIndexes;
}
@Override
@@ -400,6 +405,17 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
return dimLookup.size();
}
+ /**
+ * returns true if all values are encoded in {@link #dimLookup}
+ */
+ private boolean dictionaryEncodesAllValues()
+ {
+ // name lookup is possible in advance if we explicitly process a value for every row, or if we've encountered an
+ // actual null value and it is present in our dictionary. otherwise the dictionary will be missing ids for implicit
+ // null values
+ return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID;
+ }
+
@Override
public int compareUnsortedEncodedKeyComponents(int[] lhs, int[] rhs)
{
@@ -457,6 +473,37 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
}
@Override
+ public ColumnCapabilities getColumnCapabilities()
+ {
+ ColumnCapabilitiesImpl capabilites = new ColumnCapabilitiesImpl().setType(ValueType.STRING)
+ .setHasBitmapIndexes(hasBitmapIndexes)
+ .setHasSpatialIndexes(hasSpatialIndexes)
+ .setDictionaryValuesUnique(true)
+ .setDictionaryValuesSorted(false);
+
+ // Strings are opportunistically multi-valued, but the capabilities are initialized as 'unknown', since a
+ // multi-valued row might be processed at any point during ingestion.
+ // We only explicitly set multiple values if we are certain that there are multiple values, otherwise, a race
+ // condition might occur where this indexer might process a multi-valued row in the period between obtaining the
+ // capabilities, and actually processing the rows with a selector. Leaving as unknown allows the caller to decide
+ // how to handle this.
+ if (hasMultipleValues) {
+ capabilites.setHasMultipleValues(true);
+ }
+ // Likewise, only set dictionaryEncoded explicitly if it is true, for a similar reason as multi-valued handling. The
+ // dictionary is populated as rows are processed, but there might be implicit default values not accounted for in
+ // the dictionary yet. We can be certain that the dictionary has an entry for every value if either of
+ // a) we have already processed an explicit default (null) valued row for this column
+ // b) the processing was not 'sparse', meaning that this indexer has processed an explicit value for every row
+ // is true.
+ final boolean allValuesEncoded = dictionaryEncodesAllValues();
+ if (allValuesEncoded) {
+ capabilites.setDictionaryEncoded(true);
+ }
+ return capabilites;
+ }
+
+ @Override
public DimensionSelector makeDimensionSelector(
final DimensionSpec spec,
final IncrementalIndexRowHolder currEntry,
@@ -630,9 +677,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
@Override
public boolean nameLookupPossibleInAdvance()
{
- // name lookup is possible in advance if we got a value for every row (setSparseIndexed was not called on this
- // column) or we've encountered an actual null value and it is present in our dictionary
- return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID;
+ return dictionaryEncodesAllValues();
}
@Nullable
@@ -696,6 +741,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
return makeDimensionSelector(DefaultDimensionSpec.of(desc.getName()), currEntry, desc);
}
+
@Nullable
@Override
public Object convertUnsortedEncodedKeyComponentToActualList(int[] key)
diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java
index a9af25b..ee252aa 100644
--- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java
+++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java
@@ -26,17 +26,59 @@ import org.apache.druid.java.util.common.StringUtils;
import javax.annotation.Nullable;
/**
+ * This interface is used to expose information about columns that is interesting to know for all matters dealing with
+ * reading from columns, including query planning and optimization, creating readers to merge segments at ingestion
+ * time, and probably nearly anything else you can imagine.
*/
public interface ColumnCapabilities
{
+ /**
+ * Column type, good to know so caller can know what to expect and which optimal selector to use
+ */
ValueType getType();
- boolean isDictionaryEncoded();
+
+ /**
+ * Is the column dictionary encoded? If so, a DimensionDictionarySelector may be used instead of using a value
+ * selector, allowing algorithms to operate on primitive integer dictionary ids rather than the looked up dictionary
+ * values
+ */
+ Capable isDictionaryEncoded();
+
+ /**
+ * If the column is dictionary encoded, are those values sorted? Useful to know for optimizations that can defer
+ * looking up values and allowing sorting with the dictionary ids directly
+ */
Capable areDictionaryValuesSorted();
+
+ /**
+ * If the column is dictionary encoded, is there a 1:1 mapping of dictionary ids to values? If this is true, it
+ * unlocks optimizations such as allowing for things like grouping directly on dictionary ids and deferred value
+ * lookup
+ */
Capable areDictionaryValuesUnique();
- boolean isRunLengthEncoded();
+
+ /**
+ * String columns are sneaky, and might have multiple values, this is to allow callers to know and appropriately
+ * prepare themselves
+ */
+ Capable hasMultipleValues();
+
+ /**
+ * Does the column have an inverted index bitmap for each value? If so, these may be employed to 'pre-filter' the
+ * column by examining if the values match the filter and intersecting the bitmaps, to avoid having to scan and
+ * evaluate if every row matches the filter
+ */
boolean hasBitmapIndexes();
+
+ /**
+ * Does the column have spatial indexes available to allow use with spatial filtering?
+ */
boolean hasSpatialIndexes();
- Capable hasMultipleValues();
+
+ /**
+ * All Druid primitive columns support filtering, maybe with or without indexes, but by default complex columns
+ * do not support direct filtering, unless provided through a custom implementation.
+ */
boolean isFilterable();
enum Capable
@@ -55,6 +97,11 @@ public interface ColumnCapabilities
return isTrue() || isUnknown();
}
+ public boolean isFalse()
+ {
+ return this == FALSE;
+ }
+
public boolean isUnknown()
{
return this == UNKNOWN;
@@ -105,4 +152,36 @@ public interface ColumnCapabilities
return StringUtils.toLowerCase(super.toString());
}
}
+
+ /**
+ * This interface defines the shape of a mechanism to allow for bespoke coercion of {@link Capable#UNKNOWN} into
+ * {@link Capable#TRUE} or {@link Capable#FALSE} for each {@link Capable} of a {@link ColumnCapabilities}, as is
+ * appropriate for the situation of the caller.
+ */
+ interface CoercionLogic
+ {
+ /**
+ * If {@link ColumnCapabilities#isDictionaryEncoded()} is {@link Capable#UNKNOWN}, define if it should be treated
+ * as true or false.
+ */
+ boolean dictionaryEncoded();
+
+ /**
+ * If {@link ColumnCapabilities#areDictionaryValuesSorted()} is {@link Capable#UNKNOWN}, define if it should be treated
+ * as true or false.
+ */
+ boolean dictionaryValuesSorted();
+
+ /**
+ * If {@link ColumnCapabilities#areDictionaryValuesUnique()} is {@link Capable#UNKNOWN}, define if it should be treated
+ * as true or false.
+ */
+ boolean dictionaryValuesUnique();
+
+ /**
+ * If {@link ColumnCapabilities#hasMultipleValues()} is {@link Capable#UNKNOWN}, define if it should be treated
+ * as true or false.
+ */
+ boolean multipleValues();
+ }
}
diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
index 9ddbd04..9473efd 100644
--- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
+++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
@@ -21,6 +21,7 @@ package org.apache.druid.segment.column;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonSetter;
import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.ISE;
@@ -37,7 +38,6 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
if (other != null) {
capabilities.type = other.getType();
capabilities.dictionaryEncoded = other.isDictionaryEncoded();
- capabilities.runLengthEncoded = other.isRunLengthEncoded();
capabilities.hasInvertedIndexes = other.hasBitmapIndexes();
capabilities.hasSpatialIndexes = other.hasSpatialIndexes();
capabilities.hasMultipleValues = other.hasMultipleValues();
@@ -49,30 +49,59 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
}
/**
- * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to
- * {@link Capable#FALSE}, in order to present a snapshot of the state of the this column
+ * Copy a {@link ColumnCapabilities} and coerce all {@link ColumnCapabilities.Capable#UNKNOWN} to
+ * {@link ColumnCapabilities.Capable#TRUE} or {@link ColumnCapabilities.Capable#FALSE} as specified by
+ * {@link ColumnCapabilities.CoercionLogic}
*/
@Nullable
- public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities)
+ public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, CoercionLogic coerce)
{
- return snapshot(capabilities, false);
+ if (capabilities == null) {
+ return null;
+ }
+ ColumnCapabilitiesImpl copy = copyOf(capabilities);
+ copy.dictionaryEncoded = copy.dictionaryEncoded.coerceUnknownToBoolean(coerce.dictionaryEncoded());
+ copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(coerce.dictionaryValuesSorted());
+ copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(coerce.dictionaryValuesUnique());
+ copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(coerce.multipleValues());
+ return copy;
}
/**
- * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to
- * {@link Capable#FALSE} or {@link Capable#TRUE}, in order to present a snapshot of the state of the this column
+ * Snapshots a pair of capabilities and then merges them
*/
@Nullable
- public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, boolean unknownIsTrue)
- {
- if (capabilities == null) {
- return null;
+ public static ColumnCapabilitiesImpl merge(
+ @Nullable final ColumnCapabilities capabilities,
+ @Nullable final ColumnCapabilities other,
+ CoercionLogic coercionLogic
+ )
+ {
+ ColumnCapabilitiesImpl merged = snapshot(capabilities, coercionLogic);
+ ColumnCapabilitiesImpl otherSnapshot = snapshot(other, coercionLogic);
+ if (merged == null) {
+ return otherSnapshot;
+ } else if (otherSnapshot == null) {
+ return merged;
}
- ColumnCapabilitiesImpl copy = copyOf(capabilities);
- copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(unknownIsTrue);
- copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(unknownIsTrue);
- copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(unknownIsTrue);
- return copy;
+
+ if (merged.type == null) {
+ merged.type = other.getType();
+ }
+
+ if (!merged.type.equals(otherSnapshot.getType())) {
+ throw new ISE("Cannot merge columns of type[%s] and [%s]", merged.type, otherSnapshot.getType());
+ }
+
+ merged.dictionaryEncoded = merged.dictionaryEncoded.or(otherSnapshot.isDictionaryEncoded());
+ merged.hasMultipleValues = merged.hasMultipleValues.or(otherSnapshot.hasMultipleValues());
+ merged.dictionaryValuesSorted = merged.dictionaryValuesSorted.and(otherSnapshot.areDictionaryValuesSorted());
+ merged.dictionaryValuesUnique = merged.dictionaryValuesUnique.and(otherSnapshot.areDictionaryValuesUnique());
+ merged.hasInvertedIndexes |= otherSnapshot.hasBitmapIndexes();
+ merged.hasSpatialIndexes |= otherSnapshot.hasSpatialIndexes();
+ merged.filterable &= otherSnapshot.isFilterable();
+
+ return merged;
}
@@ -93,10 +122,9 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
@Nullable
private ValueType type = null;
- private boolean dictionaryEncoded = false;
- private boolean runLengthEncoded = false;
private boolean hasInvertedIndexes = false;
private boolean hasSpatialIndexes = false;
+ private Capable dictionaryEncoded = Capable.UNKNOWN;
private Capable hasMultipleValues = Capable.UNKNOWN;
// These capabilities are computed at query time and not persisted in the segment files.
@@ -121,15 +149,16 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
}
@Override
- @JsonProperty
- public boolean isDictionaryEncoded()
+ @JsonProperty("dictionaryEncoded")
+ public Capable isDictionaryEncoded()
{
return dictionaryEncoded;
}
+ @JsonSetter("dictionaryEncoded")
public ColumnCapabilitiesImpl setDictionaryEncoded(boolean dictionaryEncoded)
{
- this.dictionaryEncoded = dictionaryEncoded;
+ this.dictionaryEncoded = Capable.of(dictionaryEncoded);
return this;
}
@@ -158,13 +187,6 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
}
@Override
- @JsonProperty
- public boolean isRunLengthEncoded()
- {
- return runLengthEncoded;
- }
-
- @Override
@JsonProperty("hasBitmapIndexes")
public boolean hasBitmapIndexes()
{
@@ -218,30 +240,4 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
this.filterable = filterable;
return this;
}
-
- public ColumnCapabilities merge(@Nullable ColumnCapabilities other)
- {
- if (other == null) {
- return this;
- }
-
- if (type == null) {
- type = other.getType();
- }
-
- if (!type.equals(other.getType())) {
- throw new ISE("Cannot merge columns of type[%s] and [%s]", type, other.getType());
- }
-
- this.dictionaryEncoded |= other.isDictionaryEncoded();
- this.runLengthEncoded |= other.isRunLengthEncoded();
- this.hasInvertedIndexes |= other.hasBitmapIndexes();
- this.hasSpatialIndexes |= other.hasSpatialIndexes();
- this.filterable &= other.isFilterable();
- this.hasMultipleValues = this.hasMultipleValues.or(other.hasMultipleValues());
- this.dictionaryValuesSorted = this.dictionaryValuesSorted.and(other.areDictionaryValuesSorted());
- this.dictionaryValuesUnique = this.dictionaryValuesUnique.and(other.areDictionaryValuesUnique());
-
- return this;
- }
}
diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
index 0baa594..f11e7a7 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
@@ -115,7 +115,7 @@ public class ExpressionFilter implements Filter
// multiple values. The lack of multiple values is important because expression filters treat multi-value
// arrays as nulls, which doesn't permit index based filtering.
final String column = Iterables.getOnlyElement(requiredBindings.get());
- return selector.getBitmapIndex(column) != null && !selector.hasMultipleValues(column).isMaybeTrue();
+ return selector.getBitmapIndex(column) != null && selector.hasMultipleValues(column).isFalse();
} else {
// Multi-column expression.
return false;
diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
index 9cbecba..91d3409 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
@@ -414,7 +414,7 @@ public class Filters
if (filter.supportsBitmapIndex(indexSelector)) {
final ColumnHolder columnHolder = columnSelector.getColumnHolder(dimension);
if (columnHolder != null) {
- return !columnHolder.getCapabilities().hasMultipleValues().isMaybeTrue();
+ return columnHolder.getCapabilities().hasMultipleValues().isFalse();
}
}
return false;
diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java
index 649aea9..bb4377f 100644
--- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java
+++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java
@@ -59,6 +59,7 @@ import org.apache.druid.segment.DimensionIndexer;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.DoubleColumnSelector;
import org.apache.druid.segment.FloatColumnSelector;
+import org.apache.druid.segment.IndexMergerV9;
import org.apache.druid.segment.LongColumnSelector;
import org.apache.druid.segment.Metadata;
import org.apache.druid.segment.NilColumnValueSelector;
@@ -249,7 +250,8 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
private final Map<String, DimensionDesc> dimensionDescs;
private final List<DimensionDesc> dimensionDescsList;
- private final Map<String, ColumnCapabilitiesImpl> columnCapabilities;
+ // dimension capabilities are provided by the indexers
+ private final Map<String, ColumnCapabilities> timeAndMetricsColumnCapabilities;
private final AtomicInteger numEntries = new AtomicInteger();
private final AtomicLong bytesInMemory = new AtomicLong();
@@ -287,7 +289,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
this.deserializeComplexMetrics = deserializeComplexMetrics;
this.reportParseExceptions = reportParseExceptions;
- this.columnCapabilities = new HashMap<>();
+ this.timeAndMetricsColumnCapabilities = new HashMap<>();
this.metadata = new Metadata(
null,
getCombiningAggregators(metrics),
@@ -302,7 +304,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
for (AggregatorFactory metric : metrics) {
MetricDesc metricDesc = new MetricDesc(metricDescs.size(), metric);
metricDescs.put(metricDesc.getName(), metricDesc);
- columnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities());
+ timeAndMetricsColumnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities());
}
DimensionsSpec dimensionsSpec = incrementalIndexSchema.getDimensionsSpec();
@@ -312,24 +314,22 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
for (DimensionSchema dimSchema : dimensionsSpec.getDimensions()) {
ValueType type = TYPE_MAP.get(dimSchema.getValueType());
String dimName = dimSchema.getName();
- ColumnCapabilitiesImpl capabilities = makeCapabilitiesFromValueType(type);
+ ColumnCapabilitiesImpl capabilities = makeDefaultCapabilitiesFromValueType(type);
capabilities.setHasBitmapIndexes(dimSchema.hasBitmapIndex());
if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) {
capabilities.setHasSpatialIndexes(true);
- } else {
- DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(
- dimName,
- capabilities,
- dimSchema.getMultiValueHandling()
- );
- addNewDimension(dimName, capabilities, handler);
}
- columnCapabilities.put(dimName, capabilities);
+ DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(
+ dimName,
+ capabilities,
+ dimSchema.getMultiValueHandling()
+ );
+ addNewDimension(dimName, handler);
}
//__time capabilities
- columnCapabilities.put(
+ timeAndMetricsColumnCapabilities.put(
ColumnHolder.TIME_COLUMN_NAME,
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG)
);
@@ -589,9 +589,13 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
return row;
}
- public Map<String, ColumnCapabilitiesImpl> getColumnCapabilities()
+ public Map<String, ColumnCapabilities> getColumnCapabilities()
{
- return columnCapabilities;
+ ImmutableMap.Builder<String, ColumnCapabilities> builder =
+ ImmutableMap.<String, ColumnCapabilities>builder().putAll(timeAndMetricsColumnCapabilities);
+
+ dimensionDescs.forEach((dimension, desc) -> builder.put(dimension, desc.getCapabilities()));
+ return builder.build();
}
/**
@@ -658,23 +662,22 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
continue;
}
boolean wasNewDim = false;
- ColumnCapabilitiesImpl capabilities;
DimensionDesc desc = dimensionDescs.get(dimension);
if (desc != null) {
- capabilities = desc.getCapabilities();
absentDimensions.remove(dimension);
} else {
wasNewDim = true;
- capabilities = columnCapabilities.get(dimension);
- if (capabilities == null) {
- // For schemaless type discovery, assume everything is a String for now, can change later.
- capabilities = makeCapabilitiesFromValueType(ValueType.STRING);
- columnCapabilities.put(dimension, capabilities);
- }
- DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null);
- desc = addNewDimension(dimension, capabilities, handler);
+ desc = addNewDimension(
+ dimension,
+ DimensionHandlerUtils.getHandlerFromCapabilities(
+ dimension,
+ // for schemaless type discovery, everything is a String. this should probably try to autodetect
+ // based on the value to use a better handler
+ makeDefaultCapabilitiesFromValueType(ValueType.STRING),
+ null
+ )
+ );
}
- DimensionHandler handler = desc.getHandler();
DimensionIndexer indexer = desc.getIndexer();
Object dimsKey = null;
try {
@@ -684,13 +687,6 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
parseExceptionMessages.add(pe.getMessage());
}
dimsKeySize += indexer.estimateEncodedKeyComponentSize(dimsKey);
- // Set column capabilities as data is coming in
- if (!capabilities.hasMultipleValues().isTrue() &&
- dimsKey != null &&
- handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) {
- capabilities.setHasMultipleValues(true);
- }
-
if (wasNewDim) {
// unless this is the first row we are processing, all newly discovered columns will be sparse
if (maxIngestedEventTime != null) {
@@ -928,7 +924,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
}
}
- private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type)
+ private ColumnCapabilitiesImpl makeDefaultCapabilitiesFromValueType(ValueType type)
{
if (type == ValueType.STRING) {
// we start out as not having multiple values, but this might change as we encounter them
@@ -949,7 +945,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
*/
public void loadDimensionIterable(
Iterable<String> oldDimensionOrder,
- Map<String, ColumnCapabilitiesImpl> oldColumnCapabilities
+ Map<String, ColumnCapabilities> oldColumnCapabilities
)
{
synchronized (dimensionDescs) {
@@ -958,19 +954,21 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
}
for (String dim : oldDimensionOrder) {
if (dimensionDescs.get(dim) == null) {
- ColumnCapabilitiesImpl capabilities = oldColumnCapabilities.get(dim);
- columnCapabilities.put(dim, capabilities);
+ ColumnCapabilitiesImpl capabilities = ColumnCapabilitiesImpl.snapshot(
+ oldColumnCapabilities.get(dim),
+ IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC
+ );
DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dim, capabilities, null);
- addNewDimension(dim, capabilities, handler);
+ addNewDimension(dim, handler);
}
}
}
}
@GuardedBy("dimensionDescs")
- private DimensionDesc addNewDimension(String dim, ColumnCapabilitiesImpl capabilities, DimensionHandler handler)
+ private DimensionDesc addNewDimension(String dim, DimensionHandler handler)
{
- DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, capabilities, handler);
+ DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, handler);
dimensionDescs.put(dim, desc);
dimensionDescsList.add(desc);
return desc;
@@ -998,7 +996,10 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
@Nullable
public ColumnCapabilities getCapabilities(String column)
{
- return columnCapabilities.get(column);
+ if (dimensionDescs.containsKey(column)) {
+ return dimensionDescs.get(column).getCapabilities();
+ }
+ return timeAndMetricsColumnCapabilities.get(column);
}
public Metadata getMetadata()
@@ -1080,15 +1081,13 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
{
private final int index;
private final String name;
- private final ColumnCapabilitiesImpl capabilities;
private final DimensionHandler handler;
private final DimensionIndexer indexer;
- public DimensionDesc(int index, String name, ColumnCapabilitiesImpl capabilities, DimensionHandler handler)
+ public DimensionDesc(int index, String name, DimensionHandler handler)
{
this.index = index;
this.name = name;
- this.capabilities = capabilities;
this.handler = handler;
this.indexer = handler.makeIndexer();
}
@@ -1103,9 +1102,9 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
return name;
}
- public ColumnCapabilitiesImpl getCapabilities()
+ public ColumnCapabilities getCapabilities()
{
- return capabilities;
+ return indexer.getColumnCapabilities();
}
public DimensionHandler getHandler()
@@ -1124,7 +1123,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
private final int index;
private final String name;
private final String type;
- private final ColumnCapabilitiesImpl capabilities;
+ private final ColumnCapabilities capabilities;
public MetricDesc(int index, AggregatorFactory factory)
{
@@ -1163,7 +1162,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
return type;
}
- public ColumnCapabilitiesImpl getCapabilities()
+ public ColumnCapabilities getCapabilities()
{
return capabilities;
}
diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java
index 8e8520d..9beb168 100644
--- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java
+++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java
@@ -52,6 +52,62 @@ import java.util.Iterator;
*/
public class IncrementalIndexStorageAdapter implements StorageAdapter
{
+ private static final ColumnCapabilities.CoercionLogic STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC =
+ new ColumnCapabilities.CoercionLogic()
+ {
+ @Override
+ public boolean dictionaryEncoded()
+ {
+ return false;
+ }
+
+ @Override
+ public boolean dictionaryValuesSorted()
+ {
+ return false;
+ }
+
+ @Override
+ public boolean dictionaryValuesUnique()
+ {
+ return true;
+ }
+
+ @Override
+ public boolean multipleValues()
+ {
+ return true;
+ }
+ };
+
+ private static final ColumnCapabilities.CoercionLogic SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC =
+ new ColumnCapabilities.CoercionLogic()
+ {
+ @Override
+ public boolean dictionaryEncoded()
+ {
+ return true;
+ }
+
+ @Override
+ public boolean dictionaryValuesSorted()
+ {
+ return true;
+ }
+
+ @Override
+ public boolean dictionaryValuesUnique()
+ {
+ return true;
+ }
+
+ @Override
+ public boolean multipleValues()
+ {
+ return false;
+ }
+ };
+
final IncrementalIndex<?> index;
public IncrementalIndexStorageAdapter(IncrementalIndex<?> index)
@@ -154,7 +210,7 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter
// to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of
// multi-valuedness at cursor creation time, instead of the latest state, and getSnapshotColumnCapabilities could
// be removed.
- return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true);
+ return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC);
}
/**
@@ -165,7 +221,10 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter
*/
public ColumnCapabilities getSnapshotColumnCapabilities(String column)
{
- return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column));
+ return ColumnCapabilitiesImpl.snapshot(
+ index.getCapabilities(column),
+ SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC
+ );
}
@Override
diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java
index 269ac38..48c56c9 100644
--- a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java
+++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java
@@ -83,9 +83,9 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe
spec -> {
final ColumnHolder holder = index.getColumnHolder(spec.getDimension());
if (holder == null
- || !holder.getCapabilities().isDictionaryEncoded()
+ || holder.getCapabilities().isDictionaryEncoded().isFalse()
|| holder.getCapabilities().getType() != ValueType.STRING
- || !holder.getCapabilities().hasMultipleValues().isMaybeTrue()) {
+ || holder.getCapabilities().hasMultipleValues().isFalse()) {
throw new ISE(
"Column[%s] is not a multi-value string column, do not ask for a multi-value selector",
spec.getDimension()
@@ -119,7 +119,7 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe
spec -> {
final ColumnHolder holder = index.getColumnHolder(spec.getDimension());
if (holder == null
- || !holder.getCapabilities().isDictionaryEncoded()
+ || !holder.getCapabilities().isDictionaryEncoded().isTrue()
|| holder.getCapabilities().getType() != ValueType.STRING) {
// Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls.
return NilVectorSelector.create(offset);
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
index 5ab4e46..17c6a3f 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
@@ -153,8 +153,8 @@ public class ExpressionSelectors
);
} else if (capabilities != null
&& capabilities.getType() == ValueType.STRING
- && capabilities.isDictionaryEncoded()
- && !capabilities.hasMultipleValues().isMaybeTrue()
+ && capabilities.isDictionaryEncoded().isTrue()
+ && capabilities.hasMultipleValues().isFalse()
&& exprDetails.getArrayBindings().isEmpty()) {
// Optimization for expressions that hit one scalar string column and nothing else.
return new SingleStringInputCachingExpressionColumnValueSelector(
@@ -225,7 +225,7 @@ public class ExpressionSelectors
// not treating it as an array and not wanting to output an array
if (capabilities != null
&& capabilities.getType() == ValueType.STRING
- && capabilities.isDictionaryEncoded()
+ && capabilities.isDictionaryEncoded().isTrue()
&& !capabilities.hasMultipleValues().isUnknown()
&& !exprDetails.hasInputArrays()
&& !exprDetails.isOutputArray()
diff --git a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java
index 3ca72aa..96167fd 100644
--- a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java
+++ b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java
@@ -138,7 +138,7 @@ public class LookupSegmentTest
// reporting complete single-valued capabilities. It would be good to change this in the future, so query engines
// running on top of lookups can take advantage of singly-valued optimizations.
Assert.assertTrue(capabilities.hasMultipleValues().isUnknown());
- Assert.assertFalse(capabilities.isDictionaryEncoded());
+ Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue());
}
@Test
@@ -151,7 +151,7 @@ public class LookupSegmentTest
// running on top of lookups can take advantage of singly-valued optimizations.
Assert.assertEquals(ValueType.STRING, capabilities.getType());
Assert.assertTrue(capabilities.hasMultipleValues().isUnknown());
- Assert.assertFalse(capabilities.isDictionaryEncoded());
+ Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue());
}
@Test
diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java
index b253614..86e8503 100644
--- a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java
+++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java
@@ -45,7 +45,6 @@ import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
-import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.column.StringDictionaryEncodedColumn;
@@ -2012,32 +2011,6 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest
Assert.assertEquals(-1, dictIdSeeker.seek(5));
}
- @Test(expected = IllegalArgumentException.class)
- public void testCloser() throws Exception
- {
- final long timestamp = System.currentTimeMillis();
- IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null);
- IncrementalIndexTest.populateIndex(timestamp, toPersist);
- ColumnCapabilitiesImpl capabilities = (ColumnCapabilitiesImpl) toPersist.getCapabilities("dim1");
- capabilities.setHasSpatialIndexes(true);
-
- final File tempDir = temporaryFolder.newFolder();
- final File v8TmpDir = new File(tempDir, "v8-tmp");
- final File v9TmpDir = new File(tempDir, "v9-tmp");
-
- try {
- indexMerger.persist(toPersist, tempDir, indexSpec, null);
- }
- finally {
- if (v8TmpDir.exists()) {
- Assert.fail("v8-tmp dir not clean.");
- }
- if (v9TmpDir.exists()) {
- Assert.fail("v9-tmp dir not clean.");
- }
- }
- }
-
@Test
public void testMultiValueHandling() throws Exception
{
diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java
index c7783e9..57689a4 100644
--- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java
@@ -150,20 +150,25 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
ColumnCapabilities caps = INC_INDEX.getCapabilities("d1");
Assert.assertEquals(ValueType.STRING, caps.getType());
Assert.assertTrue(caps.hasBitmapIndexes());
- Assert.assertTrue(caps.isDictionaryEncoded());
+ Assert.assertTrue(caps.isDictionaryEncoded().isMaybeTrue());
+ Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
// multi-value is unknown unless explicitly set to 'true'
Assert.assertTrue(caps.hasMultipleValues().isUnknown());
// at index merge or query time we 'complete' the capabilities to take a snapshot of the current state,
// coercing any 'UNKNOWN' values to false
- Assert.assertFalse(ColumnCapabilitiesImpl.snapshot(caps).hasMultipleValues().isMaybeTrue());
+ Assert.assertFalse(
+ ColumnCapabilitiesImpl.snapshot(caps, IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC)
+ .hasMultipleValues()
+ .isMaybeTrue()
+ );
Assert.assertFalse(caps.hasSpatialIndexes());
caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities();
Assert.assertEquals(ValueType.STRING, caps.getType());
Assert.assertTrue(caps.hasBitmapIndexes());
- Assert.assertTrue(caps.isDictionaryEncoded());
+ Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue());
Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@@ -176,7 +181,7 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
ColumnCapabilities caps = INC_INDEX.getCapabilities("d2");
Assert.assertEquals(ValueType.STRING, caps.getType());
Assert.assertTrue(caps.hasBitmapIndexes());
- Assert.assertTrue(caps.isDictionaryEncoded());
+ Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
Assert.assertTrue(caps.hasMultipleValues().isTrue());
@@ -185,7 +190,7 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
caps = MMAP_INDEX.getColumnHolder("d2").getCapabilities();
Assert.assertEquals(ValueType.STRING, caps.getType());
Assert.assertTrue(caps.hasBitmapIndexes());
- Assert.assertTrue(caps.isDictionaryEncoded());
+ Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue());
Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
Assert.assertTrue(caps.hasMultipleValues().isTrue());
@@ -199,12 +204,11 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
assertNonStringColumnCapabilities(MMAP_INDEX.getColumnHolder("m4").getCapabilities(), ValueType.COMPLEX);
}
-
private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueType valueType)
{
Assert.assertEquals(valueType, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
- Assert.assertFalse(caps.isDictionaryEncoded());
+ Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
diff --git a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java
index e12dac4..a802b81 100644
--- a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java
@@ -51,7 +51,7 @@ public class RowBasedColumnSelectorFactoryTest
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, ColumnHolder.TIME_COLUMN_NAME);
Assert.assertEquals(ValueType.LONG, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
- Assert.assertFalse(caps.isDictionaryEncoded());
+ Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@@ -65,7 +65,7 @@ public class RowBasedColumnSelectorFactoryTest
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, STRING_COLUMN_NAME);
Assert.assertEquals(ValueType.STRING, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
- Assert.assertFalse(caps.isDictionaryEncoded());
+ Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertTrue(caps.hasMultipleValues().isUnknown());
@@ -79,7 +79,7 @@ public class RowBasedColumnSelectorFactoryTest
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, LONG_COLUMN_NAME);
Assert.assertEquals(ValueType.LONG, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
- Assert.assertFalse(caps.isDictionaryEncoded());
+ Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@@ -93,7 +93,7 @@ public class RowBasedColumnSelectorFactoryTest
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, FLOAT_COLUMN_NAME);
Assert.assertEquals(ValueType.FLOAT, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
- Assert.assertFalse(caps.isDictionaryEncoded());
+ Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@@ -107,7 +107,7 @@ public class RowBasedColumnSelectorFactoryTest
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, DOUBLE_COLUMN_NAME);
Assert.assertEquals(ValueType.DOUBLE, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
- Assert.assertFalse(caps.isDictionaryEncoded());
+ Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@@ -121,7 +121,7 @@ public class RowBasedColumnSelectorFactoryTest
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, COMPLEX_COLUMN_NAME);
Assert.assertEquals(ValueType.COMPLEX, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
- Assert.assertFalse(caps.isDictionaryEncoded());
+ Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertTrue(caps.hasMultipleValues().isUnknown());
diff --git a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java
index e221edd..ce98506 100644
--- a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java
@@ -44,8 +44,7 @@ public class ColumnCapabilitiesImplTest
ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class);
Assert.assertEquals(ValueType.COMPLEX, cc.getType());
- Assert.assertTrue(cc.isDictionaryEncoded());
- Assert.assertFalse(cc.isRunLengthEncoded());
+ Assert.assertTrue(cc.isDictionaryEncoded().isTrue());
Assert.assertTrue(cc.hasSpatialIndexes());
Assert.assertTrue(cc.hasMultipleValues().isTrue());
Assert.assertTrue(cc.hasBitmapIndexes());
@@ -69,8 +68,7 @@ public class ColumnCapabilitiesImplTest
ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class);
Assert.assertEquals(ValueType.COMPLEX, cc.getType());
- Assert.assertTrue(cc.isDictionaryEncoded());
- Assert.assertTrue(cc.isRunLengthEncoded());
+ Assert.assertTrue(cc.isDictionaryEncoded().isTrue());
Assert.assertTrue(cc.hasSpatialIndexes());
Assert.assertTrue(cc.hasMultipleValues().isTrue());
Assert.assertTrue(cc.hasBitmapIndexes());
diff --git a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java
index 7b80bd2..6406d7a 100644
--- a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java
@@ -200,7 +200,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag
Assert.assertEquals(ValueType.STRING, capabilities.getType());
Assert.assertTrue(capabilities.hasBitmapIndexes());
- Assert.assertTrue(capabilities.isDictionaryEncoded());
+ Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue());
Assert.assertTrue(capabilities.areDictionaryValuesSorted().isTrue());
Assert.assertTrue(capabilities.areDictionaryValuesUnique().isTrue());
}
@@ -216,7 +216,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag
Assert.assertFalse(capabilities.hasBitmapIndexes());
Assert.assertFalse(capabilities.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(capabilities.areDictionaryValuesSorted().isTrue());
- Assert.assertTrue(capabilities.isDictionaryEncoded());
+ Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue());
}
@Test
diff --git a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java
index c75232c..61b5637 100644
--- a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java
@@ -137,7 +137,7 @@ public class IndexedTableJoinableTest
{
final ColumnCapabilities capabilities = target.getColumnCapabilities("str");
Assert.assertEquals(ValueType.STRING, capabilities.getType());
- Assert.assertTrue(capabilities.isDictionaryEncoded());
+ Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue());
Assert.assertFalse(capabilities.hasBitmapIndexes());
Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
Assert.assertFalse(capabilities.hasSpatialIndexes());
@@ -148,7 +148,7 @@ public class IndexedTableJoinableTest
{
final ColumnCapabilities capabilities = target.getColumnCapabilities("long");
Assert.assertEquals(ValueType.LONG, capabilities.getType());
- Assert.assertFalse(capabilities.isDictionaryEncoded());
+ Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue());
Assert.assertFalse(capabilities.hasBitmapIndexes());
Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
Assert.assertFalse(capabilities.hasSpatialIndexes());
diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java
similarity index 67%
rename from processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java
rename to processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java
index 0b7a66d..08b18c4 100644
--- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java
@@ -21,20 +21,41 @@ package org.apache.druid.segment.virtual;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.druid.common.config.NullHandling;
import org.apache.druid.common.guava.SettableSupplier;
+import org.apache.druid.data.input.MapBasedInputRow;
+import org.apache.druid.data.input.impl.DimensionsSpec;
+import org.apache.druid.data.input.impl.TimestampSpec;
+import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.java.util.common.Intervals;
+import org.apache.druid.java.util.common.granularity.Granularities;
+import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExprMacroTable;
+import org.apache.druid.math.expr.Parser;
+import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.BaseSingleValueDimensionSelector;
import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.TestObjectColumnSelector;
+import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.incremental.IncrementalIndex;
+import org.apache.druid.segment.incremental.IncrementalIndexSchema;
+import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
+import org.apache.druid.segment.incremental.IndexSizeExceededException;
+import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
-public class ExpressionColumnValueSelectorTest
+public class ExpressionSelectorsTest extends InitializedNullHandlingTest
{
@Test
public void testSupplierFromDimensionSelector()
@@ -231,6 +252,86 @@ public class ExpressionColumnValueSelectorTest
);
}
+ @Test
+ public void testIncrementIndexStringSelector() throws IndexSizeExceededException
+ {
+ // This test covers a regression caused by ColumnCapabilities.isDictionaryEncoded not matching the value of
+ // DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an
+ // exception trying to make an optimized string expression selector that was not appropriate to use for the
+ // underlying dimension selector.
+ // This occurred during schemaless ingestion with sparse dimension values and no explicit null rows, so the
+ // conditions are replicated by this test. See https://github.com/apache/druid/pull/10248 for details
+ IncrementalIndexSchema schema = new IncrementalIndexSchema(
+ 0,
+ new TimestampSpec("time", "millis", DateTimes.nowUtc()),
+ Granularities.NONE,
+ VirtualColumns.EMPTY,
+ DimensionsSpec.EMPTY,
+ new AggregatorFactory[]{new CountAggregatorFactory("count")},
+ true
+ );
+
+ IncrementalIndex index = new IncrementalIndex.Builder().setMaxRowCount(100).setIndexSchema(schema).buildOnheap();
+ index.add(
+ new MapBasedInputRow(
+ DateTimes.nowUtc().getMillis(),
+ ImmutableList.of("x"),
+ ImmutableMap.of("x", "foo")
+ )
+ );
+ index.add(
+ new MapBasedInputRow(
+ DateTimes.nowUtc().plusMillis(1000).getMillis(),
+ ImmutableList.of("y"),
+ ImmutableMap.of("y", "foo")
+ )
+ );
+
+ IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index);
+
+ Sequence<Cursor> cursors = adapter.makeCursors(
+ null,
+ Intervals.ETERNITY,
+ VirtualColumns.EMPTY,
+ Granularities.ALL,
+ false,
+ null
+ );
+ int rowsProcessed = cursors.map(cursor -> {
+ DimensionSelector xExprSelector = ExpressionSelectors.makeDimensionSelector(
+ cursor.getColumnSelectorFactory(),
+ Parser.parse("concat(x, 'foo')", ExprMacroTable.nil()),
+ null
+ );
+ DimensionSelector yExprSelector = ExpressionSelectors.makeDimensionSelector(
+ cursor.getColumnSelectorFactory(),
+ Parser.parse("concat(y, 'foo')", ExprMacroTable.nil()),
+ null
+ );
+ int rowCount = 0;
+ while (!cursor.isDone()) {
+ Object x = xExprSelector.getObject();
+ Object y = yExprSelector.getObject();
+ List<String> expectedFoo = Collections.singletonList("foofoo");
+ List<String> expectedNull = NullHandling.replaceWithDefault()
+ ? Collections.singletonList("foo")
+ : Collections.singletonList(null);
+ if (rowCount == 0) {
+ Assert.assertEquals(expectedFoo, x);
+ Assert.assertEquals(expectedNull, y);
+ } else {
+ Assert.assertEquals(expectedNull, x);
+ Assert.assertEquals(expectedFoo, y);
+ }
+ rowCount++;
+ cursor.advance();
+ }
+ return rowCount;
+ }).accumulate(0, (in, acc) -> in + acc);
+
+ Assert.assertEquals(2, rowsProcessed);
+ }
+
private static DimensionSelector dimensionSelectorFromSupplier(
final Supplier<String> supplier
)
diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
index 16e090d..84bf5fd 100644
--- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
@@ -810,7 +810,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest
ColumnCapabilities caps = X_PLUS_Y.capabilities("expr");
Assert.assertEquals(ValueType.FLOAT, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
- Assert.assertFalse(caps.isDictionaryEncoded());
+ Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertTrue(caps.hasMultipleValues().isUnknown());
@@ -820,7 +820,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest
caps = Z_CONCAT_X.capabilities("expr");
Assert.assertEquals(ValueType.STRING, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
- Assert.assertFalse(caps.isDictionaryEncoded());
+ Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertTrue(caps.hasMultipleValues().isUnknown());
diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java
index 7002ac2..2324a2a 100644
--- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java
+++ b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java
@@ -29,7 +29,7 @@ import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.ReferenceCountingSegment;
-import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexAddResult;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
@@ -377,7 +377,7 @@ public class Sink implements Iterable<FireHydrant>, Overshadowable<Sink>
FireHydrant lastHydrant = hydrants.get(numHydrants - 1);
newCount = lastHydrant.getCount() + 1;
if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) {
- Map<String, ColumnCapabilitiesImpl> oldCapabilities;
+ Map<String, ColumnCapabilities> oldCapabilities;
if (lastHydrant.hasSwapped()) {
oldCapabilities = new HashMap<>();
ReferenceCountingSegment segment = lastHydrant.getIncrementedSegment();
@@ -385,7 +385,7 @@ public class Sink implements Iterable<FireHydrant>, Overshadowable<Sink>
QueryableIndex oldIndex = segment.asQueryableIndex();
for (String dim : oldIndex.getAvailableDimensions()) {
dimOrder.add(dim);
- oldCapabilities.put(dim, (ColumnCapabilitiesImpl) oldIndex.getColumnHolder(dim).getCapabilities());
+ oldCapabilities.put(dim, oldIndex.getColumnHolder(dim).getCapabilities());
}
}
finally {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org