You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by gi...@apache.org on 2020/08/11 18:07:38 UTC

[druid] branch master updated: fix bug with expressions on sparse string realtime columns without explicit null valued rows (#10248)

This is an automated email from the ASF dual-hosted git repository.

gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new c72f96a  fix bug with expressions on sparse string realtime columns without explicit null valued rows (#10248)
c72f96a is described below

commit c72f96a4babdf5055912bb0fb5eb2236cfe0ef23
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Tue Aug 11 11:07:17 2020 -0700

    fix bug with expressions on sparse string realtime columns without explicit null valued rows (#10248)
    
    * fix bug with realtime expressions on sparse string columns
    
    * fix test
    
    * add comment back
    
    * push capabilities for dimensions to dimension indexers since they know things
    
    * style
    
    * style
    
    * fixes
    
    * getting a bit carried away
    
    * missed one
    
    * fix it
    
    * benchmark build fix
    
    * review stuffs
    
    * javadoc and comments
    
    * add comment
    
    * more strict check
    
    * fix missed usage of impl instead of interface
---
 .../indexing/StringDimensionIndexerBenchmark.java  |   2 +-
 .../apache/druid/indexer/IndexGeneratorJob.java    |   4 +-
 .../epinephelinae/GroupByQueryEngineV2.java        |   2 +-
 .../druid/query/metadata/SegmentAnalyzer.java      |   2 +-
 .../apache/druid/query/topn/TopNQueryEngine.java   |   2 +-
 .../druid/segment/DimensionHandlerUtils.java       |  12 +--
 .../org/apache/druid/segment/DimensionIndexer.java |   2 +
 .../druid/segment/DoubleDimensionIndexer.java      |  12 +++
 .../druid/segment/FloatDimensionIndexer.java       |  12 +++
 .../org/apache/druid/segment/IndexMergerV9.java    |  70 +++++++++++++-
 .../apache/druid/segment/LongDimensionIndexer.java |  13 +++
 .../druid/segment/StringDimensionHandler.java      |   6 +-
 .../druid/segment/StringDimensionIndexer.java      |  56 ++++++++++-
 .../druid/segment/column/ColumnCapabilities.java   |  85 ++++++++++++++++-
 .../segment/column/ColumnCapabilitiesImpl.java     | 104 ++++++++++-----------
 .../druid/segment/filter/ExpressionFilter.java     |   2 +-
 .../org/apache/druid/segment/filter/Filters.java   |   2 +-
 .../segment/incremental/IncrementalIndex.java      |  95 ++++++++++---------
 .../IncrementalIndexStorageAdapter.java            |  63 ++++++++++++-
 .../QueryableIndexVectorColumnSelectorFactory.java |   6 +-
 .../druid/segment/virtual/ExpressionSelectors.java |   6 +-
 .../druid/query/lookup/LookupSegmentTest.java      |   4 +-
 .../apache/druid/segment/IndexMergerTestBase.java  |  27 ------
 .../QueryableIndexColumnCapabilitiesTest.java      |  18 ++--
 .../segment/RowBasedColumnSelectorFactoryTest.java |  12 +--
 .../segment/column/ColumnCapabilitiesImplTest.java |   6 +-
 .../join/HashJoinSegmentStorageAdapterTest.java    |   4 +-
 .../join/table/IndexedTableJoinableTest.java       |   4 +-
 ...ectorTest.java => ExpressionSelectorsTest.java} | 103 +++++++++++++++++++-
 .../virtual/ExpressionVirtualColumnTest.java       |   4 +-
 .../druid/segment/realtime/plumber/Sink.java       |   6 +-
 31 files changed, 551 insertions(+), 195 deletions(-)

diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java
index 52fc851..2e4490b 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java
@@ -59,7 +59,7 @@ public class StringDimensionIndexerBenchmark
   @Setup
   public void setup()
   {
-    indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true);
+    indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true, false);
 
     for (int i = 0; i < cardinality; i++) {
       indexer.processRowValsToUnsortedEncodedKeyComponent("abcd-" + i, true);
diff --git a/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java b/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java
index 5dfdcac..72bbaee 100644
--- a/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java
+++ b/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java
@@ -47,7 +47,7 @@ import org.apache.druid.query.aggregation.AggregatorFactory;
 import org.apache.druid.segment.BaseProgressIndicator;
 import org.apache.druid.segment.ProgressIndicator;
 import org.apache.druid.segment.QueryableIndex;
-import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.incremental.IncrementalIndexSchema;
 import org.apache.druid.segment.indexing.TuningConfigs;
@@ -289,7 +289,7 @@ public class IndexGeneratorJob implements Jobby
       AggregatorFactory[] aggs,
       HadoopDruidIndexerConfig config,
       Iterable<String> oldDimOrder,
-      Map<String, ColumnCapabilitiesImpl> oldCapabilities
+      Map<String, ColumnCapabilities> oldCapabilities
   )
   {
     final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
index d3aaa4f..b4cda9b 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
@@ -340,7 +340,7 @@ public class GroupByQueryEngineV2
 
               // Now check column capabilities.
               final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension());
-              return (columnCapabilities != null && !columnCapabilities.hasMultipleValues().isMaybeTrue()) ||
+              return (columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse()) ||
                      (missingMeansNonExistent && columnCapabilities == null);
             });
   }
diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
index 659b55b..15a081d 100644
--- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
+++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
@@ -227,7 +227,7 @@ public class SegmentAnalyzer
         min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0));
         max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1));
       }
-    } else if (capabilities.isDictionaryEncoded()) {
+    } else if (capabilities.isDictionaryEncoded().isTrue()) {
       // fallback if no bitmap index
       DictionaryEncodedColumn<String> theColumn = (DictionaryEncodedColumn<String>) columnHolder.getColumn();
       cardinality = theColumn.getCardinality();
diff --git a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java
index eb22dc9..daad69f 100644
--- a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java
+++ b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java
@@ -194,7 +194,7 @@ public class TopNQueryEngine
     }
     if (capabilities != null && capabilities.getType() == ValueType.STRING) {
       // string columns must use the on heap algorithm unless they have the following capabilites
-      return capabilities.isDictionaryEncoded() && capabilities.areDictionaryValuesUnique().isTrue();
+      return capabilities.isDictionaryEncoded().isTrue() && capabilities.areDictionaryValuesUnique().isTrue();
     } else {
       // non-strings are not eligible to use the pooled algorithm, and should use a heap algorithm
       return false;
diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java
index 9e5a129..f5b7e9f 100644
--- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java
+++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java
@@ -73,16 +73,16 @@ public final class DimensionHandlerUtils
   )
   {
     if (capabilities == null) {
-      return new StringDimensionHandler(dimensionName, multiValueHandling, true);
+      return new StringDimensionHandler(dimensionName, multiValueHandling, true, false);
     }
 
     multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
 
     if (capabilities.getType() == ValueType.STRING) {
-      if (!capabilities.isDictionaryEncoded()) {
+      if (!capabilities.isDictionaryEncoded().isTrue()) {
         throw new IAE("String column must have dictionary encoding.");
       }
-      return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes());
+      return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes(), capabilities.hasSpatialIndexes());
     }
 
     if (capabilities.getType() == ValueType.LONG) {
@@ -98,7 +98,7 @@ public final class DimensionHandlerUtils
     }
 
     // Return a StringDimensionHandler by default (null columns will be treated as String typed)
-    return new StringDimensionHandler(dimensionName, multiValueHandling, true);
+    return new StringDimensionHandler(dimensionName, multiValueHandling, true, false);
   }
 
   public static List<ValueType> getValueTypesFromDimensionSpecs(List<DimensionSpec> dimSpecs)
@@ -226,11 +226,11 @@ public final class DimensionHandlerUtils
       capabilities = ColumnCapabilitiesImpl.copyOf(capabilities)
                                            .setType(ValueType.STRING)
                                            .setDictionaryValuesUnique(
-                                               capabilities.isDictionaryEncoded() &&
+                                               capabilities.isDictionaryEncoded().isTrue() &&
                                                fn.getExtractionType() == ExtractionFn.ExtractionType.ONE_TO_ONE
                                            )
                                            .setDictionaryValuesSorted(
-                                               capabilities.isDictionaryEncoded() && fn.preservesOrdering()
+                                               capabilities.isDictionaryEncoded().isTrue() && fn.preservesOrdering()
                                            );
     }
 
diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
index cf7631d..277deb9 100644
--- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
@@ -22,6 +22,7 @@ package org.apache.druid.segment;
 import org.apache.druid.collections.bitmap.BitmapFactory;
 import org.apache.druid.collections.bitmap.MutableBitmap;
 import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.data.CloseableIndexed;
 import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
@@ -236,6 +237,7 @@ public interface DimensionIndexer
       IncrementalIndex.DimensionDesc desc
   );
 
+  ColumnCapabilities getColumnCapabilities();
   /**
    * Compares the row values for this DimensionIndexer's dimension from a Row key.
    *
diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java
index b802f75..677ed41 100644
--- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java
@@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.java.util.common.guava.Comparators;
 import org.apache.druid.query.dimension.DimensionSpec;
 import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.data.CloseableIndexed;
 import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
@@ -38,6 +41,9 @@ public class DoubleDimensionIndexer implements DimensionIndexer<Double, Double,
 {
   public static final Comparator<Double> DOUBLE_COMPARATOR = Comparators.naturalNullsFirst();
 
+  private final ColumnCapabilitiesImpl capabilities =
+      ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE);
+
   @Override
   public Double processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
   {
@@ -90,6 +96,12 @@ public class DoubleDimensionIndexer implements DimensionIndexer<Double, Double,
   }
 
   @Override
+  public ColumnCapabilities getColumnCapabilities()
+  {
+    return capabilities;
+  }
+
+  @Override
   public DimensionSelector makeDimensionSelector(
       DimensionSpec spec,
       IncrementalIndexRowHolder currEntry,
diff --git a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java
index dce58a2..2db5b59 100644
--- a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java
@@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.java.util.common.guava.Comparators;
 import org.apache.druid.query.dimension.DimensionSpec;
 import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.data.CloseableIndexed;
 import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
@@ -38,6 +41,9 @@ public class FloatDimensionIndexer implements DimensionIndexer<Float, Float, Flo
 {
   public static final Comparator<Float> FLOAT_COMPARATOR = Comparators.naturalNullsFirst();
 
+  private final ColumnCapabilitiesImpl capabilities =
+      ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT);
+
   @Override
   public Float processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
   {
@@ -91,6 +97,12 @@ public class FloatDimensionIndexer implements DimensionIndexer<Float, Float, Flo
   }
 
   @Override
+  public ColumnCapabilities getColumnCapabilities()
+  {
+    return capabilities;
+  }
+
+  @Override
   public DimensionSelector makeDimensionSelector(
       DimensionSpec spec,
       IncrementalIndexRowHolder currEntry,
diff --git a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
index 066b8dc..19ce745 100644
--- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
+++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
@@ -88,6 +88,63 @@ public class IndexMergerV9 implements IndexMerger
 {
   private static final Logger log = new Logger(IndexMergerV9.class);
 
+  // merge logic for the state that capabilities will be in after the incremental index is persisted
+  public static final ColumnCapabilities.CoercionLogic DIMENSION_CAPABILITY_MERGE_LOGIC =
+      new ColumnCapabilities.CoercionLogic()
+      {
+        @Override
+        public boolean dictionaryEncoded()
+        {
+          return true;
+        }
+
+        @Override
+        public boolean dictionaryValuesSorted()
+        {
+          return true;
+        }
+
+        @Override
+        public boolean dictionaryValuesUnique()
+        {
+          return true;
+        }
+
+        @Override
+        public boolean multipleValues()
+        {
+          return false;
+        }
+      };
+
+  public static final ColumnCapabilities.CoercionLogic METRIC_CAPABILITY_MERGE_LOGIC =
+      new ColumnCapabilities.CoercionLogic()
+      {
+        @Override
+        public boolean dictionaryEncoded()
+        {
+          return false;
+        }
+
+        @Override
+        public boolean dictionaryValuesSorted()
+        {
+          return false;
+        }
+
+        @Override
+        public boolean dictionaryValuesUnique()
+        {
+          return false;
+        }
+
+        @Override
+        public boolean multipleValues()
+        {
+          return false;
+        }
+      };
+
   private final ObjectMapper mapper;
   private final IndexIO indexIO;
   private final SegmentWriteOutMediumFactory defaultSegmentWriteOutMediumFactory;
@@ -724,14 +781,14 @@ public class IndexMergerV9 implements IndexMerger
       for (String dimension : adapter.getDimensionNames()) {
         ColumnCapabilities capabilities = adapter.getCapabilities(dimension);
         capabilitiesMap.compute(dimension, (d, existingCapabilities) ->
-            ColumnCapabilitiesImpl.snapshot(capabilities)
-                                  .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities)));
+            ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, DIMENSION_CAPABILITY_MERGE_LOGIC)
+        );
       }
       for (String metric : adapter.getMetricNames()) {
         ColumnCapabilities capabilities = adapter.getCapabilities(metric);
         capabilitiesMap.compute(metric, (m, existingCapabilities) ->
-            ColumnCapabilitiesImpl.snapshot(capabilities)
-                                  .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities)));
+            ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, METRIC_CAPABILITY_MERGE_LOGIC)
+        );
         metricsValueTypes.put(metric, capabilities.getType());
         metricTypeNames.put(metric, adapter.getMetricType(metric));
       }
@@ -1011,7 +1068,10 @@ public class IndexMergerV9 implements IndexMerger
   {
     Map<String, DimensionHandler> handlers = new LinkedHashMap<>();
     for (int i = 0; i < mergedDimensions.size(); i++) {
-      ColumnCapabilities capabilities = dimCapabilities.get(i);
+      ColumnCapabilities capabilities = ColumnCapabilitiesImpl.snapshot(
+          dimCapabilities.get(i),
+          DIMENSION_CAPABILITY_MERGE_LOGIC
+      );
       String dimName = mergedDimensions.get(i);
       DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimName, capabilities, null);
       handlers.put(dimName, handler);
diff --git a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java
index f2a9127..2664055 100644
--- a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java
@@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.java.util.common.guava.Comparators;
 import org.apache.druid.query.dimension.DimensionSpec;
 import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.data.CloseableIndexed;
 import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
@@ -38,6 +41,10 @@ public class LongDimensionIndexer implements DimensionIndexer<Long, Long, Long>
 {
   public static final Comparator LONG_COMPARATOR = Comparators.<Long>naturalNullsFirst();
 
+  private final ColumnCapabilitiesImpl capabilities =
+      ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG);
+
+
   @Override
   public Long processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
   {
@@ -91,6 +98,12 @@ public class LongDimensionIndexer implements DimensionIndexer<Long, Long, Long>
   }
 
   @Override
+  public ColumnCapabilities getColumnCapabilities()
+  {
+    return capabilities;
+  }
+
+  @Override
   public DimensionSelector makeDimensionSelector(
       DimensionSpec spec,
       IncrementalIndexRowHolder currEntry,
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
index 65ff7f3..b7fdad1 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java
@@ -98,12 +98,14 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
   private final String dimensionName;
   private final MultiValueHandling multiValueHandling;
   private final boolean hasBitmapIndexes;
+  private final boolean hasSpatialIndexes;
 
-  public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes)
+  public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes, boolean hasSpatialIndexes)
   {
     this.dimensionName = dimensionName;
     this.multiValueHandling = multiValueHandling;
     this.hasBitmapIndexes = hasBitmapIndexes;
+    this.hasSpatialIndexes = hasSpatialIndexes;
   }
 
   @Override
@@ -139,7 +141,7 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
   @Override
   public DimensionIndexer<Integer, int[], String> makeIndexer()
   {
-    return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes);
+    return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes);
   }
 
   @Override
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
index c0200e1..bca0a5c 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
@@ -40,6 +40,9 @@ import org.apache.druid.query.dimension.DimensionSpec;
 import org.apache.druid.query.extraction.ExtractionFn;
 import org.apache.druid.query.filter.ValueMatcher;
 import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.data.ArrayBasedIndexedInts;
 import org.apache.druid.segment.data.CloseableIndexed;
 import org.apache.druid.segment.data.IndexedInts;
@@ -74,7 +77,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
     private String minValue = null;
     @Nullable
     private String maxValue = null;
-    private int idForNull = ABSENT_VALUE_ID;
+    private volatile int idForNull = ABSENT_VALUE_ID;
 
     private final Object2IntMap<String> valueToId = new Object2IntOpenHashMap<>();
 
@@ -233,17 +236,19 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
   private final DimensionDictionary dimLookup;
   private final MultiValueHandling multiValueHandling;
   private final boolean hasBitmapIndexes;
+  private final boolean hasSpatialIndexes;
   private volatile boolean hasMultipleValues = false;
   private volatile boolean isSparse = false;
 
   @Nullable
   private SortedDimensionDictionary sortedLookup;
 
-  public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes)
+  public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes, boolean hasSpatialIndexes)
   {
     this.dimLookup = new DimensionDictionary();
     this.multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
     this.hasBitmapIndexes = hasBitmapIndexes;
+    this.hasSpatialIndexes = hasSpatialIndexes;
   }
 
   @Override
@@ -400,6 +405,17 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
     return dimLookup.size();
   }
 
+  /**
+   * returns true if all values are encoded in {@link #dimLookup}
+   */
+  private boolean dictionaryEncodesAllValues()
+  {
+    // name lookup is possible in advance if we explicitly process a value for every row, or if we've encountered an
+    // actual null value and it is present in our dictionary. otherwise the dictionary will be missing ids for implicit
+    // null values
+    return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID;
+  }
+
   @Override
   public int compareUnsortedEncodedKeyComponents(int[] lhs, int[] rhs)
   {
@@ -457,6 +473,37 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
   }
 
   @Override
+  public ColumnCapabilities getColumnCapabilities()
+  {
+    ColumnCapabilitiesImpl capabilites = new ColumnCapabilitiesImpl().setType(ValueType.STRING)
+                                                                     .setHasBitmapIndexes(hasBitmapIndexes)
+                                                                     .setHasSpatialIndexes(hasSpatialIndexes)
+                                                                     .setDictionaryValuesUnique(true)
+                                                                     .setDictionaryValuesSorted(false);
+
+    // Strings are opportunistically multi-valued, but the capabilities are initialized as 'unknown', since a
+    // multi-valued row might be processed at any point during ingestion.
+    // We only explicitly set multiple values if we are certain that there are multiple values, otherwise, a race
+    // condition might occur where this indexer might process a multi-valued row in the period between obtaining the
+    // capabilities, and actually processing the rows with a selector. Leaving as unknown allows the caller to decide
+    // how to handle this.
+    if (hasMultipleValues) {
+      capabilites.setHasMultipleValues(true);
+    }
+    // Likewise, only set dictionaryEncoded explicitly if true, for a similar reason as multi-valued handling. The
+    // dictionary is populated as rows are processed, but there might be implicit default values not accounted for in
+    // the dictionary yet. We can be certain that the dictionary has an entry for every value if either of
+    //    a) we have already processed an explicit default (null) valued row for this column
+    //    b) the processing was not 'sparse', meaning that this indexer has processed an explicit value for every row
+    // is true.
+    final boolean allValuesEncoded = dictionaryEncodesAllValues();
+    if (allValuesEncoded) {
+      capabilites.setDictionaryEncoded(true);
+    }
+    return capabilites;
+  }
+
+  @Override
   public DimensionSelector makeDimensionSelector(
       final DimensionSpec spec,
       final IncrementalIndexRowHolder currEntry,
@@ -630,9 +677,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
       @Override
       public boolean nameLookupPossibleInAdvance()
       {
-        // name lookup is possible in advance if we got a value for every row (setSparseIndexed was not called on this
-        // column) or we've encountered an actual null value and it is present in our dictionary
-        return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID;
+        return dictionaryEncodesAllValues();
       }
 
       @Nullable
@@ -696,6 +741,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
     return makeDimensionSelector(DefaultDimensionSpec.of(desc.getName()), currEntry, desc);
   }
 
+
   @Nullable
   @Override
   public Object convertUnsortedEncodedKeyComponentToActualList(int[] key)
diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java
index a9af25b..ee252aa 100644
--- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java
+++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java
@@ -26,17 +26,59 @@ import org.apache.druid.java.util.common.StringUtils;
 import javax.annotation.Nullable;
 
 /**
+ * This interface is used to expose information about columns that is interesting to know for all matters dealing with
+ * reading from columns, including query planning and optimization, creating readers to merge segments at ingestion
+ * time, and probably nearly anything else you can imagine.
  */
 public interface ColumnCapabilities
 {
+  /**
+   * Column type, good to know so caller can know what to expect and which optimal selector to use
+   */
   ValueType getType();
-  boolean isDictionaryEncoded();
+
+  /**
+   * Is the column dictionary encoded? If so, a DimensionDictionarySelector may be used instead of using a value
+   * selector, allowing algorithms to operate on primitive integer dictionary ids rather than the looked up dictionary
+   * values
+   */
+  Capable isDictionaryEncoded();
+
+  /**
+   * If the column is dictionary encoded, are those values sorted? Useful to know for optimizations that can defer
+   * looking up values and allowing sorting with the dictionary ids directly
+   */
   Capable areDictionaryValuesSorted();
+
+  /**
+   * If the column is dictionary encoded, is there a 1:1 mapping of dictionary ids to values? If this is true, it
+   * unlocks optimizations such as allowing for things like grouping directly on dictionary ids and deferred value
+   * lookup
+   */
   Capable areDictionaryValuesUnique();
-  boolean isRunLengthEncoded();
+
+  /**
+   * String columns are sneaky, and might have multiple values, this is to allow callers to know and appropriately
+   * prepare themselves
+   */
+  Capable hasMultipleValues();
+
+  /**
+   * Does the column have an inverted index bitmap for each value? If so, these may be employed to 'pre-filter' the
+   * column by examining if the values match the filter and intersecting the bitmaps, to avoid having to scan and
+   * evaluate if every row matches the filter
+   */
   boolean hasBitmapIndexes();
+
+  /**
+   * Does the column have spatial indexes available to allow use with spatial filtering?
+   */
   boolean hasSpatialIndexes();
-  Capable hasMultipleValues();
+
+  /**
+   * All Druid primitive columns support filtering, maybe with or without indexes, but by default complex columns
+   * do not support direct filtering, unless provided through a custom implementation.
+   */
   boolean isFilterable();
 
   enum Capable
@@ -55,6 +97,11 @@ public interface ColumnCapabilities
       return isTrue() || isUnknown();
     }
 
+    public boolean isFalse()
+    {
+      return this == FALSE;
+    }
+
     public boolean isUnknown()
     {
       return this == UNKNOWN;
@@ -105,4 +152,36 @@ public interface ColumnCapabilities
       return StringUtils.toLowerCase(super.toString());
     }
   }
+
+  /**
+   * This interface defines the shape of a mechanism to allow for bespoke coercion of {@link Capable#UNKNOWN} into
+   * {@link Capable#TRUE} or {@link Capable#FALSE} for each {@link Capable} of a {@link ColumnCapabilities}, as is
+   * appropriate for the situation of the caller.
+   */
+  interface CoercionLogic
+  {
+    /**
+     * If {@link ColumnCapabilities#isDictionaryEncoded()} is {@link Capable#UNKNOWN}, define if it should be treated
+     * as true or false.
+     */
+    boolean dictionaryEncoded();
+
+    /**
+     * If {@link ColumnCapabilities#areDictionaryValuesSorted()} is {@link Capable#UNKNOWN}, define if it should be treated
+     * as true or false.
+     */
+    boolean dictionaryValuesSorted();
+
+    /**
+     * If {@link ColumnCapabilities#areDictionaryValuesUnique()} is {@link Capable#UNKNOWN}, define if it should be treated
+     * as true or false.
+     */
+    boolean dictionaryValuesUnique();
+
+    /**
+     * If {@link ColumnCapabilities#hasMultipleValues()} is {@link Capable#UNKNOWN}, define if it should be treated
+     * as true or false.
+     */
+    boolean multipleValues();
+  }
 }
diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
index 9ddbd04..9473efd 100644
--- a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
+++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
@@ -21,6 +21,7 @@ package org.apache.druid.segment.column;
 
 import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonSetter;
 import com.google.common.base.Preconditions;
 import org.apache.druid.java.util.common.ISE;
 
@@ -37,7 +38,6 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
     if (other != null) {
       capabilities.type = other.getType();
       capabilities.dictionaryEncoded = other.isDictionaryEncoded();
-      capabilities.runLengthEncoded = other.isRunLengthEncoded();
       capabilities.hasInvertedIndexes = other.hasBitmapIndexes();
       capabilities.hasSpatialIndexes = other.hasSpatialIndexes();
       capabilities.hasMultipleValues = other.hasMultipleValues();
@@ -49,30 +49,59 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
   }
 
   /**
-   * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to
-   * {@link Capable#FALSE}, in order to present a snapshot of the state of the this column
+   * Copy a {@link ColumnCapabilities} and coerce all {@link ColumnCapabilities.Capable#UNKNOWN} to
+   * {@link ColumnCapabilities.Capable#TRUE} or {@link ColumnCapabilities.Capable#FALSE} as specified by
+   * {@link ColumnCapabilities.CoercionLogic}
    */
   @Nullable
-  public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities)
+  public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, CoercionLogic coerce)
   {
-    return snapshot(capabilities, false);
+    if (capabilities == null) {
+      return null;
+    }
+    ColumnCapabilitiesImpl copy = copyOf(capabilities);
+    copy.dictionaryEncoded = copy.dictionaryEncoded.coerceUnknownToBoolean(coerce.dictionaryEncoded());
+    copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(coerce.dictionaryValuesSorted());
+    copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(coerce.dictionaryValuesUnique());
+    copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(coerce.multipleValues());
+    return copy;
   }
 
   /**
-   * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to
-   * {@link Capable#FALSE} or {@link Capable#TRUE}, in order to present a snapshot of the state of the this column
+   * Snapshots a pair of capabilities and then merges them
    */
   @Nullable
-  public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, boolean unknownIsTrue)
-  {
-    if (capabilities == null) {
-      return null;
+  public static ColumnCapabilitiesImpl merge(
+      @Nullable final ColumnCapabilities capabilities,
+      @Nullable final ColumnCapabilities other,
+      CoercionLogic coercionLogic
+  )
+  {
+    ColumnCapabilitiesImpl merged = snapshot(capabilities, coercionLogic);
+    ColumnCapabilitiesImpl otherSnapshot = snapshot(other, coercionLogic);
+    if (merged == null) {
+      return otherSnapshot;
+    } else if (otherSnapshot == null) {
+      return merged;
     }
-    ColumnCapabilitiesImpl copy = copyOf(capabilities);
-    copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(unknownIsTrue);
-    copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(unknownIsTrue);
-    copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(unknownIsTrue);
-    return copy;
+
+    if (merged.type == null) {
+      merged.type = other.getType();
+    }
+
+    if (!merged.type.equals(otherSnapshot.getType())) {
+      throw new ISE("Cannot merge columns of type[%s] and [%s]", merged.type, otherSnapshot.getType());
+    }
+
+    merged.dictionaryEncoded = merged.dictionaryEncoded.or(otherSnapshot.isDictionaryEncoded());
+    merged.hasMultipleValues = merged.hasMultipleValues.or(otherSnapshot.hasMultipleValues());
+    merged.dictionaryValuesSorted = merged.dictionaryValuesSorted.and(otherSnapshot.areDictionaryValuesSorted());
+    merged.dictionaryValuesUnique = merged.dictionaryValuesUnique.and(otherSnapshot.areDictionaryValuesUnique());
+    merged.hasInvertedIndexes |= otherSnapshot.hasBitmapIndexes();
+    merged.hasSpatialIndexes |= otherSnapshot.hasSpatialIndexes();
+    merged.filterable &= otherSnapshot.isFilterable();
+
+    return merged;
   }
 
 
@@ -93,10 +122,9 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
   @Nullable
   private ValueType type = null;
 
-  private boolean dictionaryEncoded = false;
-  private boolean runLengthEncoded = false;
   private boolean hasInvertedIndexes = false;
   private boolean hasSpatialIndexes = false;
+  private Capable dictionaryEncoded = Capable.UNKNOWN;
   private Capable hasMultipleValues = Capable.UNKNOWN;
 
   // These capabilities are computed at query time and not persisted in the segment files.
@@ -121,15 +149,16 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
   }
 
   @Override
-  @JsonProperty
-  public boolean isDictionaryEncoded()
+  @JsonProperty("dictionaryEncoded")
+  public Capable isDictionaryEncoded()
   {
     return dictionaryEncoded;
   }
 
+  @JsonSetter("dictionaryEncoded")
   public ColumnCapabilitiesImpl setDictionaryEncoded(boolean dictionaryEncoded)
   {
-    this.dictionaryEncoded = dictionaryEncoded;
+    this.dictionaryEncoded = Capable.of(dictionaryEncoded);
     return this;
   }
 
@@ -158,13 +187,6 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
   }
 
   @Override
-  @JsonProperty
-  public boolean isRunLengthEncoded()
-  {
-    return runLengthEncoded;
-  }
-
-  @Override
   @JsonProperty("hasBitmapIndexes")
   public boolean hasBitmapIndexes()
   {
@@ -218,30 +240,4 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
     this.filterable = filterable;
     return this;
   }
-
-  public ColumnCapabilities merge(@Nullable ColumnCapabilities other)
-  {
-    if (other == null) {
-      return this;
-    }
-
-    if (type == null) {
-      type = other.getType();
-    }
-
-    if (!type.equals(other.getType())) {
-      throw new ISE("Cannot merge columns of type[%s] and [%s]", type, other.getType());
-    }
-
-    this.dictionaryEncoded |= other.isDictionaryEncoded();
-    this.runLengthEncoded |= other.isRunLengthEncoded();
-    this.hasInvertedIndexes |= other.hasBitmapIndexes();
-    this.hasSpatialIndexes |= other.hasSpatialIndexes();
-    this.filterable &= other.isFilterable();
-    this.hasMultipleValues = this.hasMultipleValues.or(other.hasMultipleValues());
-    this.dictionaryValuesSorted = this.dictionaryValuesSorted.and(other.areDictionaryValuesSorted());
-    this.dictionaryValuesUnique = this.dictionaryValuesUnique.and(other.areDictionaryValuesUnique());
-
-    return this;
-  }
 }
diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
index 0baa594..f11e7a7 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
@@ -115,7 +115,7 @@ public class ExpressionFilter implements Filter
       // multiple values. The lack of multiple values is important because expression filters treat multi-value
       // arrays as nulls, which doesn't permit index based filtering.
       final String column = Iterables.getOnlyElement(requiredBindings.get());
-      return selector.getBitmapIndex(column) != null && !selector.hasMultipleValues(column).isMaybeTrue();
+      return selector.getBitmapIndex(column) != null && selector.hasMultipleValues(column).isFalse();
     } else {
       // Multi-column expression.
       return false;
diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
index 9cbecba..91d3409 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
@@ -414,7 +414,7 @@ public class Filters
     if (filter.supportsBitmapIndex(indexSelector)) {
       final ColumnHolder columnHolder = columnSelector.getColumnHolder(dimension);
       if (columnHolder != null) {
-        return !columnHolder.getCapabilities().hasMultipleValues().isMaybeTrue();
+        return columnHolder.getCapabilities().hasMultipleValues().isFalse();
       }
     }
     return false;
diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java
index 649aea9..bb4377f 100644
--- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java
+++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java
@@ -59,6 +59,7 @@ import org.apache.druid.segment.DimensionIndexer;
 import org.apache.druid.segment.DimensionSelector;
 import org.apache.druid.segment.DoubleColumnSelector;
 import org.apache.druid.segment.FloatColumnSelector;
+import org.apache.druid.segment.IndexMergerV9;
 import org.apache.druid.segment.LongColumnSelector;
 import org.apache.druid.segment.Metadata;
 import org.apache.druid.segment.NilColumnValueSelector;
@@ -249,7 +250,8 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
 
   private final Map<String, DimensionDesc> dimensionDescs;
   private final List<DimensionDesc> dimensionDescsList;
-  private final Map<String, ColumnCapabilitiesImpl> columnCapabilities;
+  // dimension capabilities are provided by the indexers
+  private final Map<String, ColumnCapabilities> timeAndMetricsColumnCapabilities;
   private final AtomicInteger numEntries = new AtomicInteger();
   private final AtomicLong bytesInMemory = new AtomicLong();
 
@@ -287,7 +289,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
     this.deserializeComplexMetrics = deserializeComplexMetrics;
     this.reportParseExceptions = reportParseExceptions;
 
-    this.columnCapabilities = new HashMap<>();
+    this.timeAndMetricsColumnCapabilities = new HashMap<>();
     this.metadata = new Metadata(
         null,
         getCombiningAggregators(metrics),
@@ -302,7 +304,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
     for (AggregatorFactory metric : metrics) {
       MetricDesc metricDesc = new MetricDesc(metricDescs.size(), metric);
       metricDescs.put(metricDesc.getName(), metricDesc);
-      columnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities());
+      timeAndMetricsColumnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities());
     }
 
     DimensionsSpec dimensionsSpec = incrementalIndexSchema.getDimensionsSpec();
@@ -312,24 +314,22 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
     for (DimensionSchema dimSchema : dimensionsSpec.getDimensions()) {
       ValueType type = TYPE_MAP.get(dimSchema.getValueType());
       String dimName = dimSchema.getName();
-      ColumnCapabilitiesImpl capabilities = makeCapabilitiesFromValueType(type);
+      ColumnCapabilitiesImpl capabilities = makeDefaultCapabilitiesFromValueType(type);
       capabilities.setHasBitmapIndexes(dimSchema.hasBitmapIndex());
 
       if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) {
         capabilities.setHasSpatialIndexes(true);
-      } else {
-        DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(
-            dimName,
-            capabilities,
-            dimSchema.getMultiValueHandling()
-        );
-        addNewDimension(dimName, capabilities, handler);
       }
-      columnCapabilities.put(dimName, capabilities);
+      DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(
+          dimName,
+          capabilities,
+          dimSchema.getMultiValueHandling()
+      );
+      addNewDimension(dimName, handler);
     }
 
     //__time capabilities
-    columnCapabilities.put(
+    timeAndMetricsColumnCapabilities.put(
         ColumnHolder.TIME_COLUMN_NAME,
         ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG)
     );
@@ -589,9 +589,13 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
     return row;
   }
 
-  public Map<String, ColumnCapabilitiesImpl> getColumnCapabilities()
+  public Map<String, ColumnCapabilities> getColumnCapabilities()
   {
-    return columnCapabilities;
+    ImmutableMap.Builder<String, ColumnCapabilities> builder =
+        ImmutableMap.<String, ColumnCapabilities>builder().putAll(timeAndMetricsColumnCapabilities);
+
+    dimensionDescs.forEach((dimension, desc) -> builder.put(dimension, desc.getCapabilities()));
+    return builder.build();
   }
 
   /**
@@ -658,23 +662,22 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
           continue;
         }
         boolean wasNewDim = false;
-        ColumnCapabilitiesImpl capabilities;
         DimensionDesc desc = dimensionDescs.get(dimension);
         if (desc != null) {
-          capabilities = desc.getCapabilities();
           absentDimensions.remove(dimension);
         } else {
           wasNewDim = true;
-          capabilities = columnCapabilities.get(dimension);
-          if (capabilities == null) {
-            // For schemaless type discovery, assume everything is a String for now, can change later.
-            capabilities = makeCapabilitiesFromValueType(ValueType.STRING);
-            columnCapabilities.put(dimension, capabilities);
-          }
-          DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null);
-          desc = addNewDimension(dimension, capabilities, handler);
+          desc = addNewDimension(
+              dimension,
+              DimensionHandlerUtils.getHandlerFromCapabilities(
+                  dimension,
+                  // for schemaless type discovery, everything is a String. this should probably try to autodetect
+                  // based on the value to use a better handler
+                  makeDefaultCapabilitiesFromValueType(ValueType.STRING),
+                  null
+              )
+          );
         }
-        DimensionHandler handler = desc.getHandler();
         DimensionIndexer indexer = desc.getIndexer();
         Object dimsKey = null;
         try {
@@ -684,13 +687,6 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
           parseExceptionMessages.add(pe.getMessage());
         }
         dimsKeySize += indexer.estimateEncodedKeyComponentSize(dimsKey);
-        // Set column capabilities as data is coming in
-        if (!capabilities.hasMultipleValues().isTrue() &&
-            dimsKey != null &&
-            handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) {
-          capabilities.setHasMultipleValues(true);
-        }
-
         if (wasNewDim) {
           // unless this is the first row we are processing, all newly discovered columns will be sparse
           if (maxIngestedEventTime != null) {
@@ -928,7 +924,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
     }
   }
 
-  private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type)
+  private ColumnCapabilitiesImpl makeDefaultCapabilitiesFromValueType(ValueType type)
   {
     if (type == ValueType.STRING) {
       // we start out as not having multiple values, but this might change as we encounter them
@@ -949,7 +945,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
    */
   public void loadDimensionIterable(
       Iterable<String> oldDimensionOrder,
-      Map<String, ColumnCapabilitiesImpl> oldColumnCapabilities
+      Map<String, ColumnCapabilities> oldColumnCapabilities
   )
   {
     synchronized (dimensionDescs) {
@@ -958,19 +954,21 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
       }
       for (String dim : oldDimensionOrder) {
         if (dimensionDescs.get(dim) == null) {
-          ColumnCapabilitiesImpl capabilities = oldColumnCapabilities.get(dim);
-          columnCapabilities.put(dim, capabilities);
+          ColumnCapabilitiesImpl capabilities = ColumnCapabilitiesImpl.snapshot(
+              oldColumnCapabilities.get(dim),
+              IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC
+          );
           DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dim, capabilities, null);
-          addNewDimension(dim, capabilities, handler);
+          addNewDimension(dim, handler);
         }
       }
     }
   }
 
   @GuardedBy("dimensionDescs")
-  private DimensionDesc addNewDimension(String dim, ColumnCapabilitiesImpl capabilities, DimensionHandler handler)
+  private DimensionDesc addNewDimension(String dim, DimensionHandler handler)
   {
-    DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, capabilities, handler);
+    DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, handler);
     dimensionDescs.put(dim, desc);
     dimensionDescsList.add(desc);
     return desc;
@@ -998,7 +996,10 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
   @Nullable
   public ColumnCapabilities getCapabilities(String column)
   {
-    return columnCapabilities.get(column);
+    if (dimensionDescs.containsKey(column)) {
+      return dimensionDescs.get(column).getCapabilities();
+    }
+    return timeAndMetricsColumnCapabilities.get(column);
   }
 
   public Metadata getMetadata()
@@ -1080,15 +1081,13 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
   {
     private final int index;
     private final String name;
-    private final ColumnCapabilitiesImpl capabilities;
     private final DimensionHandler handler;
     private final DimensionIndexer indexer;
 
-    public DimensionDesc(int index, String name, ColumnCapabilitiesImpl capabilities, DimensionHandler handler)
+    public DimensionDesc(int index, String name, DimensionHandler handler)
     {
       this.index = index;
       this.name = name;
-      this.capabilities = capabilities;
       this.handler = handler;
       this.indexer = handler.makeIndexer();
     }
@@ -1103,9 +1102,9 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
       return name;
     }
 
-    public ColumnCapabilitiesImpl getCapabilities()
+    public ColumnCapabilities getCapabilities()
     {
-      return capabilities;
+      return indexer.getColumnCapabilities();
     }
 
     public DimensionHandler getHandler()
@@ -1124,7 +1123,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
     private final int index;
     private final String name;
     private final String type;
-    private final ColumnCapabilitiesImpl capabilities;
+    private final ColumnCapabilities capabilities;
 
     public MetricDesc(int index, AggregatorFactory factory)
     {
@@ -1163,7 +1162,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
       return type;
     }
 
-    public ColumnCapabilitiesImpl getCapabilities()
+    public ColumnCapabilities getCapabilities()
     {
       return capabilities;
     }
diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java
index 8e8520d..9beb168 100644
--- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java
+++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java
@@ -52,6 +52,62 @@ import java.util.Iterator;
  */
 public class IncrementalIndexStorageAdapter implements StorageAdapter
 {
+  private static final ColumnCapabilities.CoercionLogic STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC =
+      new ColumnCapabilities.CoercionLogic()
+      {
+        @Override
+        public boolean dictionaryEncoded()
+        {
+          return false;
+        }
+
+        @Override
+        public boolean dictionaryValuesSorted()
+        {
+          return false;
+        }
+
+        @Override
+        public boolean dictionaryValuesUnique()
+        {
+          return true;
+        }
+
+        @Override
+        public boolean multipleValues()
+        {
+          return true;
+        }
+      };
+
+  private static final ColumnCapabilities.CoercionLogic SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC =
+      new ColumnCapabilities.CoercionLogic()
+      {
+        @Override
+        public boolean dictionaryEncoded()
+        {
+          return true;
+        }
+
+        @Override
+        public boolean dictionaryValuesSorted()
+        {
+          return true;
+        }
+
+        @Override
+        public boolean dictionaryValuesUnique()
+        {
+          return true;
+        }
+
+        @Override
+        public boolean multipleValues()
+        {
+          return false;
+        }
+      };
+
   final IncrementalIndex<?> index;
 
   public IncrementalIndexStorageAdapter(IncrementalIndex<?> index)
@@ -154,7 +210,7 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter
     // to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of
     // multi-valuedness at cursor creation time, instead of the latest state, and getSnapshotColumnCapabilities could
     // be removed.
-    return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true);
+    return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC);
   }
 
   /**
@@ -165,7 +221,10 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter
    */
   public ColumnCapabilities getSnapshotColumnCapabilities(String column)
   {
-    return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column));
+    return ColumnCapabilitiesImpl.snapshot(
+        index.getCapabilities(column),
+        SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC
+    );
   }
 
   @Override
diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java
index 269ac38..48c56c9 100644
--- a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java
+++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java
@@ -83,9 +83,9 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe
         spec -> {
           final ColumnHolder holder = index.getColumnHolder(spec.getDimension());
           if (holder == null
-              || !holder.getCapabilities().isDictionaryEncoded()
+              || holder.getCapabilities().isDictionaryEncoded().isFalse()
               || holder.getCapabilities().getType() != ValueType.STRING
-              || !holder.getCapabilities().hasMultipleValues().isMaybeTrue()) {
+              || holder.getCapabilities().hasMultipleValues().isFalse()) {
             throw new ISE(
                 "Column[%s] is not a multi-value string column, do not ask for a multi-value selector",
                 spec.getDimension()
@@ -119,7 +119,7 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe
         spec -> {
           final ColumnHolder holder = index.getColumnHolder(spec.getDimension());
           if (holder == null
-              || !holder.getCapabilities().isDictionaryEncoded()
+              || !holder.getCapabilities().isDictionaryEncoded().isTrue()
               || holder.getCapabilities().getType() != ValueType.STRING) {
             // Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls.
             return NilVectorSelector.create(offset);
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
index 5ab4e46..17c6a3f 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
@@ -153,8 +153,8 @@ public class ExpressionSelectors
         );
       } else if (capabilities != null
                  && capabilities.getType() == ValueType.STRING
-                 && capabilities.isDictionaryEncoded()
-                 && !capabilities.hasMultipleValues().isMaybeTrue()
+                 && capabilities.isDictionaryEncoded().isTrue()
+                 && capabilities.hasMultipleValues().isFalse()
                  && exprDetails.getArrayBindings().isEmpty()) {
         // Optimization for expressions that hit one scalar string column and nothing else.
         return new SingleStringInputCachingExpressionColumnValueSelector(
@@ -225,7 +225,7 @@ public class ExpressionSelectors
       // not treating it as an array and not wanting to output an array
       if (capabilities != null
           && capabilities.getType() == ValueType.STRING
-          && capabilities.isDictionaryEncoded()
+          && capabilities.isDictionaryEncoded().isTrue()
           && !capabilities.hasMultipleValues().isUnknown()
           && !exprDetails.hasInputArrays()
           && !exprDetails.isOutputArray()
diff --git a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java
index 3ca72aa..96167fd 100644
--- a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java
+++ b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java
@@ -138,7 +138,7 @@ public class LookupSegmentTest
     // reporting complete single-valued capabilities. It would be good to change this in the future, so query engines
     // running on top of lookups can take advantage of singly-valued optimizations.
     Assert.assertTrue(capabilities.hasMultipleValues().isUnknown());
-    Assert.assertFalse(capabilities.isDictionaryEncoded());
+    Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue());
   }
 
   @Test
@@ -151,7 +151,7 @@ public class LookupSegmentTest
     // running on top of lookups can take advantage of singly-valued optimizations.
     Assert.assertEquals(ValueType.STRING, capabilities.getType());
     Assert.assertTrue(capabilities.hasMultipleValues().isUnknown());
-    Assert.assertFalse(capabilities.isDictionaryEncoded());
+    Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue());
   }
 
   @Test
diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java
index b253614..86e8503 100644
--- a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java
+++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java
@@ -45,7 +45,6 @@ import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
 import org.apache.druid.query.aggregation.AggregatorFactory;
 import org.apache.druid.query.aggregation.CountAggregatorFactory;
 import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
-import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
 import org.apache.druid.segment.column.ColumnHolder;
 import org.apache.druid.segment.column.DictionaryEncodedColumn;
 import org.apache.druid.segment.column.StringDictionaryEncodedColumn;
@@ -2012,32 +2011,6 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest
     Assert.assertEquals(-1, dictIdSeeker.seek(5));
   }
 
-  @Test(expected = IllegalArgumentException.class)
-  public void testCloser() throws Exception
-  {
-    final long timestamp = System.currentTimeMillis();
-    IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null);
-    IncrementalIndexTest.populateIndex(timestamp, toPersist);
-    ColumnCapabilitiesImpl capabilities = (ColumnCapabilitiesImpl) toPersist.getCapabilities("dim1");
-    capabilities.setHasSpatialIndexes(true);
-
-    final File tempDir = temporaryFolder.newFolder();
-    final File v8TmpDir = new File(tempDir, "v8-tmp");
-    final File v9TmpDir = new File(tempDir, "v9-tmp");
-
-    try {
-      indexMerger.persist(toPersist, tempDir, indexSpec, null);
-    }
-    finally {
-      if (v8TmpDir.exists()) {
-        Assert.fail("v8-tmp dir not clean.");
-      }
-      if (v9TmpDir.exists()) {
-        Assert.fail("v9-tmp dir not clean.");
-      }
-    }
-  }
-
   @Test
   public void testMultiValueHandling() throws Exception
   {
diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java
index c7783e9..57689a4 100644
--- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java
@@ -150,20 +150,25 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
     ColumnCapabilities caps = INC_INDEX.getCapabilities("d1");
     Assert.assertEquals(ValueType.STRING, caps.getType());
     Assert.assertTrue(caps.hasBitmapIndexes());
-    Assert.assertTrue(caps.isDictionaryEncoded());
+    Assert.assertTrue(caps.isDictionaryEncoded().isMaybeTrue());
+    Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
     // multi-value is unknown unless explicitly set to 'true'
     Assert.assertTrue(caps.hasMultipleValues().isUnknown());
     // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state,
     // coercing any 'UNKNOWN' values to false
-    Assert.assertFalse(ColumnCapabilitiesImpl.snapshot(caps).hasMultipleValues().isMaybeTrue());
+    Assert.assertFalse(
+        ColumnCapabilitiesImpl.snapshot(caps, IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC)
+                              .hasMultipleValues()
+                              .isMaybeTrue()
+    );
     Assert.assertFalse(caps.hasSpatialIndexes());
 
     caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities();
     Assert.assertEquals(ValueType.STRING, caps.getType());
     Assert.assertTrue(caps.hasBitmapIndexes());
-    Assert.assertTrue(caps.isDictionaryEncoded());
+    Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
     Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
     Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@@ -176,7 +181,7 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
     ColumnCapabilities caps = INC_INDEX.getCapabilities("d2");
     Assert.assertEquals(ValueType.STRING, caps.getType());
     Assert.assertTrue(caps.hasBitmapIndexes());
-    Assert.assertTrue(caps.isDictionaryEncoded());
+    Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
     Assert.assertTrue(caps.hasMultipleValues().isTrue());
@@ -185,7 +190,7 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
     caps = MMAP_INDEX.getColumnHolder("d2").getCapabilities();
     Assert.assertEquals(ValueType.STRING, caps.getType());
     Assert.assertTrue(caps.hasBitmapIndexes());
-    Assert.assertTrue(caps.isDictionaryEncoded());
+    Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
     Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
     Assert.assertTrue(caps.hasMultipleValues().isTrue());
@@ -199,12 +204,11 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
     assertNonStringColumnCapabilities(MMAP_INDEX.getColumnHolder("m4").getCapabilities(), ValueType.COMPLEX);
   }
 
-
   private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueType valueType)
   {
     Assert.assertEquals(valueType, caps.getType());
     Assert.assertFalse(caps.hasBitmapIndexes());
-    Assert.assertFalse(caps.isDictionaryEncoded());
+    Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
     Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
diff --git a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java
index e12dac4..a802b81 100644
--- a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java
@@ -51,7 +51,7 @@ public class RowBasedColumnSelectorFactoryTest
         RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, ColumnHolder.TIME_COLUMN_NAME);
     Assert.assertEquals(ValueType.LONG, caps.getType());
     Assert.assertFalse(caps.hasBitmapIndexes());
-    Assert.assertFalse(caps.isDictionaryEncoded());
+    Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
     Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@@ -65,7 +65,7 @@ public class RowBasedColumnSelectorFactoryTest
         RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, STRING_COLUMN_NAME);
     Assert.assertEquals(ValueType.STRING, caps.getType());
     Assert.assertFalse(caps.hasBitmapIndexes());
-    Assert.assertFalse(caps.isDictionaryEncoded());
+    Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
     Assert.assertTrue(caps.hasMultipleValues().isUnknown());
@@ -79,7 +79,7 @@ public class RowBasedColumnSelectorFactoryTest
         RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, LONG_COLUMN_NAME);
     Assert.assertEquals(ValueType.LONG, caps.getType());
     Assert.assertFalse(caps.hasBitmapIndexes());
-    Assert.assertFalse(caps.isDictionaryEncoded());
+    Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
     Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@@ -93,7 +93,7 @@ public class RowBasedColumnSelectorFactoryTest
         RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, FLOAT_COLUMN_NAME);
     Assert.assertEquals(ValueType.FLOAT, caps.getType());
     Assert.assertFalse(caps.hasBitmapIndexes());
-    Assert.assertFalse(caps.isDictionaryEncoded());
+    Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
     Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@@ -107,7 +107,7 @@ public class RowBasedColumnSelectorFactoryTest
         RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, DOUBLE_COLUMN_NAME);
     Assert.assertEquals(ValueType.DOUBLE, caps.getType());
     Assert.assertFalse(caps.hasBitmapIndexes());
-    Assert.assertFalse(caps.isDictionaryEncoded());
+    Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
     Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@@ -121,7 +121,7 @@ public class RowBasedColumnSelectorFactoryTest
         RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, COMPLEX_COLUMN_NAME);
     Assert.assertEquals(ValueType.COMPLEX, caps.getType());
     Assert.assertFalse(caps.hasBitmapIndexes());
-    Assert.assertFalse(caps.isDictionaryEncoded());
+    Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
     Assert.assertTrue(caps.hasMultipleValues().isUnknown());
diff --git a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java
index e221edd..ce98506 100644
--- a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java
@@ -44,8 +44,7 @@ public class ColumnCapabilitiesImplTest
     ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class);
 
     Assert.assertEquals(ValueType.COMPLEX, cc.getType());
-    Assert.assertTrue(cc.isDictionaryEncoded());
-    Assert.assertFalse(cc.isRunLengthEncoded());
+    Assert.assertTrue(cc.isDictionaryEncoded().isTrue());
     Assert.assertTrue(cc.hasSpatialIndexes());
     Assert.assertTrue(cc.hasMultipleValues().isTrue());
     Assert.assertTrue(cc.hasBitmapIndexes());
@@ -69,8 +68,7 @@ public class ColumnCapabilitiesImplTest
     ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class);
 
     Assert.assertEquals(ValueType.COMPLEX, cc.getType());
-    Assert.assertTrue(cc.isDictionaryEncoded());
-    Assert.assertTrue(cc.isRunLengthEncoded());
+    Assert.assertTrue(cc.isDictionaryEncoded().isTrue());
     Assert.assertTrue(cc.hasSpatialIndexes());
     Assert.assertTrue(cc.hasMultipleValues().isTrue());
     Assert.assertTrue(cc.hasBitmapIndexes());
diff --git a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java
index 7b80bd2..6406d7a 100644
--- a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java
@@ -200,7 +200,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag
 
     Assert.assertEquals(ValueType.STRING, capabilities.getType());
     Assert.assertTrue(capabilities.hasBitmapIndexes());
-    Assert.assertTrue(capabilities.isDictionaryEncoded());
+    Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue());
     Assert.assertTrue(capabilities.areDictionaryValuesSorted().isTrue());
     Assert.assertTrue(capabilities.areDictionaryValuesUnique().isTrue());
   }
@@ -216,7 +216,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag
     Assert.assertFalse(capabilities.hasBitmapIndexes());
     Assert.assertFalse(capabilities.areDictionaryValuesUnique().isTrue());
     Assert.assertFalse(capabilities.areDictionaryValuesSorted().isTrue());
-    Assert.assertTrue(capabilities.isDictionaryEncoded());
+    Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue());
   }
 
   @Test
diff --git a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java
index c75232c..61b5637 100644
--- a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java
@@ -137,7 +137,7 @@ public class IndexedTableJoinableTest
   {
     final ColumnCapabilities capabilities = target.getColumnCapabilities("str");
     Assert.assertEquals(ValueType.STRING, capabilities.getType());
-    Assert.assertTrue(capabilities.isDictionaryEncoded());
+    Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue());
     Assert.assertFalse(capabilities.hasBitmapIndexes());
     Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
     Assert.assertFalse(capabilities.hasSpatialIndexes());
@@ -148,7 +148,7 @@ public class IndexedTableJoinableTest
   {
     final ColumnCapabilities capabilities = target.getColumnCapabilities("long");
     Assert.assertEquals(ValueType.LONG, capabilities.getType());
-    Assert.assertFalse(capabilities.isDictionaryEncoded());
+    Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue());
     Assert.assertFalse(capabilities.hasBitmapIndexes());
     Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
     Assert.assertFalse(capabilities.hasSpatialIndexes());
diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java
similarity index 67%
rename from processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java
rename to processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java
index 0b7a66d..08b18c4 100644
--- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java
@@ -21,20 +21,41 @@ package org.apache.druid.segment.virtual;
 
 import com.google.common.base.Supplier;
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.common.guava.SettableSupplier;
+import org.apache.druid.data.input.MapBasedInputRow;
+import org.apache.druid.data.input.impl.DimensionsSpec;
+import org.apache.druid.data.input.impl.TimestampSpec;
+import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.java.util.common.Intervals;
+import org.apache.druid.java.util.common.granularity.Granularities;
+import org.apache.druid.java.util.common.guava.Sequence;
 import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExprMacroTable;
+import org.apache.druid.math.expr.Parser;
+import org.apache.druid.query.aggregation.AggregatorFactory;
+import org.apache.druid.query.aggregation.CountAggregatorFactory;
 import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
 import org.apache.druid.segment.BaseSingleValueDimensionSelector;
 import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.Cursor;
 import org.apache.druid.segment.DimensionSelector;
 import org.apache.druid.segment.TestObjectColumnSelector;
+import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.incremental.IncrementalIndex;
+import org.apache.druid.segment.incremental.IncrementalIndexSchema;
+import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
+import org.apache.druid.segment.incremental.IndexSizeExceededException;
+import org.apache.druid.testing.InitializedNullHandlingTest;
 import org.junit.Assert;
 import org.junit.Test;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
-public class ExpressionColumnValueSelectorTest
+public class ExpressionSelectorsTest extends InitializedNullHandlingTest
 {
   @Test
   public void testSupplierFromDimensionSelector()
@@ -231,6 +252,86 @@ public class ExpressionColumnValueSelectorTest
     );
   }
 
+  @Test
+  public void testIncrementIndexStringSelector() throws IndexSizeExceededException
+  {
+    // This test covers a regression caused by ColumnCapabilities.isDictionaryEncoded not matching the value of
+    // DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an
+    // exception trying to make an optimized string expression selector that was not appropriate to use for the
+    // underlying dimension selector.
+    // This occurred during schemaless ingestion with sparse dimension values and no explicit null rows, so the
+    // conditions are replicated by this test. See https://github.com/apache/druid/pull/10248 for details
+    IncrementalIndexSchema schema = new IncrementalIndexSchema(
+        0,
+        new TimestampSpec("time", "millis", DateTimes.nowUtc()),
+        Granularities.NONE,
+        VirtualColumns.EMPTY,
+        DimensionsSpec.EMPTY,
+        new AggregatorFactory[]{new CountAggregatorFactory("count")},
+        true
+    );
+
+    IncrementalIndex index = new IncrementalIndex.Builder().setMaxRowCount(100).setIndexSchema(schema).buildOnheap();
+    index.add(
+        new MapBasedInputRow(
+            DateTimes.nowUtc().getMillis(),
+            ImmutableList.of("x"),
+            ImmutableMap.of("x", "foo")
+        )
+    );
+    index.add(
+        new MapBasedInputRow(
+            DateTimes.nowUtc().plusMillis(1000).getMillis(),
+            ImmutableList.of("y"),
+            ImmutableMap.of("y", "foo")
+        )
+    );
+
+    IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index);
+
+    Sequence<Cursor> cursors = adapter.makeCursors(
+        null,
+        Intervals.ETERNITY,
+        VirtualColumns.EMPTY,
+        Granularities.ALL,
+        false,
+        null
+    );
+    int rowsProcessed = cursors.map(cursor -> {
+      DimensionSelector xExprSelector = ExpressionSelectors.makeDimensionSelector(
+          cursor.getColumnSelectorFactory(),
+          Parser.parse("concat(x, 'foo')", ExprMacroTable.nil()),
+          null
+      );
+      DimensionSelector yExprSelector = ExpressionSelectors.makeDimensionSelector(
+          cursor.getColumnSelectorFactory(),
+          Parser.parse("concat(y, 'foo')", ExprMacroTable.nil()),
+          null
+      );
+      int rowCount = 0;
+      while (!cursor.isDone()) {
+        Object x = xExprSelector.getObject();
+        Object y = yExprSelector.getObject();
+        List<String> expectedFoo = Collections.singletonList("foofoo");
+        List<String> expectedNull = NullHandling.replaceWithDefault()
+                                    ? Collections.singletonList("foo")
+                                    : Collections.singletonList(null);
+        if (rowCount == 0) {
+          Assert.assertEquals(expectedFoo, x);
+          Assert.assertEquals(expectedNull, y);
+        } else {
+          Assert.assertEquals(expectedNull, x);
+          Assert.assertEquals(expectedFoo, y);
+        }
+        rowCount++;
+        cursor.advance();
+      }
+      return rowCount;
+    }).accumulate(0, (in, acc) -> in + acc);
+
+    Assert.assertEquals(2, rowsProcessed);
+  }
+
   private static DimensionSelector dimensionSelectorFromSupplier(
       final Supplier<String> supplier
   )
diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
index 16e090d..84bf5fd 100644
--- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
@@ -810,7 +810,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest
     ColumnCapabilities caps = X_PLUS_Y.capabilities("expr");
     Assert.assertEquals(ValueType.FLOAT, caps.getType());
     Assert.assertFalse(caps.hasBitmapIndexes());
-    Assert.assertFalse(caps.isDictionaryEncoded());
+    Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
     Assert.assertTrue(caps.hasMultipleValues().isUnknown());
@@ -820,7 +820,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest
     caps = Z_CONCAT_X.capabilities("expr");
     Assert.assertEquals(ValueType.STRING, caps.getType());
     Assert.assertFalse(caps.hasBitmapIndexes());
-    Assert.assertFalse(caps.isDictionaryEncoded());
+    Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
     Assert.assertTrue(caps.hasMultipleValues().isUnknown());
diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java
index 7002ac2..2324a2a 100644
--- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java
+++ b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java
@@ -29,7 +29,7 @@ import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.query.aggregation.AggregatorFactory;
 import org.apache.druid.segment.QueryableIndex;
 import org.apache.druid.segment.ReferenceCountingSegment;
-import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.incremental.IncrementalIndexAddResult;
 import org.apache.druid.segment.incremental.IncrementalIndexSchema;
@@ -377,7 +377,7 @@ public class Sink implements Iterable<FireHydrant>, Overshadowable<Sink>
           FireHydrant lastHydrant = hydrants.get(numHydrants - 1);
           newCount = lastHydrant.getCount() + 1;
           if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) {
-            Map<String, ColumnCapabilitiesImpl> oldCapabilities;
+            Map<String, ColumnCapabilities> oldCapabilities;
             if (lastHydrant.hasSwapped()) {
               oldCapabilities = new HashMap<>();
               ReferenceCountingSegment segment = lastHydrant.getIncrementedSegment();
@@ -385,7 +385,7 @@ public class Sink implements Iterable<FireHydrant>, Overshadowable<Sink>
                 QueryableIndex oldIndex = segment.asQueryableIndex();
                 for (String dim : oldIndex.getAvailableDimensions()) {
                   dimOrder.add(dim);
-                  oldCapabilities.put(dim, (ColumnCapabilitiesImpl) oldIndex.getColumnHolder(dim).getCapabilities());
+                  oldCapabilities.put(dim, oldIndex.getColumnHolder(dim).getCapabilities());
                 }
               }
               finally {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org