You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by hi...@apache.org on 2019/05/21 00:04:41 UTC

[incubator-druid] branch master updated: Virtual column updates for exploiting base column internal structure (#7618)

This is an automated email from the ASF dual-hosted git repository.

himanshug pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git


The following commit(s) were added to refs/heads/master by this push:
     new fb0c846  Virtual column updates for exploiting base column internal structure (#7618)
fb0c846 is described below

commit fb0c84694143cda3e66d59e3d706f38f3fd5d1cb
Author: Himanshu <g....@gmail.com>
AuthorDate: Mon May 20 17:04:35 2019 -0700

    Virtual column updates for exploiting base column internal structure (#7618)
    
    * VirtualColumn updates for exploiting base column internal structure
    
    * unit tests for virtual column interface updates
    
    * groupBy needs to use VirtualizedColumnSelectorFactory if outer query in
    nested groupBy has virtual columns.
    
    * fix strict compile checks
    
    * fix teamcity build errors
    
    * add comment explaining useVirtualizedColumnSelectorFactory flag in RowBasedGrouperHelper.createGrouperAccumulatorPair(..)
---
 .../epinephelinae/GroupByMergingQueryRunnerV2.java |   3 +-
 .../groupby/epinephelinae/GroupByRowProcessor.java |   6 +-
 .../epinephelinae/RowBasedGrouperHelper.java       |  29 +-
 .../query/groupby/strategy/GroupByStrategyV2.java  |   4 +-
 .../segment/ColumnSelectorBitmapIndexSelector.java |  55 ++-
 .../QueryableIndexColumnSelectorFactory.java       |  14 +-
 .../org/apache/druid/segment/VirtualColumn.java    |  55 ++-
 .../org/apache/druid/segment/VirtualColumns.java   |  32 ++
 .../query/groupby/GroupByQueryRunnerTest.java      |   6 +-
 .../segment/virtual/DummyStringVirtualColumn.java  | 289 +++++++++++++++
 .../virtual/DummyStringVirtualColumnTest.java      | 395 +++++++++++++++++++++
 11 files changed, 865 insertions(+), 23 deletions(-)

diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByMergingQueryRunnerV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByMergingQueryRunnerV2.java
index b8cc735..ef8c304 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByMergingQueryRunnerV2.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByMergingQueryRunnerV2.java
@@ -201,7 +201,8 @@ public class GroupByMergingQueryRunnerV2 implements QueryRunner<Row>
                       priority,
                       hasTimeout,
                       timeoutAt,
-                      mergeBufferSize
+                      mergeBufferSize,
+                      false
                   );
               final Grouper<RowBasedKey> grouper = pair.lhs;
               final Accumulator<AggregateResult, Row> accumulator = pair.rhs;
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByRowProcessor.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByRowProcessor.java
index 171b8ad..2fa3b57 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByRowProcessor.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByRowProcessor.java
@@ -65,7 +65,8 @@ public class GroupByRowProcessor
       final String processingTmpDir,
       final int mergeBufferSize,
       final List<Closeable> closeOnExit,
-      final boolean wasQueryPushedDown
+      final boolean wasQueryPushedDown,
+      final boolean useVirtualizedColumnSelectorFactory
   )
   {
     final GroupByQuery query = (GroupByQuery) queryParam;
@@ -112,7 +113,8 @@ public class GroupByRowProcessor
         temporaryStorage,
         spillMapper,
         aggregatorFactories,
-        mergeBufferSize
+        mergeBufferSize,
+        useVirtualizedColumnSelectorFactory
     );
     final Grouper<RowBasedKey> grouper = pair.lhs;
     final Accumulator<AggregateResult, Row> accumulator = pair.rhs;
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
index 0b82472..bd41d08 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java
@@ -105,7 +105,8 @@ public class RowBasedGrouperHelper
       final LimitedTemporaryStorage temporaryStorage,
       final ObjectMapper spillMapper,
       final AggregatorFactory[] aggregatorFactories,
-      final int mergeBufferSize
+      final int mergeBufferSize,
+      final boolean useVirtualizedColumnSelectorFactory
   )
   {
     return createGrouperAccumulatorPair(
@@ -123,7 +124,8 @@ public class RowBasedGrouperHelper
         UNKNOWN_THREAD_PRIORITY,
         false,
         UNKNOWN_TIMEOUT,
-        mergeBufferSize
+        mergeBufferSize,
+        useVirtualizedColumnSelectorFactory
     );
   }
 
@@ -147,7 +149,8 @@ public class RowBasedGrouperHelper
       final int priority,
       final boolean hasQueryTimeout,
       final long queryTimeoutAt,
-      final int mergeBufferSize
+      final int mergeBufferSize,
+      final boolean useVirtualizedColumnSelectorFactory
   )
   {
     // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
@@ -159,13 +162,23 @@ public class RowBasedGrouperHelper
     final boolean includeTimestamp = GroupByStrategyV2.getUniversalTimestamp(query) == null;
 
     final ThreadLocal<Row> columnSelectorRow = new ThreadLocal<>();
-    final ColumnSelectorFactory columnSelectorFactory = query.getVirtualColumns().wrap(
-        RowBasedColumnSelectorFactory.create(
-            columnSelectorRow,
-            rawInputRowSignature
-        )
+
+    ColumnSelectorFactory columnSelectorFactory = RowBasedColumnSelectorFactory.create(
+        columnSelectorRow,
+        rawInputRowSignature
     );
 
+    // Queries would work fine if we always wrapped the columnSelectorFactory into a
+    // VirtualizedColumnSelectorFactory. However, VirtualizedColumnSelectorFactory is incapable of using
+    // ColumnSelector based variants of makeXXX methods, which are more efficient.
+    // This flag is set to true only when wrapping is essential, e.g. a nested groupBy query with virtual columns
+    // in the outer query. Without this flag, groupBy query processing would never use the more efficient
+    // ColumnSelector based methods in the VirtualColumn interface.
+    // For more details, see https://github.com/apache/incubator-druid/issues/7574
+    if (useVirtualizedColumnSelectorFactory) {
+      columnSelectorFactory = query.getVirtualColumns().wrap(columnSelectorFactory);
+    }
+
     final boolean willApplyLimitPushDown = query.isApplyLimitPushDown();
     final DefaultLimitSpec limitSpec = willApplyLimitPushDown ? (DefaultLimitSpec) query.getLimitSpec() : null;
     boolean sortHasNonGroupingFields = false;
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/strategy/GroupByStrategyV2.java b/processing/src/main/java/org/apache/druid/query/groupby/strategy/GroupByStrategyV2.java
index 434a828..7a5f6e8 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/strategy/GroupByStrategyV2.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/strategy/GroupByStrategyV2.java
@@ -351,7 +351,8 @@ public class GroupByStrategyV2 implements GroupByStrategy
               processingConfig.getTmpDir(),
               processingConfig.intermediateComputeSizeBytes(),
               closeOnExit,
-              wasQueryPushedDown
+              wasQueryPushedDown,
+              true
           )
       );
 
@@ -414,6 +415,7 @@ public class GroupByStrategyV2 implements GroupByStrategy
               processingConfig.getTmpDir(),
               processingConfig.intermediateComputeSizeBytes(),
               closeOnExit,
+              false,
               false
           )
       );
diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java
index 596a9f3..b9263d1 100644
--- a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java
+++ b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java
@@ -61,8 +61,49 @@ public class ColumnSelectorBitmapIndexSelector implements BitmapIndexSelector
   public CloseableIndexed<String> getDimensionValues(String dimension)
   {
     if (isVirtualColumn(dimension)) {
-      // Virtual columns don't have dictionaries or indexes.
-      return null;
+      BitmapIndex bitmapIndex = virtualColumns.getBitmapIndex(dimension, index);
+      if (bitmapIndex == null) {
+        return null;
+      }
+
+      return new CloseableIndexed<String>()
+      {
+        @Override
+        public int size()
+        {
+          return bitmapIndex.getCardinality();
+        }
+
+        @Override
+        public String get(int index)
+        {
+          return bitmapIndex.getValue(index);
+        }
+
+        @Override
+        public int indexOf(String value)
+        {
+          return bitmapIndex.getIndex(value);
+        }
+
+        @Override
+        public Iterator<String> iterator()
+        {
+          return IndexedIterable.create(this).iterator();
+        }
+
+        @Override
+        public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+        {
+          inspector.visit("column", bitmapIndex);
+        }
+
+        @Override
+        public void close() throws IOException
+        {
+
+        }
+      };
     }
 
     final ColumnHolder columnHolder = index.getColumnHolder(dimension);
@@ -144,8 +185,7 @@ public class ColumnSelectorBitmapIndexSelector implements BitmapIndexSelector
   public BitmapIndex getBitmapIndex(String dimension)
   {
     if (isVirtualColumn(dimension)) {
-      // Virtual columns don't have dictionaries or indexes.
-      return null;
+      return virtualColumns.getBitmapIndex(dimension, index);
     }
 
     final ColumnHolder columnHolder = index.getColumnHolder(dimension);
@@ -214,8 +254,11 @@ public class ColumnSelectorBitmapIndexSelector implements BitmapIndexSelector
   public ImmutableBitmap getBitmapIndex(String dimension, String value)
   {
     if (isVirtualColumn(dimension)) {
-      // Virtual columns don't have dictionaries or indexes.
-      return null;
+      BitmapIndex idx = virtualColumns.getBitmapIndex(dimension, index);
+      if (idx == null) {
+        return null;
+      }
+      return idx.getBitmap(idx.getIndex(value));
     }
 
     final ColumnHolder columnHolder = index.getColumnHolder(dimension);
diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexColumnSelectorFactory.java
index ec5ee1d..0794b06 100644
--- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexColumnSelectorFactory.java
+++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexColumnSelectorFactory.java
@@ -80,7 +80,12 @@ class QueryableIndexColumnSelectorFactory implements ColumnSelectorFactory
         dimensionSpec,
         spec -> {
           if (virtualColumns.exists(spec.getDimension())) {
-            return virtualColumns.makeDimensionSelector(spec, this);
+            DimensionSelector dimensionSelector = virtualColumns.makeDimensionSelector(dimensionSpec, index, offset);
+            if (dimensionSelector == null) {
+              return virtualColumns.makeDimensionSelector(dimensionSpec, this);
+            } else {
+              return dimensionSelector;
+            }
           }
 
           return spec.decorate(makeDimensionSelectorUndecorated(spec));
@@ -123,7 +128,12 @@ class QueryableIndexColumnSelectorFactory implements ColumnSelectorFactory
         columnName,
         name -> {
           if (virtualColumns.exists(columnName)) {
-            return virtualColumns.makeColumnValueSelector(columnName, this);
+            ColumnValueSelector<?> selector = virtualColumns.makeColumnValueSelector(columnName, index, offset);
+            if (selector == null) {
+              return virtualColumns.makeColumnValueSelector(columnName, this);
+            } else {
+              return selector;
+            }
           }
 
           BaseColumn column = getCachedColumn(columnName, BaseColumn.class);
diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java
index 7382b2e..217f72f 100644
--- a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java
@@ -23,13 +23,16 @@ import com.fasterxml.jackson.annotation.JsonSubTypes;
 import com.fasterxml.jackson.annotation.JsonTypeInfo;
 import org.apache.druid.java.util.common.Cacheable;
 import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.segment.column.BitmapIndex;
 import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.data.ReadableOffset;
 import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
 
+import javax.annotation.Nullable;
 import java.util.List;
 
 /**
- * Virtual columns are "views" created over a ColumnSelectorFactory. They can potentially draw from multiple
+ * Virtual columns are "views" created over a ColumnSelectorFactory or ColumnSelector. They can potentially draw from multiple
  * underlying columns, although they always present themselves as if they were a single column.
  *
  * A virtual column object will be shared amongst threads and must be thread safe. The selectors returned
@@ -62,6 +65,25 @@ public interface VirtualColumn extends Cacheable
   DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory factory);
 
   /**
+   * Returns a DimensionSelector similar to the one returned by {@link #makeDimensionSelector(DimensionSpec, ColumnSelectorFactory)},
+   * except that this method has full access to the underlying column and can potentially provide a more efficient implementation.
+   *
+   * Users of this interface must call this method first whenever possible. Typically it cannot be called in
+   * query paths on top of IncrementalIndex, which doesn't have columns as in persisted segments.
+   *
+   * @param dimensionSpec
+   * @param columnSelector
+   * @param offset
+   * @return the selector
+   */
+  @SuppressWarnings("unused")
+  @Nullable
+  default DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, ColumnSelector columnSelector, ReadableOffset offset)
+  {
+    return null;
+  }
+
+  /**
    * Build a selector corresponding to this virtual column. Also provides the name that the
    * virtual column was referenced with, which is useful if this column uses dot notation.
    *
@@ -72,6 +94,24 @@ public interface VirtualColumn extends Cacheable
    */
   ColumnValueSelector<?> makeColumnValueSelector(String columnName, ColumnSelectorFactory factory);
 
+  /**
+   * Returns a ColumnValueSelector similar to the one returned by {@link #makeColumnValueSelector(String, ColumnSelectorFactory)},
+   * except that this method has full access to the underlying column and can potentially provide a more efficient implementation.
+   *
+   * Users of this interface must call this method first whenever possible. Typically it cannot be called in
+   * query paths on top of IncrementalIndex, which doesn't have columns as in persisted segments.
+   *
+   * @param columnName
+   * @param columnSelector
+   * @param offset
+   * @return the selector
+   */
+  @SuppressWarnings("unused")
+  @Nullable
+  default ColumnValueSelector<?> makeColumnValueSelector(String columnName, ColumnSelector columnSelector, ReadableOffset offset)
+  {
+    return null;
+  }
 
   /**
    * Returns the capabilities of this virtual column, which includes a type that corresponds to the best
@@ -107,4 +147,17 @@ public interface VirtualColumn extends Cacheable
    * @return whether to use dot notation
    */
   boolean usesDotNotation();
+
+  /**
+   * Returns the BitmapIndex for efficient filtering on columns that support it. This method is only used if the
+   * {@link ColumnCapabilities} returned from {@link #capabilities(String)} has the flag for BitmapIndex support set.
+   * @param columnName
+   * @param selector
+   * @return BitmapIndex
+   */
+  @SuppressWarnings("unused")
+  default BitmapIndex getBitmapIndex(String columnName, ColumnSelector selector)
+  {
+    throw new UnsupportedOperationException("not supported");
+  }
 }
diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
index ce4dcaf..1d0d5bf 100644
--- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
+++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
@@ -31,8 +31,10 @@ import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.Pair;
 import org.apache.druid.query.cache.CacheKeyBuilder;
 import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.segment.column.BitmapIndex;
 import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.data.ReadableOffset;
 import org.apache.druid.segment.virtual.VirtualizedColumnSelectorFactory;
 
 import javax.annotation.Nullable;
@@ -171,6 +173,36 @@ public class VirtualColumns implements Cacheable
     }
   }
 
+  public BitmapIndex getBitmapIndex(String columnName, ColumnSelector columnSelector)
+  {
+    final VirtualColumn virtualColumn = getVirtualColumn(columnName);
+    if (virtualColumn == null) {
+      throw new IAE("No such virtual column[%s]", columnName);
+    } else {
+      return virtualColumn.capabilities(columnName).hasBitmapIndexes() ? virtualColumn.getBitmapIndex(columnName, columnSelector) : null;
+    }
+  }
+
+  public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, ColumnSelector columnSelector, ReadableOffset offset)
+  {
+    final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension());
+    if (virtualColumn == null) {
+      throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension());
+    } else {
+      return virtualColumn.makeDimensionSelector(dimensionSpec, columnSelector, offset);
+    }
+  }
+
+  public ColumnValueSelector<?> makeColumnValueSelector(String columnName, ColumnSelector columnSelector, ReadableOffset offset)
+  {
+    final VirtualColumn virtualColumn = getVirtualColumn(columnName);
+    if (virtualColumn == null) {
+      throw new IAE("No such virtual column[%s]", columnName);
+    } else {
+      return virtualColumn.makeColumnValueSelector(columnName, columnSelector, offset);
+    }
+  }
+
   /**
    * Create a column value selector.
    *
diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java
index cc25be4..36b7994 100644
--- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java
+++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java
@@ -6316,9 +6316,11 @@ public class GroupByQueryRunnerTest
         .builder()
         .setDataSource(QueryRunnerTestHelper.dataSource)
         .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setVirtualColumns(new ExpressionVirtualColumn("alias", "quality", ValueType.STRING, TestExprMacroTable.INSTANCE))
         .setDimensions(Lists.newArrayList(
-            new DefaultDimensionSpec("quality", "alias"),
-            new DefaultDimensionSpec("market", "market")
+            new DefaultDimensionSpec("quality", "quality"),
+            new DefaultDimensionSpec("market", "market"),
+            new DefaultDimensionSpec("alias", "alias")
         ))
         .setAggregatorSpecs(
             Arrays.asList(
diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/DummyStringVirtualColumn.java b/processing/src/test/java/org/apache/druid/segment/virtual/DummyStringVirtualColumn.java
new file mode 100644
index 0000000..b532f78
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/virtual/DummyStringVirtualColumn.java
@@ -0,0 +1,289 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.virtual;
+
+import com.google.common.base.Predicate;
+import org.apache.druid.java.util.common.IAE;
+import org.apache.druid.query.dimension.DefaultDimensionSpec;
+import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.query.filter.ValueMatcher;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.ColumnSelector;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.IdLookup;
+import org.apache.druid.segment.NilColumnValueSelector;
+import org.apache.druid.segment.VirtualColumn;
+import org.apache.druid.segment.column.BaseColumn;
+import org.apache.druid.segment.column.BitmapIndex;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.column.StringDictionaryEncodedColumn;
+import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.data.ReadableOffset;
+
+import javax.annotation.Nullable;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * A String-column-like VirtualColumn used to test-drive the VirtualColumn interface.
+ */
+public class DummyStringVirtualColumn implements VirtualColumn
+{
+  private final String baseColumnName;
+  private final String outputName;
+
+  private final boolean enableRowBasedMethods;
+  private final boolean enableColumnBasedMethods;
+  private final boolean enableBitmaps;
+  private final boolean disableValueMatchers;
+
+  public DummyStringVirtualColumn(
+      String baseColumnName,
+      String outputName,
+      boolean enableRowBasedMethods,
+      boolean enableColumnBasedMethods,
+      boolean enableBitmaps,
+      boolean disableValueMatchers
+  )
+  {
+    this.baseColumnName = baseColumnName;
+    this.outputName = outputName;
+    this.enableRowBasedMethods = enableRowBasedMethods;
+    this.enableColumnBasedMethods = enableColumnBasedMethods;
+    this.enableBitmaps = enableBitmaps;
+    this.disableValueMatchers = disableValueMatchers;
+  }
+
+  @Override
+  public String getOutputName()
+  {
+    return this.outputName;
+  }
+
+  @Override
+  public DimensionSelector makeDimensionSelector(
+      DimensionSpec dimensionSpec,
+      ColumnSelector columnSelector,
+      ReadableOffset offset
+  )
+  {
+    if (enableColumnBasedMethods) {
+      ColumnHolder holder = columnSelector.getColumnHolder(baseColumnName);
+      if (holder == null) {
+        return DimensionSelector.constant(null);
+      }
+
+      StringDictionaryEncodedColumn stringCol = toStringDictionaryEncodedColumn(holder.getColumn());
+
+      DimensionSelector baseDimensionSelector = stringCol.makeDimensionSelector(
+          offset,
+          dimensionSpec.getExtractionFn()
+      );
+      if (disableValueMatchers) {
+        baseDimensionSelector = disableValueMatchers(baseDimensionSelector);
+      }
+      return dimensionSpec.decorate(baseDimensionSelector);
+    } else {
+      return null;
+    }
+  }
+
+  @Override
+  public DimensionSelector makeDimensionSelector(
+      DimensionSpec dimensionSpec,
+      ColumnSelectorFactory factory
+  )
+  {
+    if (enableRowBasedMethods) {
+      DimensionSelector baseDimensionSelector = factory.makeDimensionSelector(new DefaultDimensionSpec(
+          baseColumnName,
+          baseColumnName,
+          null
+      ));
+
+      if (disableValueMatchers) {
+        baseDimensionSelector = disableValueMatchers(baseDimensionSelector);
+      }
+      return dimensionSpec.decorate(baseDimensionSelector);
+    } else {
+      throw new UnsupportedOperationException("not supported");
+    }
+  }
+
+  @Override
+  public ColumnValueSelector<?> makeColumnValueSelector(
+      String columnName,
+      ColumnSelector columnSelector,
+      ReadableOffset offset
+  )
+  {
+    if (enableColumnBasedMethods) {
+      ColumnHolder holder = columnSelector.getColumnHolder(baseColumnName);
+      if (holder == null) {
+        return NilColumnValueSelector.instance();
+      }
+
+      StringDictionaryEncodedColumn stringCol = toStringDictionaryEncodedColumn(holder.getColumn());
+      return stringCol.makeColumnValueSelector(offset);
+    } else {
+      return null;
+    }
+  }
+
+  @Override
+  public ColumnValueSelector<?> makeColumnValueSelector(
+      String columnName,
+      ColumnSelectorFactory factory
+  )
+  {
+    if (enableRowBasedMethods) {
+      return factory.makeColumnValueSelector(baseColumnName);
+    } else {
+      throw new UnsupportedOperationException("not supported");
+    }
+  }
+
+  @Override
+  public BitmapIndex getBitmapIndex(String columnName, ColumnSelector columnSelector)
+  {
+    if (enableBitmaps) {
+      ColumnHolder holder = columnSelector.getColumnHolder(baseColumnName);
+      if (holder == null) {
+        return null;
+      }
+
+      return holder.getBitmapIndex();
+    } else {
+      throw new UnsupportedOperationException("not supported");
+    }
+  }
+
+  @Override
+  public ColumnCapabilities capabilities(String columnName)
+  {
+    ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl().setType(ValueType.STRING)
+                                                                      .setDictionaryEncoded(true);
+    if (enableBitmaps) {
+      capabilities.setHasBitmapIndexes(true);
+    }
+    return capabilities;
+  }
+
+  @Override
+  public List<String> requiredColumns()
+  {
+    return Collections.singletonList(baseColumnName);
+  }
+
+  @Override
+  public boolean usesDotNotation()
+  {
+    return false;
+  }
+
+  @Override
+  public byte[] getCacheKey()
+  {
+    return new byte[0];
+  }
+
+  private StringDictionaryEncodedColumn toStringDictionaryEncodedColumn(BaseColumn column)
+  {
+    if (!(column instanceof StringDictionaryEncodedColumn)) {
+      throw new IAE("I can only work with StringDictionaryEncodedColumn");
+    }
+
+    return (StringDictionaryEncodedColumn) column;
+  }
+
+  private DimensionSelector disableValueMatchers(DimensionSelector base)
+  {
+    return new DimensionSelector()
+    {
+      @Override
+      public IndexedInts getRow()
+      {
+        return base.getRow();
+      }
+
+      @Override
+      public ValueMatcher makeValueMatcher(@Nullable String value)
+      {
+        throw new UnsupportedOperationException("not supported");
+      }
+
+      @Override
+      public ValueMatcher makeValueMatcher(Predicate<String> predicate)
+      {
+        throw new UnsupportedOperationException("not supported");
+      }
+
+      @Override
+      public int getValueCardinality()
+      {
+        return base.getValueCardinality();
+      }
+
+      @Nullable
+      @Override
+      public String lookupName(int id)
+      {
+        return base.lookupName(id);
+      }
+
+      @Override
+      public boolean nameLookupPossibleInAdvance()
+      {
+        return base.nameLookupPossibleInAdvance();
+      }
+
+      @Nullable
+      @Override
+      public IdLookup idLookup()
+      {
+        return base.idLookup();
+      }
+
+      @Override
+      public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+      {
+        base.inspectRuntimeShape(inspector);
+      }
+
+      @Nullable
+      @Override
+      public Object getObject()
+      {
+        return base.getObject();
+      }
+
+      @Override
+      public Class<?> classOfObject()
+      {
+        return base.classOfObject();
+      }
+    };
+  }
+}
diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/DummyStringVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/DummyStringVirtualColumnTest.java
new file mode 100644
index 0000000..1158d57
--- /dev/null
+++ b/processing/src/test/java/org/apache/druid/segment/virtual/DummyStringVirtualColumnTest.java
@@ -0,0 +1,395 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.virtual;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+import org.apache.druid.data.input.Row;
+import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.java.util.common.granularity.Granularities;
+import org.apache.druid.query.QueryRunnerTestHelper;
+import org.apache.druid.query.Result;
+import org.apache.druid.query.aggregation.AggregationTestHelper;
+import org.apache.druid.query.aggregation.CountAggregatorFactory;
+import org.apache.druid.query.filter.RegexDimFilter;
+import org.apache.druid.query.filter.SelectorDimFilter;
+import org.apache.druid.query.groupby.GroupByQuery;
+import org.apache.druid.query.groupby.GroupByQueryConfig;
+import org.apache.druid.query.groupby.GroupByQueryRunnerTestHelper;
+import org.apache.druid.query.topn.TopNQuery;
+import org.apache.druid.query.topn.TopNQueryBuilder;
+import org.apache.druid.query.topn.TopNResultValue;
+import org.apache.druid.segment.IncrementalIndexSegment;
+import org.apache.druid.segment.QueryableIndexSegment;
+import org.apache.druid.segment.Segment;
+import org.apache.druid.segment.TestHelper;
+import org.apache.druid.segment.TestIndex;
+import org.apache.druid.timeline.SegmentId;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+public class DummyStringVirtualColumnTest
+{
+  private static final String VSTRING_DIM = "vstring";
+  private static final String COUNT = "count";
+
+  private final List<Segment> mmappedSegments;
+  private final List<Segment> inMemorySegments;
+  private final List<Segment> mixedSegments;
+
+  private final AggregationTestHelper topNTestHelper;
+  private final AggregationTestHelper groupByTestHelper;
+
+  public DummyStringVirtualColumnTest()
+  {
+    QueryableIndexSegment queryableIndexSegment = new QueryableIndexSegment(
+        TestIndex.getMMappedTestIndex(),
+        SegmentId.dummy(QueryRunnerTestHelper.dataSource)
+    );
+    IncrementalIndexSegment incrementalIndexSegment = new IncrementalIndexSegment(
+        TestIndex.getIncrementalTestIndex(),
+        SegmentId.dummy(QueryRunnerTestHelper.dataSource)
+    );
+
+    mmappedSegments = Lists.newArrayList(queryableIndexSegment, queryableIndexSegment);
+    inMemorySegments = Lists.newArrayList(incrementalIndexSegment, incrementalIndexSegment);
+    mixedSegments = Lists.newArrayList(incrementalIndexSegment, queryableIndexSegment);
+
+    topNTestHelper = AggregationTestHelper.createTopNQueryAggregationTestHelper(
+        Collections.EMPTY_LIST,
+        null
+    );
+
+    groupByTestHelper = AggregationTestHelper.createGroupByQueryAggregationTestHelper(
+        Collections.EMPTY_LIST,
+        new GroupByQueryConfig(),
+        null
+    );
+  }
+
+  @Test
+  public void testGroupByWithMMappedSegments()
+  {
+    testGroupBy(mmappedSegments, true, true);
+    testGroupBy(mmappedSegments, true, false);
+    testGroupBy(mmappedSegments, false, true);
+  }
+
+  @Test
+  public void testGroupByWithInMemorySegments()
+  {
+    testGroupBy(inMemorySegments, true, true);
+    testGroupBy(inMemorySegments, true, false);
+
+    try {
+      testGroupBy(inMemorySegments, false, true);
+      Assert.fail("must need row based methods");
+    }
+    catch (Exception ex) {
+    }
+  }
+
+  @Test
+  public void testGroupByWithMixedSegments()
+  {
+    testGroupBy(mixedSegments, true, true);
+    testGroupBy(mixedSegments, true, false);
+
+    try {
+      testGroupBy(mixedSegments, false, true);
+      Assert.fail("must need row based methods");
+    }
+    catch (Exception ex) {
+    }
+  }
+
+  @Test
+  public void testGroupByWithSelectFilterWithMMappedSegments()
+  {
+    testGroupByWithSelectFilter(mmappedSegments, true, false, false, false);
+    testGroupByWithSelectFilter(mmappedSegments, true, false, true, true);
+    testGroupByWithSelectFilter(mmappedSegments, false, true, true, true);
+    testGroupByWithSelectFilter(mmappedSegments, true, true, true, false);
+  }
+
+  @Test
+  public void testGroupByWithSelectFilterWithInMemorySegments()
+  {
+    testGroupByWithSelectFilter(inMemorySegments, true, false, false, false);
+    testGroupByWithSelectFilter(inMemorySegments, true, true, true, false);
+
+    try {
+      testGroupByWithSelectFilter(inMemorySegments, true, true, true, true);
+      Assert.fail("value matchers must be required");
+    }
+    catch (Exception ex) {
+
+    }
+  }
+
+  @Test
+  public void testGroupByWithSelectFilterWithMixedSegments()
+  {
+    testGroupByWithSelectFilter(mixedSegments, true, false, false, false);
+    testGroupByWithSelectFilter(mixedSegments, true, true, true, false);
+
+    try {
+      testGroupByWithSelectFilter(mixedSegments, true, true, true, true);
+      Assert.fail("value matchers must be required");
+    }
+    catch (Exception ex) {
+
+    }
+  }
+
+  @Test
+  public void testGroupByWithRegexFilterWithMMappedSegments()
+  {
+    testGroupByWithRegexFilter(mmappedSegments, true, false, false, false);
+    testGroupByWithRegexFilter(mmappedSegments, true, false, true, true);
+    testGroupByWithRegexFilter(mmappedSegments, false, true, true, true);
+    testGroupByWithRegexFilter(mmappedSegments, true, true, true, false);
+  }
+
+  @Test
+  public void testGroupByWithRegexFilterWithInMemorySegments()
+  {
+    testGroupByWithRegexFilter(inMemorySegments, true, false, false, false);
+    testGroupByWithRegexFilter(inMemorySegments, true, true, true, false);
+
+    try {
+      testGroupByWithRegexFilter(inMemorySegments, true, true, true, true);
+      Assert.fail("value matchers must be required");
+    }
+    catch (Exception ex) {
+
+    }
+  }
+
+  @Test
+  public void testGroupByWithRegexFilterWithMixedSegments()
+  {
+    testGroupByWithRegexFilter(mixedSegments, true, false, false, false);
+    testGroupByWithRegexFilter(mixedSegments, true, true, true, false);
+
+    try {
+      testGroupByWithRegexFilter(mixedSegments, true, true, true, true);
+      Assert.fail("value matchers must be required");
+    }
+    catch (Exception ex) {
+
+    }
+  }
+
+  @Test
+  public void testTopNWithMMappedSegments()
+  {
+    testTopN(mmappedSegments, true, true);
+    testTopN(mmappedSegments, true, false);
+    testTopN(mmappedSegments, false, true);
+  }
+
+  @Test
+  public void testTopNWithInMemorySegments()
+  {
+    testTopN(inMemorySegments, true, true);
+    testTopN(inMemorySegments, true, false);
+
+    try {
+      testTopN(inMemorySegments, false, true);
+      Assert.fail("must need row based methods");
+    }
+    catch (Exception ex) {
+    }
+  }
+
+  @Test
+  public void testTopNWithMixedSegments()
+  {
+    testTopN(mixedSegments, true, true);
+    testTopN(mixedSegments, true, false);
+
+    try {
+      testTopN(mixedSegments, false, true);
+      Assert.fail("must need row based methods");
+    }
+    catch (Exception ex) {
+    }
+  }
+
+  private void testGroupBy(List<Segment> segments, boolean enableRowBasedMethods, boolean enableColumnBasedMethods)
+  {
+    GroupByQuery query = new GroupByQuery.Builder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setGranularity(Granularities.ALL)
+        .setVirtualColumns(
+            new DummyStringVirtualColumn(QueryRunnerTestHelper.marketDimension, VSTRING_DIM,
+                                         enableRowBasedMethods, enableColumnBasedMethods,
+                                         false, true
+            )
+        )
+        .addDimension(VSTRING_DIM)
+        .setAggregatorSpecs(new CountAggregatorFactory(COUNT))
+        .setInterval("2000/2030")
+        .addOrderByColumn(VSTRING_DIM)
+        .build();
+
+    List<Row> rows = groupByTestHelper.runQueryOnSegmentsObjs(segments, query).toList();
+
+    List<Row> expectedRows = Arrays.asList(
+        GroupByQueryRunnerTestHelper.createExpectedRow("2000-01-01T00:00:00.000Z", COUNT, 1674L, VSTRING_DIM, "spot"),
+        GroupByQueryRunnerTestHelper.createExpectedRow(
+            "2000-01-01T00:00:00.000Z",
+            COUNT,
+            372L,
+            VSTRING_DIM,
+            "total_market"
+        ),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2000-01-01T00:00:00.000Z", COUNT, 372L, VSTRING_DIM, "upfront")
+    );
+
+    TestHelper.assertExpectedObjects(expectedRows, rows, "failed");
+  }
+
+  private void testGroupByWithSelectFilter(
+      List<Segment> segments,
+      boolean enableRowBasedMethods,
+      boolean enableColumnBasedMethods,
+      boolean enableBitmaps,
+      boolean disableValueMatchers
+  )
+  {
+    GroupByQuery query = new GroupByQuery.Builder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setGranularity(Granularities.ALL)
+        .setVirtualColumns(
+            new DummyStringVirtualColumn(
+                QueryRunnerTestHelper.marketDimension,
+                VSTRING_DIM,
+                enableRowBasedMethods,
+                enableColumnBasedMethods,
+                enableBitmaps,
+                disableValueMatchers
+            )
+        )
+        .addDimension(VSTRING_DIM)
+        .setAggregatorSpecs(new CountAggregatorFactory(COUNT))
+        .setInterval("2000/2030")
+        .addOrderByColumn(VSTRING_DIM)
+        .setDimFilter(new SelectorDimFilter(VSTRING_DIM, "spot", null))
+        .build();
+
+    List<Row> rows = groupByTestHelper.runQueryOnSegmentsObjs(segments, query).toList();
+
+    List<Row> expectedRows = Collections.singletonList(
+        GroupByQueryRunnerTestHelper.createExpectedRow("2000-01-01T00:00:00.000Z", COUNT, 1674L, VSTRING_DIM, "spot")
+    );
+
+    TestHelper.assertExpectedObjects(expectedRows, rows, "failed");
+  }
+
+  private void testGroupByWithRegexFilter(
+      List<Segment> segments,
+      boolean enableRowBasedMethods,
+      boolean enableColumnBasedMethods,
+      boolean enableBitmaps,
+      boolean disableValueMatchers
+  )
+  {
+    GroupByQuery query = new GroupByQuery.Builder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setGranularity(Granularities.ALL)
+        .setVirtualColumns(
+            new DummyStringVirtualColumn(
+                QueryRunnerTestHelper.marketDimension,
+                VSTRING_DIM,
+                enableRowBasedMethods,
+                enableColumnBasedMethods,
+                enableBitmaps,
+                disableValueMatchers
+            )
+        )
+        .addDimension(VSTRING_DIM)
+        .setAggregatorSpecs(new CountAggregatorFactory(COUNT))
+        .setInterval("2000/2030")
+        .addOrderByColumn(VSTRING_DIM)
+        .setDimFilter(new RegexDimFilter(VSTRING_DIM, "(spot)|(upfront)", null))
+        .build();
+
+    List<Row> rows = groupByTestHelper.runQueryOnSegmentsObjs(segments, query).toList();
+
+    List<Row> expectedRows = Arrays.asList(
+        GroupByQueryRunnerTestHelper.createExpectedRow("2000-01-01T00:00:00.000Z", COUNT, 1674L, VSTRING_DIM, "spot"),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2000-01-01T00:00:00.000Z", COUNT, 372L, VSTRING_DIM, "upfront")
+    );
+
+    TestHelper.assertExpectedObjects(expectedRows, rows, "failed");
+  }
+
+  private void testTopN(
+      List<Segment> segments,
+      boolean enableRowBasedMethods,
+      boolean enableColumnBasedMethods
+  )
+  {
+    TopNQuery query = new TopNQueryBuilder()
+        .dataSource(QueryRunnerTestHelper.dataSource)
+        .granularity(Granularities.ALL)
+        .dimension(VSTRING_DIM)
+        .metric(COUNT)
+        .threshold(1)
+        .aggregators(
+            Collections.singletonList(new CountAggregatorFactory(COUNT))
+        )
+        .virtualColumns(new DummyStringVirtualColumn(
+            QueryRunnerTestHelper.marketDimension,
+            VSTRING_DIM,
+            enableRowBasedMethods,
+            enableColumnBasedMethods,
+            false,
+            true
+        ))
+        .intervals("2000/2030")
+        .build();
+
+    List rows = topNTestHelper.runQueryOnSegmentsObjs(segments, query).toList();
+
+    List<Result<TopNResultValue>> expectedRows = Collections.singletonList(
+        new Result<>(
+            DateTimes.of("2011-01-12T00:00:00.000Z"),
+            new TopNResultValue(
+                Collections.<Map<String, Object>>singletonList(
+                    ImmutableMap.<String, Object>builder()
+                        .put(COUNT, 1674L)
+                        .put(VSTRING_DIM, "spot")
+                        .build()
+                )
+            )
+        )
+    );
+
+    TestHelper.assertExpectedResults(expectedRows, (List<Result<TopNResultValue>>) rows, "failed");
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org