You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by cw...@apache.org on 2023/02/09 11:16:52 UTC

[druid] branch master updated: fix filtering nested field virtual column when used with non nested column input (#13779)

This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new ffeda72abb fix filtering nested field virtual column when used with non nested column input (#13779)
ffeda72abb is described below

commit ffeda72abb40d8e849a95a6ccc1a76a870e45dd0
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Thu Feb 9 03:16:38 2023 -0800

    fix filtering nested field virtual column when used with non nested column input (#13779)
    
    * fix filtering nested field virtual column when used with non nested column input
---
 .../query/dimension/DefaultDimensionSpec.java      |   5 +
 .../org/apache/druid/segment/VirtualColumn.java    |   5 +-
 .../segment/nested/NestedDataComplexColumn.java    |  26 ---
 .../segment/virtual/ExpressionVirtualColumn.java   |   1 +
 .../segment/virtual/FallbackVirtualColumn.java     |   1 +
 .../segment/virtual/ListFilteredVirtualColumn.java |   1 +
 .../segment/virtual/NestedFieldVirtualColumn.java  |  60 ++++--
 .../query/groupby/NestedDataGroupByQueryTest.java  | 225 +++++++++++++++++++++
 8 files changed, 282 insertions(+), 42 deletions(-)

diff --git a/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java b/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java
index 0a869a9227..5f613e5964 100644
--- a/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java
+++ b/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java
@@ -41,6 +41,11 @@ public class DefaultDimensionSpec implements DimensionSpec
     return new DefaultDimensionSpec(dimensionName, dimensionName);
   }
 
+  public static DefaultDimensionSpec of(String dimensionName, ColumnType columnType)
+  {
+    return new DefaultDimensionSpec(dimensionName, dimensionName, columnType);
+  }
+
   private static final byte CACHE_TYPE_ID = 0x0;
   private final String dimension;
   private final String outputName;
diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java
index b32f54186d..e79baa00b1 100644
--- a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java
@@ -257,10 +257,10 @@ public interface VirtualColumn extends Cacheable
 
   /**
    * Return the {@link ColumnCapabilities} which best describe the optimal selector to read from this virtual column.
-   *
+   * <p>
    * The {@link ColumnInspector} (most likely corresponding to an underlying {@link ColumnSelectorFactory} of a query)
    * allows the virtual column to consider this information if necessary to compute its output type details.
-   *
+   * <p>
    * Examples of this include the {@link ExpressionVirtualColumn}, which takes input from other columns and uses the
    * {@link ColumnInspector} to infer the output type of expressions based on the types of the inputs.
    *
@@ -268,6 +268,7 @@ public interface VirtualColumn extends Cacheable
    * @param columnName the name this virtual column was referenced with
    * @return capabilities, must not be null
    */
+  @Nullable
   default ColumnCapabilities capabilities(ColumnInspector inspector, String columnName)
   {
     return capabilities(columnName);
diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java
index 2cc9cfb7b0..2ee1a5808e 100644
--- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java
@@ -20,12 +20,9 @@
 
 package org.apache.druid.segment.nested;
 
-import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.query.extraction.ExtractionFn;
-import org.apache.druid.segment.ColumnSelector;
 import org.apache.druid.segment.ColumnValueSelector;
 import org.apache.druid.segment.DimensionSelector;
-import org.apache.druid.segment.column.BaseColumn;
 import org.apache.druid.segment.column.ColumnHolder;
 import org.apache.druid.segment.column.ColumnIndexSupplier;
 import org.apache.druid.segment.column.ColumnType;
@@ -47,29 +44,6 @@ import java.util.Set;
  */
 public abstract class NestedDataComplexColumn implements ComplexColumn
 {
-  @Nullable
-  public static NestedDataComplexColumn fromColumnSelector(
-      ColumnSelector columnSelector,
-      String columnName
-  )
-  {
-    ColumnHolder holder = columnSelector.getColumnHolder(columnName);
-    if (holder == null) {
-      return null;
-    }
-    BaseColumn theColumn = holder.getColumn();
-    if (theColumn instanceof CompressedNestedDataComplexColumn) {
-      return (CompressedNestedDataComplexColumn) theColumn;
-    }
-    throw new IAE(
-        "Column [%s] is invalid type, found [%s] instead of [%s]",
-        columnName,
-        theColumn.getClass(),
-        NestedDataComplexColumn.class.getSimpleName()
-    );
-  }
-
-
   /**
    * Make a {@link DimensionSelector} for a nested literal field column associated with this nested
    * complex column specified by a sequence of {@link NestedPathPart}.
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
index b9a9366b83..286626baf8 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
@@ -194,6 +194,7 @@ public class ExpressionVirtualColumn implements VirtualColumn
     return new ColumnCapabilitiesImpl().setType(outputType == null ? ColumnType.FLOAT : outputType);
   }
 
+  @Nullable
   @Override
   public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName)
   {
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/FallbackVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/FallbackVirtualColumn.java
index 3a9209fb33..461a6b68df 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/FallbackVirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/FallbackVirtualColumn.java
@@ -163,6 +163,7 @@ public class FallbackVirtualColumn implements VirtualColumn
     return ColumnCapabilitiesImpl.createDefault();
   }
 
+  @Nullable
   @SuppressWarnings("ConstantConditions")
   @Override
   public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName)
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
index f5f7e94d37..3cdaf24508 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
@@ -158,6 +158,7 @@ public class ListFilteredVirtualColumn implements VirtualColumn
                                        .setHasBitmapIndexes(true);
   }
 
+  @Nullable
   @Override
   public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName)
   {
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
index 86e9c95a80..62cd303bc9 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
@@ -54,6 +54,7 @@ import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.column.ValueTypes;
 import org.apache.druid.segment.data.IndexedInts;
 import org.apache.druid.segment.data.ReadableOffset;
+import org.apache.druid.segment.nested.CompressedNestedDataComplexColumn;
 import org.apache.druid.segment.nested.NestedDataComplexColumn;
 import org.apache.druid.segment.nested.NestedDataComplexTypeSerde;
 import org.apache.druid.segment.nested.NestedPathArrayElement;
@@ -410,6 +411,9 @@ public class NestedFieldVirtualColumn implements VirtualColumn
     // not a nested column, but we can still do stuff if the path is the 'root', indicated by an empty path parts
     if (parts.isEmpty()) {
       ColumnCapabilities capabilities = holder.getCapabilities();
+      // expectedType shouldn't possibly be null if we are being asked for an object selector and the underlying column
+      // is numeric, else we would have been asked for a value selector
+      Preconditions.checkArgument(expectedType != null, "Asked for a VectorObjectSelector on a numeric column, 'expectedType' must not be null");
       if (capabilities.isNumeric()) {
         return ExpressionVectorSelectors.castValueSelectorToObject(
             offset,
@@ -600,12 +604,18 @@ public class NestedFieldVirtualColumn implements VirtualColumn
       ColumnSelector selector
   )
   {
-    final NestedDataComplexColumn column = NestedDataComplexColumn.fromColumnSelector(selector, this.columnName);
-
-    if (column == null) {
+    ColumnHolder holder = selector.getColumnHolder(this.columnName);
+    if (holder == null) {
       return null;
     }
-    return column.getColumnIndexSupplier(parts);
+    BaseColumn theColumn = holder.getColumn();
+    if (theColumn instanceof CompressedNestedDataComplexColumn) {
+      return ((CompressedNestedDataComplexColumn<?>) theColumn).getColumnIndexSupplier(parts);
+    }
+    if (parts.isEmpty()) {
+      return holder.getIndexSupplier();
+    }
+    return null;
   }
 
   @Override
@@ -625,6 +635,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn
                                  .setHasNulls(expectedType == null || !expectedType.isNumeric() || (expectedType.isNumeric() && NullHandling.sqlCompatible()));
   }
 
+  @Nullable
   @Override
   public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName)
   {
@@ -637,17 +648,38 @@ public class NestedFieldVirtualColumn implements VirtualColumn
     }
     // ColumnInspector isn't really enough... we need the ability to read the complex column itself to examine
     // the nested fields type information to really be accurate here, so we rely on the expectedType to guide us
-    final ColumnCapabilities complexCapabilites = inspector.getColumnCapabilities(this.columnName);
-    if (complexCapabilites != null && complexCapabilites.isDictionaryEncoded().isTrue()) {
-      return ColumnCapabilitiesImpl.createDefault()
-                                   .setType(expectedType != null ? expectedType : ColumnType.STRING)
-                                   .setDictionaryEncoded(true)
-                                   .setDictionaryValuesSorted(true)
-                                   .setDictionaryValuesUnique(true)
-                                   .setHasBitmapIndexes(true)
-                                   .setHasNulls(expectedType == null || (expectedType.isNumeric()
-                                                                         && NullHandling.sqlCompatible()));
+    final ColumnCapabilities capabilities = inspector.getColumnCapabilities(this.columnName);
+
+    if (capabilities != null) {
+      // if the underlying column is a nested column (and persisted to disk, re: the dictionary encoded check)
+      if (capabilities.is(ValueType.COMPLEX) &&
+          capabilities.getComplexTypeName().equals(NestedDataComplexTypeSerde.TYPE_NAME) &&
+          capabilities.isDictionaryEncoded().isTrue()) {
+        return ColumnCapabilitiesImpl.createDefault()
+                                     .setType(expectedType != null ? expectedType : ColumnType.STRING)
+                                     .setDictionaryEncoded(true)
+                                     .setDictionaryValuesSorted(true)
+                                     .setDictionaryValuesUnique(true)
+                                     .setHasBitmapIndexes(true)
+                                     .setHasNulls(expectedType == null || (expectedType.isNumeric()
+                                                                           && NullHandling.sqlCompatible()));
+      }
+      // column is not nested, use underlying column capabilities, adjusted for expectedType as necessary
+      if (parts.isEmpty()) {
+        ColumnCapabilitiesImpl copy = ColumnCapabilitiesImpl.copyOf(capabilities);
+        if (expectedType != null) {
+          copy.setType(expectedType);
+          copy.setHasNulls(
+              copy.hasNulls().or(ColumnCapabilities.Capable.of(expectedType.getType() != capabilities.getType()))
+          );
+        }
+        return copy;
+      } else if (capabilities.isPrimitive()) {
+        // path doesn't exist and column isn't nested, so effectively column doesn't exist
+        return null;
+      }
     }
+
     return capabilities(columnName);
   }
 
diff --git a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java
index 9f33735c03..079307eadf 100644
--- a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java
+++ b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java
@@ -37,6 +37,7 @@ import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
 import org.apache.druid.query.dimension.DefaultDimensionSpec;
 import org.apache.druid.query.expression.TestExprMacroTable;
 import org.apache.druid.query.filter.InDimFilter;
+import org.apache.druid.query.filter.SelectorDimFilter;
 import org.apache.druid.query.groupby.strategy.GroupByStrategySelector;
 import org.apache.druid.segment.Segment;
 import org.apache.druid.segment.column.ColumnType;
@@ -312,6 +313,230 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
     );
   }
 
+  @Test
+  public void testGroupBySomeFieldOnStringColumn()
+  {
+    GroupByQuery groupQuery = GroupByQuery.builder()
+                                          .setDataSource("test_datasource")
+                                          .setGranularity(Granularities.ALL)
+                                          .setInterval(Intervals.ETERNITY)
+                                          .setDimensions(DefaultDimensionSpec.of("v0"), DefaultDimensionSpec.of("v1"))
+                                          .setVirtualColumns(
+                                              new NestedFieldVirtualColumn("dim", "$", "v0"),
+                                              new NestedFieldVirtualColumn("dim", "$.x", "v1")
+                                          )
+                                          .setAggregatorSpecs(new CountAggregatorFactory("count"))
+                                          .setContext(getContext())
+                                          .build();
+
+
+    runResults(
+        groupQuery,
+        ImmutableList.of(
+            new Object[]{"100", null, 2L},
+            new Object[]{"hello", null, 12L},
+            new Object[]{"world", null, 2L}
+        )
+    );
+  }
+
+  @Test
+  public void testGroupBySomeFieldOnStringColumnWithFilter()
+  {
+    List<String> vals = new ArrayList<>();
+    vals.add("100");
+    vals.add("200");
+    vals.add("300");
+    GroupByQuery groupQuery = GroupByQuery.builder()
+                                          .setDataSource("test_datasource")
+                                          .setGranularity(Granularities.ALL)
+                                          .setInterval(Intervals.ETERNITY)
+                                          .setDimensions(DefaultDimensionSpec.of("v0"))
+                                          .setVirtualColumns(new NestedFieldVirtualColumn("dim", "$", "v0"))
+                                          .setAggregatorSpecs(new CountAggregatorFactory("count"))
+                                          .setContext(getContext())
+                                          .setDimFilter(new InDimFilter("v0", vals, null))
+                                          .build();
+
+
+    runResults(
+        groupQuery,
+        ImmutableList.of(
+            new Object[]{"100", 2L}
+        )
+    );
+  }
+
+  @Test
+  public void testGroupBySomeFieldOnStringColumnWithFilterExpectedType()
+  {
+    List<String> vals = new ArrayList<>();
+    vals.add("100");
+    vals.add("200");
+    vals.add("300");
+    GroupByQuery groupQuery = GroupByQuery.builder()
+                                          .setDataSource("test_datasource")
+                                          .setGranularity(Granularities.ALL)
+                                          .setInterval(Intervals.ETERNITY)
+                                          .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.LONG))
+                                          .setVirtualColumns(new NestedFieldVirtualColumn("dim", "$", "v0", ColumnType.LONG))
+                                          .setAggregatorSpecs(new CountAggregatorFactory("count"))
+                                          .setContext(getContext())
+                                          .setDimFilter(new InDimFilter("v0", vals, null))
+                                          .build();
+
+
+    runResults(
+        groupQuery,
+        ImmutableList.of(
+            new Object[]{100L, 2L}
+        ),
+        false,
+        true
+    );
+  }
+
+  @Test
+  public void testGroupBySomeFieldOnStringColumnWithFilterNil()
+  {
+    List<String> vals = new ArrayList<>();
+    vals.add("100");
+    vals.add("200");
+    vals.add("300");
+    GroupByQuery groupQuery = GroupByQuery.builder()
+                                          .setDataSource("test_datasource")
+                                          .setGranularity(Granularities.ALL)
+                                          .setInterval(Intervals.ETERNITY)
+                                          .setDimensions(DefaultDimensionSpec.of("v0"))
+                                          .setVirtualColumns(new NestedFieldVirtualColumn("dim", "$.x", "v0"))
+                                          .setAggregatorSpecs(new CountAggregatorFactory("count"))
+                                          .setContext(getContext())
+                                          .setDimFilter(new InDimFilter("v0", vals, null))
+                                          .build();
+
+
+    runResults(
+        groupQuery,
+        ImmutableList.of()
+    );
+  }
+
+  @Test
+  public void testGroupBySomeFieldOnLongColumn()
+  {
+    GroupByQuery groupQuery = GroupByQuery.builder()
+                                          .setDataSource("test_datasource")
+                                          .setGranularity(Granularities.ALL)
+                                          .setInterval(Intervals.ETERNITY)
+                                          .setDimensions(
+                                              DefaultDimensionSpec.of("v0", ColumnType.LONG),
+                                              DefaultDimensionSpec.of("v1", ColumnType.LONG)
+                                          )
+                                          .setVirtualColumns(
+                                              new NestedFieldVirtualColumn("__time", "$", "v0"),
+                                              new NestedFieldVirtualColumn("__time", "$.x", "v1")
+                                          )
+                                          .setAggregatorSpecs(new CountAggregatorFactory("count"))
+                                          .setContext(getContext())
+                                          .build();
+
+
+    runResults(
+        groupQuery,
+        ImmutableList.of(
+            new Object[]{1609459200000L, NullHandling.defaultLongValue(), 8L},
+            new Object[]{1609545600000L, NullHandling.defaultLongValue(), 8L}
+        ),
+        false,
+        true
+    );
+  }
+
+  @Test
+  public void testGroupBySomeFieldOnLongColumnFilter()
+  {
+    GroupByQuery groupQuery = GroupByQuery.builder()
+                                          .setDataSource("test_datasource")
+                                          .setGranularity(Granularities.ALL)
+                                          .setInterval(Intervals.ETERNITY)
+                                          .setDimensions(
+                                              DefaultDimensionSpec.of("v0", ColumnType.LONG)
+                                          )
+                                          .setVirtualColumns(
+                                              new NestedFieldVirtualColumn("__time", "$", "v0")
+                                          )
+                                          .setAggregatorSpecs(new CountAggregatorFactory("count"))
+                                          .setDimFilter(new SelectorDimFilter("v0", "1609459200000", null))
+                                          .setContext(getContext())
+                                          .build();
+
+
+    runResults(
+        groupQuery,
+        ImmutableList.of(
+            new Object[]{1609459200000L, 8L}
+        ),
+        false,
+        true
+    );
+  }
+
+  @Test
+  public void testGroupBySomeFieldOnLongColumnFilterExpectedType()
+  {
+    GroupByQuery groupQuery = GroupByQuery.builder()
+                                          .setDataSource("test_datasource")
+                                          .setGranularity(Granularities.ALL)
+                                          .setInterval(Intervals.ETERNITY)
+                                          .setDimensions(
+                                              DefaultDimensionSpec.of("v0", ColumnType.STRING)
+                                          )
+                                          .setVirtualColumns(
+                                              new NestedFieldVirtualColumn("__time", "$", "v0", ColumnType.STRING)
+                                          )
+                                          .setAggregatorSpecs(new CountAggregatorFactory("count"))
+                                          .setDimFilter(new SelectorDimFilter("v0", "1609459200000", null))
+                                          .setContext(getContext())
+                                          .build();
+
+
+    runResults(
+        groupQuery,
+        ImmutableList.of(
+            new Object[]{"1609459200000", 8L}
+        ),
+        true,
+        false
+    );
+  }
+
+  @Test
+  public void testGroupBySomeFieldOnLongColumnFilterNil()
+  {
+    GroupByQuery groupQuery = GroupByQuery.builder()
+                                          .setDataSource("test_datasource")
+                                          .setGranularity(Granularities.ALL)
+                                          .setInterval(Intervals.ETERNITY)
+                                          .setDimensions(
+                                              DefaultDimensionSpec.of("v0", ColumnType.LONG)
+                                          )
+                                          .setVirtualColumns(
+                                              new NestedFieldVirtualColumn("__time", "$.x", "v0")
+                                          )
+                                          .setAggregatorSpecs(new CountAggregatorFactory("count"))
+                                          .setDimFilter(new SelectorDimFilter("v0", "1609459200000", null))
+                                          .setContext(getContext())
+                                          .build();
+
+
+    runResults(
+        groupQuery,
+        ImmutableList.of(),
+        false,
+        true
+    );
+  }
+
   private void runResults(GroupByQuery groupQuery, List<Object[]> expectedResults)
   {
     runResults(groupQuery, expectedResults, false, false);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org