You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by cw...@apache.org on 2023/02/09 11:16:52 UTC
[druid] branch master updated: fix filtering nested field virtual column when used with non nested column input (#13779)
This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new ffeda72abb fix filtering nested field virtual column when used with non nested column input (#13779)
ffeda72abb is described below
commit ffeda72abb40d8e849a95a6ccc1a76a870e45dd0
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Thu Feb 9 03:16:38 2023 -0800
fix filtering nested field virtual column when used with non nested column input (#13779)
* fix filtering nested field virtual column when used with non nested column input
---
.../query/dimension/DefaultDimensionSpec.java | 5 +
.../org/apache/druid/segment/VirtualColumn.java | 5 +-
.../segment/nested/NestedDataComplexColumn.java | 26 ---
.../segment/virtual/ExpressionVirtualColumn.java | 1 +
.../segment/virtual/FallbackVirtualColumn.java | 1 +
.../segment/virtual/ListFilteredVirtualColumn.java | 1 +
.../segment/virtual/NestedFieldVirtualColumn.java | 60 ++++--
.../query/groupby/NestedDataGroupByQueryTest.java | 225 +++++++++++++++++++++
8 files changed, 282 insertions(+), 42 deletions(-)
diff --git a/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java b/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java
index 0a869a9227..5f613e5964 100644
--- a/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java
+++ b/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java
@@ -41,6 +41,11 @@ public class DefaultDimensionSpec implements DimensionSpec
return new DefaultDimensionSpec(dimensionName, dimensionName);
}
+ public static DefaultDimensionSpec of(String dimensionName, ColumnType columnType)
+ {
+ return new DefaultDimensionSpec(dimensionName, dimensionName, columnType);
+ }
+
private static final byte CACHE_TYPE_ID = 0x0;
private final String dimension;
private final String outputName;
diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java
index b32f54186d..e79baa00b1 100644
--- a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java
@@ -257,10 +257,10 @@ public interface VirtualColumn extends Cacheable
/**
* Return the {@link ColumnCapabilities} which best describe the optimal selector to read from this virtual column.
- *
+ * <p>
* The {@link ColumnInspector} (most likely corresponding to an underlying {@link ColumnSelectorFactory} of a query)
* allows the virtual column to consider this information if necessary to compute its output type details.
- *
+ * <p>
* Examples of this include the {@link ExpressionVirtualColumn}, which takes input from other columns and uses the
* {@link ColumnInspector} to infer the output type of expressions based on the types of the inputs.
*
@@ -268,6 +268,7 @@ public interface VirtualColumn extends Cacheable
* @param columnName the name this virtual column was referenced with
* @return capabilities, must not be null
*/
+ @Nullable
default ColumnCapabilities capabilities(ColumnInspector inspector, String columnName)
{
return capabilities(columnName);
diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java
index 2cc9cfb7b0..2ee1a5808e 100644
--- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java
@@ -20,12 +20,9 @@
package org.apache.druid.segment.nested;
-import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.extraction.ExtractionFn;
-import org.apache.druid.segment.ColumnSelector;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionSelector;
-import org.apache.druid.segment.column.BaseColumn;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.ColumnType;
@@ -47,29 +44,6 @@ import java.util.Set;
*/
public abstract class NestedDataComplexColumn implements ComplexColumn
{
- @Nullable
- public static NestedDataComplexColumn fromColumnSelector(
- ColumnSelector columnSelector,
- String columnName
- )
- {
- ColumnHolder holder = columnSelector.getColumnHolder(columnName);
- if (holder == null) {
- return null;
- }
- BaseColumn theColumn = holder.getColumn();
- if (theColumn instanceof CompressedNestedDataComplexColumn) {
- return (CompressedNestedDataComplexColumn) theColumn;
- }
- throw new IAE(
- "Column [%s] is invalid type, found [%s] instead of [%s]",
- columnName,
- theColumn.getClass(),
- NestedDataComplexColumn.class.getSimpleName()
- );
- }
-
-
/**
* Make a {@link DimensionSelector} for a nested literal field column associated with this nested
* complex column specified by a sequence of {@link NestedPathPart}.
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
index b9a9366b83..286626baf8 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
@@ -194,6 +194,7 @@ public class ExpressionVirtualColumn implements VirtualColumn
return new ColumnCapabilitiesImpl().setType(outputType == null ? ColumnType.FLOAT : outputType);
}
+ @Nullable
@Override
public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName)
{
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/FallbackVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/FallbackVirtualColumn.java
index 3a9209fb33..461a6b68df 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/FallbackVirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/FallbackVirtualColumn.java
@@ -163,6 +163,7 @@ public class FallbackVirtualColumn implements VirtualColumn
return ColumnCapabilitiesImpl.createDefault();
}
+ @Nullable
@SuppressWarnings("ConstantConditions")
@Override
public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName)
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
index f5f7e94d37..3cdaf24508 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java
@@ -158,6 +158,7 @@ public class ListFilteredVirtualColumn implements VirtualColumn
.setHasBitmapIndexes(true);
}
+ @Nullable
@Override
public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName)
{
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
index 86e9c95a80..62cd303bc9 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
@@ -54,6 +54,7 @@ import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.column.ValueTypes;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.data.ReadableOffset;
+import org.apache.druid.segment.nested.CompressedNestedDataComplexColumn;
import org.apache.druid.segment.nested.NestedDataComplexColumn;
import org.apache.druid.segment.nested.NestedDataComplexTypeSerde;
import org.apache.druid.segment.nested.NestedPathArrayElement;
@@ -410,6 +411,9 @@ public class NestedFieldVirtualColumn implements VirtualColumn
// not a nested column, but we can still do stuff if the path is the 'root', indicated by an empty path parts
if (parts.isEmpty()) {
ColumnCapabilities capabilities = holder.getCapabilities();
+ // expectedType shouldn't possibly be null if we are being asked for an object selector and the underlying column
+ // is numeric, else we would have been asked for a value selector
+ Preconditions.checkArgument(expectedType != null, "Asked for a VectorObjectSelector on a numeric column, 'expectedType' must not be null");
if (capabilities.isNumeric()) {
return ExpressionVectorSelectors.castValueSelectorToObject(
offset,
@@ -600,12 +604,18 @@ public class NestedFieldVirtualColumn implements VirtualColumn
ColumnSelector selector
)
{
- final NestedDataComplexColumn column = NestedDataComplexColumn.fromColumnSelector(selector, this.columnName);
-
- if (column == null) {
+ ColumnHolder holder = selector.getColumnHolder(this.columnName);
+ if (holder == null) {
return null;
}
- return column.getColumnIndexSupplier(parts);
+ BaseColumn theColumn = holder.getColumn();
+ if (theColumn instanceof CompressedNestedDataComplexColumn) {
+ return ((CompressedNestedDataComplexColumn<?>) theColumn).getColumnIndexSupplier(parts);
+ }
+ if (parts.isEmpty()) {
+ return holder.getIndexSupplier();
+ }
+ return null;
}
@Override
@@ -625,6 +635,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn
.setHasNulls(expectedType == null || !expectedType.isNumeric() || (expectedType.isNumeric() && NullHandling.sqlCompatible()));
}
+ @Nullable
@Override
public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName)
{
@@ -637,17 +648,38 @@ public class NestedFieldVirtualColumn implements VirtualColumn
}
// ColumnInspector isn't really enough... we need the ability to read the complex column itself to examine
// the nested fields type information to really be accurate here, so we rely on the expectedType to guide us
- final ColumnCapabilities complexCapabilites = inspector.getColumnCapabilities(this.columnName);
- if (complexCapabilites != null && complexCapabilites.isDictionaryEncoded().isTrue()) {
- return ColumnCapabilitiesImpl.createDefault()
- .setType(expectedType != null ? expectedType : ColumnType.STRING)
- .setDictionaryEncoded(true)
- .setDictionaryValuesSorted(true)
- .setDictionaryValuesUnique(true)
- .setHasBitmapIndexes(true)
- .setHasNulls(expectedType == null || (expectedType.isNumeric()
- && NullHandling.sqlCompatible()));
+ final ColumnCapabilities capabilities = inspector.getColumnCapabilities(this.columnName);
+
+ if (capabilities != null) {
+ // if the underlying column is a nested column (and persisted to disk, re: the dictionary encoded check)
+ if (capabilities.is(ValueType.COMPLEX) &&
+ capabilities.getComplexTypeName().equals(NestedDataComplexTypeSerde.TYPE_NAME) &&
+ capabilities.isDictionaryEncoded().isTrue()) {
+ return ColumnCapabilitiesImpl.createDefault()
+ .setType(expectedType != null ? expectedType : ColumnType.STRING)
+ .setDictionaryEncoded(true)
+ .setDictionaryValuesSorted(true)
+ .setDictionaryValuesUnique(true)
+ .setHasBitmapIndexes(true)
+ .setHasNulls(expectedType == null || (expectedType.isNumeric()
+ && NullHandling.sqlCompatible()));
+ }
+ // column is not nested, use underlying column capabilities, adjusted for expectedType as necessary
+ if (parts.isEmpty()) {
+ ColumnCapabilitiesImpl copy = ColumnCapabilitiesImpl.copyOf(capabilities);
+ if (expectedType != null) {
+ copy.setType(expectedType);
+ copy.setHasNulls(
+ copy.hasNulls().or(ColumnCapabilities.Capable.of(expectedType.getType() != capabilities.getType()))
+ );
+ }
+ return copy;
+ } else if (capabilities.isPrimitive()) {
+ // path doesn't exist and column isn't nested, so effectively column doesn't exist
+ return null;
+ }
}
+
return capabilities(columnName);
}
diff --git a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java
index 9f33735c03..079307eadf 100644
--- a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java
+++ b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java
@@ -37,6 +37,7 @@ import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.filter.InDimFilter;
+import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.query.groupby.strategy.GroupByStrategySelector;
import org.apache.druid.segment.Segment;
import org.apache.druid.segment.column.ColumnType;
@@ -312,6 +313,230 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
);
}
+ @Test
+ public void testGroupBySomeFieldOnStringColumn()
+ {
+ GroupByQuery groupQuery = GroupByQuery.builder()
+ .setDataSource("test_datasource")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimensions(DefaultDimensionSpec.of("v0"), DefaultDimensionSpec.of("v1"))
+ .setVirtualColumns(
+ new NestedFieldVirtualColumn("dim", "$", "v0"),
+ new NestedFieldVirtualColumn("dim", "$.x", "v1")
+ )
+ .setAggregatorSpecs(new CountAggregatorFactory("count"))
+ .setContext(getContext())
+ .build();
+
+
+ runResults(
+ groupQuery,
+ ImmutableList.of(
+ new Object[]{"100", null, 2L},
+ new Object[]{"hello", null, 12L},
+ new Object[]{"world", null, 2L}
+ )
+ );
+ }
+
+ @Test
+ public void testGroupBySomeFieldOnStringColumnWithFilter()
+ {
+ List<String> vals = new ArrayList<>();
+ vals.add("100");
+ vals.add("200");
+ vals.add("300");
+ GroupByQuery groupQuery = GroupByQuery.builder()
+ .setDataSource("test_datasource")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimensions(DefaultDimensionSpec.of("v0"))
+ .setVirtualColumns(new NestedFieldVirtualColumn("dim", "$", "v0"))
+ .setAggregatorSpecs(new CountAggregatorFactory("count"))
+ .setContext(getContext())
+ .setDimFilter(new InDimFilter("v0", vals, null))
+ .build();
+
+
+ runResults(
+ groupQuery,
+ ImmutableList.of(
+ new Object[]{"100", 2L}
+ )
+ );
+ }
+
+ @Test
+ public void testGroupBySomeFieldOnStringColumnWithFilterExpectedType()
+ {
+ List<String> vals = new ArrayList<>();
+ vals.add("100");
+ vals.add("200");
+ vals.add("300");
+ GroupByQuery groupQuery = GroupByQuery.builder()
+ .setDataSource("test_datasource")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.LONG))
+ .setVirtualColumns(new NestedFieldVirtualColumn("dim", "$", "v0", ColumnType.LONG))
+ .setAggregatorSpecs(new CountAggregatorFactory("count"))
+ .setContext(getContext())
+ .setDimFilter(new InDimFilter("v0", vals, null))
+ .build();
+
+
+ runResults(
+ groupQuery,
+ ImmutableList.of(
+ new Object[]{100L, 2L}
+ ),
+ false,
+ true
+ );
+ }
+
+ @Test
+ public void testGroupBySomeFieldOnStringColumnWithFilterNil()
+ {
+ List<String> vals = new ArrayList<>();
+ vals.add("100");
+ vals.add("200");
+ vals.add("300");
+ GroupByQuery groupQuery = GroupByQuery.builder()
+ .setDataSource("test_datasource")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimensions(DefaultDimensionSpec.of("v0"))
+ .setVirtualColumns(new NestedFieldVirtualColumn("dim", "$.x", "v0"))
+ .setAggregatorSpecs(new CountAggregatorFactory("count"))
+ .setContext(getContext())
+ .setDimFilter(new InDimFilter("v0", vals, null))
+ .build();
+
+
+ runResults(
+ groupQuery,
+ ImmutableList.of()
+ );
+ }
+
+ @Test
+ public void testGroupBySomeFieldOnLongColumn()
+ {
+ GroupByQuery groupQuery = GroupByQuery.builder()
+ .setDataSource("test_datasource")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimensions(
+ DefaultDimensionSpec.of("v0", ColumnType.LONG),
+ DefaultDimensionSpec.of("v1", ColumnType.LONG)
+ )
+ .setVirtualColumns(
+ new NestedFieldVirtualColumn("__time", "$", "v0"),
+ new NestedFieldVirtualColumn("__time", "$.x", "v1")
+ )
+ .setAggregatorSpecs(new CountAggregatorFactory("count"))
+ .setContext(getContext())
+ .build();
+
+
+ runResults(
+ groupQuery,
+ ImmutableList.of(
+ new Object[]{1609459200000L, NullHandling.defaultLongValue(), 8L},
+ new Object[]{1609545600000L, NullHandling.defaultLongValue(), 8L}
+ ),
+ false,
+ true
+ );
+ }
+
+ @Test
+ public void testGroupBySomeFieldOnLongColumnFilter()
+ {
+ GroupByQuery groupQuery = GroupByQuery.builder()
+ .setDataSource("test_datasource")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimensions(
+ DefaultDimensionSpec.of("v0", ColumnType.LONG)
+ )
+ .setVirtualColumns(
+ new NestedFieldVirtualColumn("__time", "$", "v0")
+ )
+ .setAggregatorSpecs(new CountAggregatorFactory("count"))
+ .setDimFilter(new SelectorDimFilter("v0", "1609459200000", null))
+ .setContext(getContext())
+ .build();
+
+
+ runResults(
+ groupQuery,
+ ImmutableList.of(
+ new Object[]{1609459200000L, 8L}
+ ),
+ false,
+ true
+ );
+ }
+
+ @Test
+ public void testGroupBySomeFieldOnLongColumnFilterExpectedType()
+ {
+ GroupByQuery groupQuery = GroupByQuery.builder()
+ .setDataSource("test_datasource")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimensions(
+ DefaultDimensionSpec.of("v0", ColumnType.STRING)
+ )
+ .setVirtualColumns(
+ new NestedFieldVirtualColumn("__time", "$", "v0", ColumnType.STRING)
+ )
+ .setAggregatorSpecs(new CountAggregatorFactory("count"))
+ .setDimFilter(new SelectorDimFilter("v0", "1609459200000", null))
+ .setContext(getContext())
+ .build();
+
+
+ runResults(
+ groupQuery,
+ ImmutableList.of(
+ new Object[]{"1609459200000", 8L}
+ ),
+ true,
+ false
+ );
+ }
+
+ @Test
+ public void testGroupBySomeFieldOnLongColumnFilterNil()
+ {
+ GroupByQuery groupQuery = GroupByQuery.builder()
+ .setDataSource("test_datasource")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimensions(
+ DefaultDimensionSpec.of("v0", ColumnType.LONG)
+ )
+ .setVirtualColumns(
+ new NestedFieldVirtualColumn("__time", "$.x", "v0")
+ )
+ .setAggregatorSpecs(new CountAggregatorFactory("count"))
+ .setDimFilter(new SelectorDimFilter("v0", "1609459200000", null))
+ .setContext(getContext())
+ .build();
+
+
+ runResults(
+ groupQuery,
+ ImmutableList.of(),
+ false,
+ true
+ );
+ }
+
private void runResults(GroupByQuery groupQuery, List<Object[]> expectedResults)
{
runResults(groupQuery, expectedResults, false, false);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org