You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by cw...@apache.org on 2024/01/04 05:00:45 UTC
(druid) branch master updated: expression virtual column indexes (#15585)
This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new f19ece146f1 expression virtual column indexes (#15585)
f19ece146f1 is described below
commit f19ece146f1222ffe69281b70963406efb66fc40
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Wed Jan 3 21:00:39 2024 -0800
expression virtual column indexes (#15585)
* ExpressionVirtualColumn + indexes = bff. Expression virtual columns can now use indexes of the underlying columns similar to how expression filters
---
.../benchmark/query/SqlExpressionBenchmark.java | 55 ++-
.../main/java/org/apache/druid/math/expr/Expr.java | 49 ++
.../expr/ExpressionPredicateIndexSupplier.java | 287 +++++++++++
.../org/apache/druid/math/expr/IdentifierExpr.java | 17 +
.../org/apache/druid/math/expr/InputBindings.java | 16 +
.../query/filter/ArrayContainsElementFilter.java | 2 +-
.../apache/druid/query/filter/EqualityFilter.java | 12 +-
.../org/apache/druid/query/filter/NullFilter.java | 11 +-
.../DictionaryEncodedStringValueIndex.java | 17 +-
.../semantic/DictionaryEncodedValueIndex.java | 19 +-
.../segment/virtual/ExpressionVirtualColumn.java | 9 +
.../druid/segment/filter/BaseFilterTest.java | 3 +
.../druid/segment/filter/EqualityFilterTests.java | 50 ++
.../druid/segment/filter/NullFilterTests.java | 31 ++
.../druid/segment/filter/RangeFilterTests.java | 45 ++
.../druid/sql/calcite/expression/Expressions.java | 25 +-
.../builtin/ArrayContainsOperatorConversion.java | 13 +-
.../builtin/ArrayOverlapOperatorConversion.java | 13 +-
.../druid/sql/calcite/CalciteArraysQueryTest.java | 47 +-
.../druid/sql/calcite/CalciteExplainQueryTest.java | 8 +-
.../calcite/CalciteMultiValueStringQueryTest.java | 36 ++
.../apache/druid/sql/calcite/CalciteQueryTest.java | 522 +++++++++++++--------
.../druid/sql/calcite/CalciteSelectQueryTest.java | 18 +-
.../druid/sql/calcite/CalciteSubqueryTest.java | 10 +-
24 files changed, 1042 insertions(+), 273 deletions(-)
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
index 291a0727a1d..d24ce997b9e 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
@@ -19,19 +19,26 @@
package org.apache.druid.benchmark.query;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonMappingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
+import org.apache.druid.java.util.common.guava.Yielder;
+import org.apache.druid.java.util.common.guava.Yielders;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.DruidProcessingConfig;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
+import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
@@ -73,6 +80,7 @@ import javax.annotation.Nullable;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
/**
* Benchmark that tests various SQL queries.
@@ -202,7 +210,9 @@ public class SqlExpressionBenchmark
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long5), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
// 38,39: array element filtering
"SELECT string1, long1 FROM foo WHERE ARRAY_CONTAINS(\"multi-string3\", 100) GROUP BY 1,2",
- "SELECT string1, long1 FROM foo WHERE ARRAY_OVERLAP(\"multi-string3\", ARRAY[100, 200]) GROUP BY 1,2"
+ "SELECT string1, long1 FROM foo WHERE ARRAY_OVERLAP(\"multi-string3\", ARRAY[100, 200]) GROUP BY 1,2",
+ // 40: regex filtering
+ "SELECT string4, COUNT(*) FROM foo WHERE REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL GROUP BY 1"
);
@Param({"5000000"})
@@ -262,7 +272,8 @@ public class SqlExpressionBenchmark
"36",
"37",
"38",
- "39"
+ "39",
+ "40"
})
private String query;
@@ -290,10 +301,15 @@ public class SqlExpressionBenchmark
log.info("Starting benchmark setup using cacheDir[%s], rows[%,d], schema[%s].", segmentGenerator.getCacheDir(), rowsPerSegment, schema);
final QueryableIndex index;
if ("auto".equals(schema)) {
+ List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
+ .getDimensions()
+ .stream()
+ .map(x -> new AutoTypeColumnSchema(x.getName(), null))
+ .collect(Collectors.toList());
index = segmentGenerator.generate(
dataSegment,
schemaInfo,
- DimensionsSpec.builder().useSchemaDiscovery(true).build(),
+ DimensionsSpec.builder().setDimensions(columnSchemas).build(),
TransformSpec.NONE,
IndexSpec.DEFAULT,
Granularities.NONE,
@@ -313,7 +329,7 @@ public class SqlExpressionBenchmark
index
);
closer.register(walker);
-
+ final ObjectMapper jsonMapper = CalciteTests.getJsonMapper();
final DruidSchemaCatalog rootSchema =
CalciteTests.createMockRootSchema(conglomerate, walker, plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER);
engine = CalciteTests.createMockSqlEngine(walker, conglomerate);
@@ -323,7 +339,7 @@ public class SqlExpressionBenchmark
CalciteTests.createExprMacroTable(),
plannerConfig,
AuthTestUtils.TEST_AUTHORIZER_MAPPER,
- CalciteTests.getJsonMapper(),
+ jsonMapper,
CalciteTests.DRUID_SCHEMA_NAME,
new CalciteRulesManager(ImmutableSet.of()),
CalciteTests.createJoinableFactoryWrapper(),
@@ -340,6 +356,35 @@ public class SqlExpressionBenchmark
catch (Throwable ignored) {
// the show must go on
}
+ final String sql = QUERIES.get(Integer.parseInt(query));
+
+ try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) {
+ final PlannerResult plannerResult = planner.plan();
+ final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
+ final Object[] planResult = resultSequence.toList().get(0);
+ log.info("Native query plan:\n" +
+ jsonMapper.writerWithDefaultPrettyPrinter()
+ .writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
+ );
+ }
+ catch (JsonMappingException e) {
+ throw new RuntimeException(e);
+ }
+ catch (JsonProcessingException e) {
+ throw new RuntimeException(e);
+ }
+
+ try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
+ final PlannerResult plannerResult = planner.plan();
+ final Sequence<Object[]> resultSequence = plannerResult.run().getResults();
+ final Yielder<Object[]> yielder = Yielders.each(resultSequence);
+ int rowCounter = 0;
+ while (!yielder.isDone()) {
+ rowCounter++;
+ yielder.next(yielder.get());
+ }
+ log.info("Total result row count:" + rowCounter);
+ }
}
@TearDown(Level.Trial)
diff --git a/processing/src/main/java/org/apache/druid/math/expr/Expr.java b/processing/src/main/java/org/apache/druid/math/expr/Expr.java
index 11bc0c922a6..42907df451a 100644
--- a/processing/src/main/java/org/apache/druid/math/expr/Expr.java
+++ b/processing/src/main/java/org/apache/druid/math/expr/Expr.java
@@ -21,12 +21,20 @@ package org.apache.druid.math.expr;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import org.apache.druid.annotations.SubclassesMustOverrideEqualsAndHashCode;
import org.apache.druid.java.util.common.Cacheable;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.math.expr.vector.ExprVectorProcessor;
import org.apache.druid.query.cache.CacheKeyBuilder;
+import org.apache.druid.segment.ColumnSelector;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.column.ColumnIndexSupplier;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex;
+import org.apache.druid.segment.serde.NoIndexesColumnIndexSupplier;
import javax.annotation.Nullable;
import java.util.ArrayList;
@@ -185,6 +193,47 @@ public interface Expr extends Cacheable
throw Exprs.cannotVectorize(this);
}
+ @Nullable
+ default ColumnIndexSupplier asColumnIndexSupplier(ColumnSelector columnSelector, @Nullable ColumnType outputType)
+ {
+ final Expr.BindingAnalysis details = analyzeInputs();
+ if (details.getRequiredBindings().size() == 1) {
+ // Single-column expression. We can use bitmap indexes if this column has an index and the expression can
+ // map over the values of the index.
+ final String column = Iterables.getOnlyElement(details.getRequiredBindings());
+
+ final ColumnHolder holder = columnSelector.getColumnHolder(column);
+ if (holder == null) {
+ // column doesn't exist, no index supplier
+ return null;
+ }
+ final ColumnCapabilities capabilities = holder.getCapabilities();
+ final ColumnIndexSupplier delegateIndexSupplier = holder.getIndexSupplier();
+ final DictionaryEncodedValueIndex<?> delegateRawIndex = delegateIndexSupplier.as(
+ DictionaryEncodedValueIndex.class
+ );
+
+ final ExpressionType inputType = ExpressionType.fromColumnTypeStrict(capabilities);
+ final ColumnType outType;
+ if (outputType == null) {
+ outType = ExpressionType.toColumnType(getOutputType(InputBindings.inspectorForColumn(column, inputType)));
+ } else {
+ outType = outputType;
+ }
+
+ if (delegateRawIndex != null && outputType != null) {
+ return new ExpressionPredicateIndexSupplier(
+ this,
+ column,
+ inputType,
+ outType,
+ delegateRawIndex
+ );
+ }
+ }
+ return NoIndexesColumnIndexSupplier.getInstance();
+ }
+
/**
* Decorates the {@link CacheKeyBuilder} for the default implementation of {@link #getCacheKey()}. The default cache
diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java b/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java
new file mode 100644
index 00000000000..f82de43c072
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java
@@ -0,0 +1,287 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.math.expr;
+
+import com.google.common.base.Predicate;
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
+import org.apache.druid.collections.bitmap.ImmutableBitmap;
+import org.apache.druid.query.filter.DruidDoublePredicate;
+import org.apache.druid.query.filter.DruidFloatPredicate;
+import org.apache.druid.query.filter.DruidLongPredicate;
+import org.apache.druid.query.filter.DruidPredicateFactory;
+import org.apache.druid.segment.column.ColumnIndexSupplier;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.index.BitmapColumnIndex;
+import org.apache.druid.segment.index.SimpleImmutableBitmapIterableIndex;
+import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex;
+import org.apache.druid.segment.index.semantic.DruidPredicateIndexes;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.List;
+
+public class ExpressionPredicateIndexSupplier implements ColumnIndexSupplier
+{
+ private final Expr expr;
+ private final String inputColumn;
+ private final ExpressionType inputType;
+ private final ColumnType outputType;
+ private final DictionaryEncodedValueIndex<?> inputColumnIndexes;
+
+ public ExpressionPredicateIndexSupplier(
+ Expr expr,
+ String inputColumn,
+ ExpressionType inputType,
+ ColumnType outputType,
+ DictionaryEncodedValueIndex<?> inputColumnValueIndexes
+ )
+ {
+ this.expr = expr;
+ this.inputColumn = inputColumn;
+ this.inputType = inputType;
+ this.outputType = outputType;
+ this.inputColumnIndexes = inputColumnValueIndexes;
+ }
+
+ @Nullable
+ @Override
+ public <T> T as(Class<T> clazz)
+ {
+ if (clazz.equals(DruidPredicateIndexes.class)) {
+ return (T) new ExpressionPredicateIndexes();
+ }
+ return null;
+ }
+
+ private final class ExpressionPredicateIndexes implements DruidPredicateIndexes
+ {
+ @Nullable
+ @Override
+ public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
+ {
+ final Supplier<ValueAndUnknownIndexes> bitmapsSupplier;
+ final java.util.function.Function<Object, ExprEval<?>> evalFunction =
+ inputValue -> expr.eval(InputBindings.forInputSupplier(inputColumn, inputType, () -> inputValue));
+
+ switch (outputType.getType()) {
+ case STRING:
+ bitmapsSupplier = Suppliers.memoize(() -> computeStringBitmaps(matcherFactory, evalFunction));
+ break;
+ case LONG:
+ bitmapsSupplier = Suppliers.memoize(() -> computeLongBitmaps(matcherFactory, evalFunction));
+ break;
+ case DOUBLE:
+ bitmapsSupplier = Suppliers.memoize(() -> computeDoubleBitmaps(matcherFactory, evalFunction));
+ break;
+ case FLOAT:
+ bitmapsSupplier = Suppliers.memoize(() -> computeFloatBitmaps(matcherFactory, evalFunction));
+ break;
+ case ARRAY:
+ bitmapsSupplier = Suppliers.memoize(() -> computeArrayBitmaps(matcherFactory, evalFunction));
+ break;
+ default:
+ bitmapsSupplier = Suppliers.memoize(() -> computeComplexBitmaps(matcherFactory, evalFunction));
+ break;
+ }
+
+ return new SimpleImmutableBitmapIterableIndex()
+ {
+ @Override
+ public Iterable<ImmutableBitmap> getBitmapIterable()
+ {
+ return bitmapsSupplier.get().getMatches();
+ }
+
+ @Nullable
+ @Override
+ protected ImmutableBitmap getUnknownsBitmap()
+ {
+ return inputColumnIndexes.getBitmapFactory().union(bitmapsSupplier.get().getUnknowns());
+ }
+ };
+ }
+ }
+
+ private ValueAndUnknownIndexes computeStringBitmaps(
+ DruidPredicateFactory matcherFactory,
+ java.util.function.Function<Object, ExprEval<?>> evalFunction
+ )
+ {
+ final Predicate<String> predicate = matcherFactory.makeStringPredicate();
+ final List<ImmutableBitmap> matches = new ArrayList<>();
+ final List<ImmutableBitmap> unknowns = new ArrayList<>();
+
+ for (int i = 0; i < inputColumnIndexes.getCardinality(); i++) {
+ final Object inputValue = inputColumnIndexes.getValue(i);
+ final String result = evalFunction.apply(inputValue).asString();
+ if (result == null && matcherFactory.isNullInputUnknown()) {
+ unknowns.add(inputColumnIndexes.getBitmap(i));
+ } else if (predicate.apply(result)) {
+ matches.add(inputColumnIndexes.getBitmap(i));
+ }
+ }
+
+ return new ValueAndUnknownIndexes(matches, unknowns);
+ }
+
+ private ValueAndUnknownIndexes computeLongBitmaps(
+ DruidPredicateFactory matcherFactory,
+ java.util.function.Function<Object, ExprEval<?>> evalFunction
+ )
+ {
+ final DruidLongPredicate predicate = matcherFactory.makeLongPredicate();
+ final List<ImmutableBitmap> matches = new ArrayList<>();
+ final List<ImmutableBitmap> unknowns = new ArrayList<>();
+
+ for (int i = 0; i < inputColumnIndexes.getCardinality(); i++) {
+ final Object inputValue = inputColumnIndexes.getValue(i);
+ final ExprEval<?> result = evalFunction.apply(inputValue);
+ if (result.isNumericNull() && matcherFactory.isNullInputUnknown()) {
+ unknowns.add(inputColumnIndexes.getBitmap(i));
+ } else if (result.isNumericNull() && predicate.applyNull()) {
+ matches.add(inputColumnIndexes.getBitmap(i));
+ } else if (!result.isNumericNull() && predicate.applyLong(result.asLong())) {
+ matches.add(inputColumnIndexes.getBitmap(i));
+ }
+ }
+
+ return new ValueAndUnknownIndexes(matches, unknowns);
+ }
+
+ private ValueAndUnknownIndexes computeDoubleBitmaps(
+ DruidPredicateFactory matcherFactory,
+ java.util.function.Function<Object, ExprEval<?>> evalFunction
+ )
+ {
+ final DruidDoublePredicate predicate = matcherFactory.makeDoublePredicate();
+ final List<ImmutableBitmap> matches = new ArrayList<>();
+ final List<ImmutableBitmap> unknowns = new ArrayList<>();
+
+ for (int i = 0; i < inputColumnIndexes.getCardinality(); i++) {
+ final Object inputValue = inputColumnIndexes.getValue(i);
+ final ExprEval<?> result = evalFunction.apply(inputValue);
+ if (result.isNumericNull() && matcherFactory.isNullInputUnknown()) {
+ unknowns.add(inputColumnIndexes.getBitmap(i));
+ } else if (result.isNumericNull() && predicate.applyNull()) {
+ matches.add(inputColumnIndexes.getBitmap(i));
+ } else if (!result.isNumericNull() && predicate.applyDouble(result.asDouble())) {
+ matches.add(inputColumnIndexes.getBitmap(i));
+ }
+ }
+
+ return new ValueAndUnknownIndexes(matches, unknowns);
+ }
+
+ private ValueAndUnknownIndexes computeFloatBitmaps(
+ DruidPredicateFactory matcherFactory,
+ java.util.function.Function<Object, ExprEval<?>> evalFunction
+ )
+ {
+ final DruidFloatPredicate predicate = matcherFactory.makeFloatPredicate();
+ final List<ImmutableBitmap> matches = new ArrayList<>();
+ final List<ImmutableBitmap> unknowns = new ArrayList<>();
+
+ for (int i = 0; i < inputColumnIndexes.getCardinality(); i++) {
+ final Object inputValue = inputColumnIndexes.getValue(i);
+ final ExprEval<?> result = evalFunction.apply(inputValue);
+ if (result.isNumericNull() && matcherFactory.isNullInputUnknown()) {
+ unknowns.add(inputColumnIndexes.getBitmap(i));
+ } else if (result.isNumericNull() && predicate.applyNull()) {
+ matches.add(inputColumnIndexes.getBitmap(i));
+ } else if (!result.isNumericNull() && predicate.applyFloat((float) result.asDouble())) {
+ matches.add(inputColumnIndexes.getBitmap(i));
+ }
+ }
+
+ return new ValueAndUnknownIndexes(matches, unknowns);
+ }
+
+ private ValueAndUnknownIndexes computeArrayBitmaps(
+ DruidPredicateFactory matcherFactory,
+ java.util.function.Function<Object, ExprEval<?>> evalFunction
+ )
+ {
+ final Predicate<Object[]> predicate = matcherFactory.makeArrayPredicate(outputType);
+ final List<ImmutableBitmap> matches = new ArrayList<>();
+ final List<ImmutableBitmap> unknowns = new ArrayList<>();
+
+ for (int i = 0; i < inputColumnIndexes.getCardinality(); i++) {
+ final Object inputValue = inputColumnIndexes.getValue(i);
+ final Object[] result = evalFunction.apply(inputValue).asArray();
+ if (result == null && matcherFactory.isNullInputUnknown()) {
+ unknowns.add(inputColumnIndexes.getBitmap(i));
+ } else if (predicate.apply(result)) {
+ matches.add(inputColumnIndexes.getBitmap(i));
+ }
+ }
+
+ return new ValueAndUnknownIndexes(matches, unknowns);
+ }
+
+ private ValueAndUnknownIndexes computeComplexBitmaps(
+ DruidPredicateFactory matcherFactory,
+ java.util.function.Function<Object, ExprEval<?>> evalFunction
+ )
+ {
+ final Predicate<Object> predicate = matcherFactory.makeObjectPredicate();
+ final List<ImmutableBitmap> matches = new ArrayList<>();
+ final List<ImmutableBitmap> unknowns = new ArrayList<>();
+
+ for (int i = 0; i < inputColumnIndexes.getCardinality(); i++) {
+ final Object inputValue = inputColumnIndexes.getValue(i);
+ final Object result = evalFunction.apply(inputValue).valueOrDefault();
+ if (result == null && matcherFactory.isNullInputUnknown()) {
+ unknowns.add(inputColumnIndexes.getBitmap(i));
+ } else if (predicate.apply(result)) {
+ matches.add(inputColumnIndexes.getBitmap(i));
+ }
+ }
+
+ return new ValueAndUnknownIndexes(matches, unknowns);
+ }
+
+ /**
+ * Holder for two sets of {@link ImmutableBitmap}, the first set representing values that match the predicate after
+ * computing the expression, and the second for values which computing the expression evaluates to null and are
+ * considered 'unknown'
+ */
+ private static class ValueAndUnknownIndexes
+ {
+ private final List<ImmutableBitmap> matches;
+ private final List<ImmutableBitmap> unknowns;
+
+ private ValueAndUnknownIndexes(List<ImmutableBitmap> matches, List<ImmutableBitmap> unknowns)
+ {
+ this.matches = matches;
+ this.unknowns = unknowns;
+ }
+
+ public List<ImmutableBitmap> getMatches()
+ {
+ return matches;
+ }
+
+ public List<ImmutableBitmap> getUnknowns()
+ {
+ return unknowns;
+ }
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java b/processing/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java
index 1fb2b249909..9a16acbe92b 100644
--- a/processing/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java
+++ b/processing/src/main/java/org/apache/druid/math/expr/IdentifierExpr.java
@@ -23,6 +23,10 @@ import org.apache.commons.lang.StringEscapeUtils;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.math.expr.vector.ExprVectorProcessor;
import org.apache.druid.math.expr.vector.VectorProcessors;
+import org.apache.druid.segment.ColumnSelector;
+import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.column.ColumnIndexSupplier;
+import org.apache.druid.segment.column.ColumnType;
import javax.annotation.Nullable;
import java.util.Objects;
@@ -152,6 +156,19 @@ class IdentifierExpr implements Expr
return VectorProcessors.identifier(inspector, binding);
}
+ @Nullable
+ @Override
+ public ColumnIndexSupplier asColumnIndexSupplier(ColumnSelector columnSelector, @Nullable ColumnType outputType)
+ {
+ // identifier just wraps a column, we can return its index supplier directly if the column exists
+ final ColumnHolder holder = columnSelector.getColumnHolder(binding);
+ if (holder == null) {
+ // column doesn't exist, no index supplier
+ return null;
+ }
+ return holder.getIndexSupplier();
+ }
+
@Override
public boolean equals(Object o)
{
diff --git a/processing/src/main/java/org/apache/druid/math/expr/InputBindings.java b/processing/src/main/java/org/apache/druid/math/expr/InputBindings.java
index 469ea05706b..e047f9e941a 100644
--- a/processing/src/main/java/org/apache/druid/math/expr/InputBindings.java
+++ b/processing/src/main/java/org/apache/druid/math/expr/InputBindings.java
@@ -99,6 +99,22 @@ public class InputBindings
};
}
+ public static Expr.InputBindingInspector inspectorForColumn(String column, ExpressionType type)
+ {
+ return new Expr.InputBindingInspector()
+ {
+ @Nullable
+ @Override
+ public ExpressionType getType(String name)
+ {
+ if (column.equals(name)) {
+ return type;
+ }
+ return null;
+ }
+ };
+ }
+
/**
* Creates a {@link Expr.ObjectBinding} backed by some {@link Row}. {@link ColumnHolder#TIME_COLUMN_NAME} is special
* handled to be backed by {@link Row#getTimestampFromEpoch()}, all other values are ethically sourced from
diff --git a/processing/src/main/java/org/apache/druid/query/filter/ArrayContainsElementFilter.java b/processing/src/main/java/org/apache/druid/query/filter/ArrayContainsElementFilter.java
index 0ca2a7a1394..25558bf42e6 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/ArrayContainsElementFilter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/ArrayContainsElementFilter.java
@@ -229,7 +229,7 @@ public class ArrayContainsElementFilter extends AbstractOptimizableDimFilter imp
if (elementMatchValueEval.valueOrDefault() != null && selector.getColumnCapabilities(column) != null && !selector.getColumnCapabilities(column).isArray()) {
// column is not an array, behave like a normal equality filter
- return EqualityFilter.getEqualityIndex(column, elementMatchValueEval, elementMatchValueType, selector);
+ return EqualityFilter.getEqualityIndex(column, elementMatchValueEval, elementMatchValueType, selector, predicateFactory);
}
// column exists, but has no indexes we can use
return null;
diff --git a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java
index 2c638b76340..85063e99431 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java
@@ -57,6 +57,7 @@ import org.apache.druid.segment.filter.PredicateValueMatcherFactory;
import org.apache.druid.segment.filter.ValueMatchers;
import org.apache.druid.segment.index.AllUnknownBitmapColumnIndex;
import org.apache.druid.segment.index.BitmapColumnIndex;
+import org.apache.druid.segment.index.semantic.DruidPredicateIndexes;
import org.apache.druid.segment.index.semantic.StringValueSetIndexes;
import org.apache.druid.segment.index.semantic.ValueIndexes;
import org.apache.druid.segment.nested.StructuredData;
@@ -228,7 +229,7 @@ public class EqualityFilter extends AbstractOptimizableDimFilter implements Filt
if (!Filters.checkFilterTuningUseIndex(column, selector, filterTuning)) {
return null;
}
- return getEqualityIndex(column, matchValueEval, matchValueType, selector);
+ return getEqualityIndex(column, matchValueEval, matchValueType, selector, predicateFactory);
}
@Override
@@ -303,7 +304,8 @@ public class EqualityFilter extends AbstractOptimizableDimFilter implements Filt
String column,
ExprEval<?> matchValueEval,
ColumnType matchValueType,
- ColumnIndexSelector selector
+ ColumnIndexSelector selector,
+ DruidPredicateFactory predicateFactory
)
{
final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(column);
@@ -325,6 +327,12 @@ public class EqualityFilter extends AbstractOptimizableDimFilter implements Filt
return stringValueSetIndexes.forValue(matchValueEval.asString());
}
}
+ // fall back to predicate based index if it is available
+ final DruidPredicateIndexes predicateIndexes = indexSupplier.as(DruidPredicateIndexes.class);
+ if (predicateIndexes != null) {
+ return predicateIndexes.forPredicate(predicateFactory);
+ }
+
// column exists, but has no indexes we can use
return null;
}
diff --git a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java
index f120ee8fd90..903882b2a99 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java
@@ -42,6 +42,7 @@ import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.filter.Filters;
import org.apache.druid.segment.index.AllTrueBitmapColumnIndex;
import org.apache.druid.segment.index.BitmapColumnIndex;
+import org.apache.druid.segment.index.semantic.DruidPredicateIndexes;
import org.apache.druid.segment.index.semantic.NullValueIndex;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
@@ -129,10 +130,14 @@ public class NullFilter extends AbstractOptimizableDimFilter implements Filter
return new AllTrueBitmapColumnIndex(selector);
}
final NullValueIndex nullValueIndex = indexSupplier.as(NullValueIndex.class);
- if (nullValueIndex == null) {
- return null;
+ if (nullValueIndex != null) {
+ return nullValueIndex.get();
+ }
+ final DruidPredicateIndexes predicateIndexes = indexSupplier.as(DruidPredicateIndexes.class);
+ if (predicateIndexes != null) {
+ return predicateIndexes.forPredicate(NullPredicateFactory.INSTANCE);
}
- return nullValueIndex.get();
+ return null;
}
@Override
diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedStringValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedStringValueIndex.java
index f7b76289a60..c905178653a 100644
--- a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedStringValueIndex.java
+++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedStringValueIndex.java
@@ -19,11 +19,8 @@
package org.apache.druid.segment.index.semantic;
-import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
-import javax.annotation.Nullable;
-
/**
* This exposes a 'raw' view into bitmap value indexes of a string {@link DictionaryEncodedColumn}. This allows callers
* to directly retrieve bitmaps via dictionary ids, as well as access to lower level details of such a column like
@@ -32,19 +29,7 @@ import javax.annotation.Nullable;
* Most filter implementations should likely be using higher level index instead, such as {@link StringValueSetIndexes},
* {@link LexicographicalRangeIndexes}, {@link NumericRangeIndexes}, or {@link DruidPredicateIndexes}
*/
-public interface DictionaryEncodedStringValueIndex extends DictionaryEncodedValueIndex
+public interface DictionaryEncodedStringValueIndex extends DictionaryEncodedValueIndex<String>
{
- /**
- * Get the cardinality of the underlying value dictionary
- */
- int getCardinality();
-
- /**
- * Get the value in the underlying value dictionary of the specified dictionary id
- */
- @Nullable
- String getValue(int index);
- @SuppressWarnings({"unreachable", "unused"})
- BitmapFactory getBitmapFactory();
}
diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java
index b9e997e9d43..a99f91c2dc1 100644
--- a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java
+++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java
@@ -19,9 +19,12 @@
package org.apache.druid.segment.index.semantic;
+import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
+import javax.annotation.Nullable;
+
/**
* This exposes a 'raw' view into bitmap value indexes for {@link DictionaryEncodedColumn}. This allows callers
* to directly retrieve bitmaps via dictionary ids.
@@ -33,10 +36,24 @@ import org.apache.druid.segment.column.DictionaryEncodedColumn;
* Most filter implementations should likely be using higher level index instead, such as {@link StringValueSetIndexes},
* {@link LexicographicalRangeIndexes}, {@link NumericRangeIndexes}, or {@link DruidPredicateIndexes}.
*/
-public interface DictionaryEncodedValueIndex
+public interface DictionaryEncodedValueIndex<T>
{
/**
* Get the {@link ImmutableBitmap} for dictionary id of the underlying dictionary
*/
ImmutableBitmap getBitmap(int idx);
+
+ /**
+ * Get the cardinality of the underlying value dictionary
+ */
+ int getCardinality();
+
+ /**
+ * Get the value in the underlying value dictionary of the specified dictionary id
+ */
+ @Nullable
+ T getValue(int index);
+
+ @SuppressWarnings({"unreachable", "unused"})
+ BitmapFactory getBitmapFactory();
}
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
index f6488d4e9e3..f8ace2ca2f2 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
@@ -35,12 +35,14 @@ import org.apache.druid.math.expr.Parser;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.segment.ColumnInspector;
+import org.apache.druid.segment.ColumnSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
@@ -240,6 +242,13 @@ public class ExpressionVirtualColumn implements VirtualColumn
return ExpressionVectorSelectors.makeVectorObjectSelector(factory, parsedExpression.get());
}
+ @Nullable
+ @Override
+ public ColumnIndexSupplier getIndexSupplier(String columnName, ColumnSelector columnSelector)
+ {
+ return getParsedExpression().get().asColumnIndexSupplier(columnSelector, outputType);
+ }
+
@Override
public ColumnCapabilities capabilities(String columnName)
{
diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
index 56346c76a13..c01db0407d4 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java
@@ -140,6 +140,9 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest
new ExpressionVirtualColumn("vd0", "d0", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE),
new ExpressionVirtualColumn("vf0", "f0", ColumnType.FLOAT, TestExprMacroTable.INSTANCE),
new ExpressionVirtualColumn("vl0", "l0", ColumnType.LONG, TestExprMacroTable.INSTANCE),
+ new ExpressionVirtualColumn("vd0-add-sub", "d0 + (d0 - d0)", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE),
+ new ExpressionVirtualColumn("vf0-add-sub", "f0 + (f0 - f0)", ColumnType.FLOAT, TestExprMacroTable.INSTANCE),
+ new ExpressionVirtualColumn("vl0-add-sub", "l0 + (l0 - l0)", ColumnType.LONG, TestExprMacroTable.INSTANCE),
new ExpressionVirtualColumn("nestedArrayLong", "array(arrayLong)", ColumnType.ofArray(ColumnType.LONG_ARRAY), TestExprMacroTable.INSTANCE),
new ListFilteredVirtualColumn("allow-dim0", DefaultDimensionSpec.of("dim0"), ImmutableSet.of("3", "4"), true),
new ListFilteredVirtualColumn("deny-dim0", DefaultDimensionSpec.of("dim0"), ImmutableSet.of("3", "4"), false),
diff --git a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
index 982d7b70976..10862fa1a9e 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
@@ -629,6 +629,26 @@ public class EqualityFilterTests
ImmutableList.of("1", "2", "4", "5")
);
+ assertFilterMatches(new EqualityFilter("vf0-add-sub", ColumnType.STRING, "0", null), ImmutableList.of("0", "4"));
+ assertFilterMatches(new EqualityFilter("vd0-add-sub", ColumnType.STRING, "0", null), ImmutableList.of("0", "2"));
+ assertFilterMatches(new EqualityFilter("vl0-add-sub", ColumnType.STRING, "0", null), ImmutableList.of("0", "3"));
+
+ assertFilterMatches(new EqualityFilter("vf0-add-sub", ColumnType.FLOAT, 0f, null), ImmutableList.of("0", "4"));
+ assertFilterMatches(
+ NotDimFilter.of(new EqualityFilter("vf0-add-sub", ColumnType.FLOAT, 0f, null)),
+ ImmutableList.of("1", "2", "3", "5")
+ );
+ assertFilterMatches(new EqualityFilter("vd0-add-sub", ColumnType.DOUBLE, 0.0, null), ImmutableList.of("0", "2"));
+ assertFilterMatches(
+ NotDimFilter.of(new EqualityFilter("vd0-add-sub", ColumnType.DOUBLE, 0.0, null)),
+ ImmutableList.of("1", "3", "4", "5")
+ );
+ assertFilterMatches(new EqualityFilter("vl0", ColumnType.LONG, 0L, null), ImmutableList.of("0", "3"));
+ assertFilterMatches(
+ NotDimFilter.of(new EqualityFilter("vl0", ColumnType.LONG, 0L, null)),
+ ImmutableList.of("1", "2", "4", "5")
+ );
+
assertFilterMatches(new EqualityFilter("vf0", ColumnType.STRING, "0", null), ImmutableList.of("0", "4"));
assertFilterMatches(new EqualityFilter("vd0", ColumnType.STRING, "0", null), ImmutableList.of("0", "2"));
assertFilterMatches(new EqualityFilter("vl0", ColumnType.STRING, "0", null), ImmutableList.of("0", "3"));
@@ -658,6 +678,36 @@ public class EqualityFilterTests
assertFilterMatches(new EqualityFilter("vf0", ColumnType.STRING, "0", null), ImmutableList.of("0"));
assertFilterMatches(new EqualityFilter("vd0", ColumnType.STRING, "0", null), ImmutableList.of("0"));
assertFilterMatches(new EqualityFilter("vl0", ColumnType.STRING, "0", null), ImmutableList.of("0"));
+
+ if (NullHandling.sqlCompatible()) {
+ // these fail in default value mode that cannot be tested as numeric default values becuase of type
+ // mismatch for subtract operation
+ assertFilterMatches(new EqualityFilter("vf0-add-sub", ColumnType.FLOAT, 0f, null), ImmutableList.of("0"));
+ assertFilterMatches(
+ NotDimFilter.of(new EqualityFilter("vf0-add-sub", ColumnType.FLOAT, 0f, null)),
+ NullHandling.sqlCompatible()
+ ? ImmutableList.of("1", "2", "3", "5")
+ : ImmutableList.of("1", "2", "3", "4", "5")
+ );
+ assertFilterMatches(new EqualityFilter("vd0-add-sub", ColumnType.DOUBLE, 0.0, null), ImmutableList.of("0"));
+ assertFilterMatches(
+ NotDimFilter.of(new EqualityFilter("vd0-add-sub", ColumnType.DOUBLE, 0.0, null)),
+ NullHandling.sqlCompatible()
+ ? ImmutableList.of("1", "3", "4", "5")
+ : ImmutableList.of("1", "2", "3", "4", "5")
+ );
+ assertFilterMatches(new EqualityFilter("vl0-add-sub", ColumnType.LONG, 0L, null), ImmutableList.of("0"));
+ assertFilterMatches(
+ NotDimFilter.of(new EqualityFilter("vl0-add-sub", ColumnType.LONG, 0L, null)),
+ NullHandling.sqlCompatible()
+ ? ImmutableList.of("1", "2", "4", "5")
+ : ImmutableList.of("1", "2", "3", "4", "5")
+ );
+
+ assertFilterMatches(new EqualityFilter("vf0-add-sub", ColumnType.STRING, "0", null), ImmutableList.of("0"));
+ assertFilterMatches(new EqualityFilter("vd0-add-sub", ColumnType.STRING, "0", null), ImmutableList.of("0"));
+ assertFilterMatches(new EqualityFilter("vl0-add-sub", ColumnType.STRING, "0", null), ImmutableList.of("0"));
+ }
}
}
diff --git a/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTests.java
index e563852be4f..443f655baae 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTests.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTests.java
@@ -230,6 +230,24 @@ public class NullFilterTests
NotDimFilter.of(NullFilter.forColumn("vl0")),
ImmutableList.of("0", "1", "2", "3", "4", "5")
);
+
+ assertFilterMatches(NullFilter.forColumn("vf0-add-sub"), ImmutableList.of());
+ assertFilterMatches(
+ NotDimFilter.of(NullFilter.forColumn("vf0-add-sub")),
+ ImmutableList.of("0", "1", "2", "3", "4", "5")
+ );
+
+ assertFilterMatches(NullFilter.forColumn("vd0-add-sub"), ImmutableList.of());
+ assertFilterMatches(
+ NotDimFilter.of(NullFilter.forColumn("vd0-add-sub")),
+ ImmutableList.of("0", "1", "2", "3", "4", "5")
+ );
+
+ assertFilterMatches(NullFilter.forColumn("vl0-add-sub"), ImmutableList.of());
+ assertFilterMatches(
+ NotDimFilter.of(NullFilter.forColumn("vl0-add-sub")),
+ ImmutableList.of("0", "1", "2", "3", "4", "5")
+ );
} else {
assertFilterMatches(NullFilter.forColumn("vf0"), ImmutableList.of("4"));
assertFilterMatches(NotDimFilter.of(NullFilter.forColumn("vf0")), ImmutableList.of("0", "1", "2", "3", "5"));
@@ -239,6 +257,19 @@ public class NullFilterTests
assertFilterMatches(NullFilter.forColumn("vl0"), ImmutableList.of("3"));
assertFilterMatches(NotDimFilter.of(NullFilter.forColumn("vl0")), ImmutableList.of("0", "1", "2", "4", "5"));
+
+ if (NullHandling.sqlCompatible()) {
+ // these fail in default value mode that cannot be tested as numeric default values becuase of type
+ // mismatch for subtract operation
+ assertFilterMatches(NullFilter.forColumn("vf0-add-sub"), ImmutableList.of("4"));
+ assertFilterMatches(NotDimFilter.of(NullFilter.forColumn("vf0-add-sub")), ImmutableList.of("0", "1", "2", "3", "5"));
+
+ assertFilterMatches(NullFilter.forColumn("vd0-add-sub"), ImmutableList.of("2"));
+ assertFilterMatches(NotDimFilter.of(NullFilter.forColumn("vd0-add-sub")), ImmutableList.of("0", "1", "3", "4", "5"));
+
+ assertFilterMatches(NullFilter.forColumn("vl0-add-sub"), ImmutableList.of("3"));
+ assertFilterMatches(NotDimFilter.of(NullFilter.forColumn("vl0-add-sub")), ImmutableList.of("0", "1", "2", "4", "5"));
+ }
}
}
diff --git a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTests.java
index 4cacfbc8dfc..997a1b9dc04 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTests.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTests.java
@@ -875,6 +875,51 @@ public class RangeFilterTests
? ImmutableList.of("0", "3", "7")
: ImmutableList.of("0")
);
+
+ if (NullHandling.sqlCompatible() || canTestNumericNullsAsDefaultValues) {
+ // these fail in default value mode that cannot be tested as numeric default values becuase of type
+ // mismatch for subtract operation
+ assertFilterMatches(
+ new RangeFilter(
+ "vd0-add-sub",
+ ColumnType.DOUBLE,
+ 0.0,
+ 1.0,
+ false,
+ false,
+ null
+ ),
+ canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "2", "7") : ImmutableList.of("0")
+ );
+
+ assertFilterMatches(
+ new RangeFilter(
+ "vf0-add-sub",
+ ColumnType.FLOAT,
+ 0.0,
+ 1.0,
+ false,
+ false,
+ null
+ ),
+ canTestNumericNullsAsDefaultValues ? ImmutableList.of("0", "4", "6") : ImmutableList.of("0")
+ );
+
+ assertFilterMatches(
+ new RangeFilter(
+ "vl0-add-sub",
+ ColumnType.LONG,
+ 0L,
+ 1L,
+ false,
+ false,
+ null
+ ),
+ NullHandling.replaceWithDefault() && canTestNumericNullsAsDefaultValues
+ ? ImmutableList.of("0", "3", "7")
+ : ImmutableList.of("0")
+ );
+ }
}
@Test
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java
index 7a8786e21ef..538663395d0 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java
@@ -562,11 +562,12 @@ public class Expressions
druidExpression.getSimpleExtraction().getExtractionFn()
);
} else {
- if (druidExpression.getSimpleExtraction().getExtractionFn() != null) {
- // return null to fallback to using an expression filter
- return null;
+ if (virtualColumnRegistry != null && druidExpression.getSimpleExtraction().getExtractionFn() != null) {
+ String column = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(druidExpression, druidExpression.getDruidType());
+ equalFilter = NullFilter.forColumn(column);
+ } else {
+ equalFilter = NullFilter.forColumn(druidExpression.getDirectColumn());
}
- equalFilter = NullFilter.forColumn(druidExpression.getDirectColumn());
}
} else if (virtualColumnRegistry != null) {
final String virtualColumn = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(
@@ -666,7 +667,7 @@ public class Expressions
);
}
- final String column;
+ String column;
final ExtractionFn extractionFn;
if (lhsExpression.isSimpleExtraction()) {
column = lhsExpression.getSimpleExtraction().getColumn();
@@ -755,11 +756,23 @@ public class Expressions
} else {
final Object val = rhsParsed.getLiteralValue();
- if (extractionFn != null || val == null) {
+ if (val == null) {
// fall back to expression filter
return null;
}
+ // extractionFn are not supported by equality/range filter
+ if (extractionFn != null) {
+ if (virtualColumnRegistry != null) {
+ column = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(
+ lhsExpression,
+ lhs.getType()
+ );
+ } else {
+ // if this happens for some reason, bail and use an expression filter
+ }
+ }
+
final RangeRefKey rangeRefKey = new RangeRefKey(column, matchValueType);
final DimFilter filter;
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java
index 9036d7e406d..d1e24b6429e 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java
@@ -99,9 +99,7 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper
// if the input column is not actually an ARRAY type, but rather an MVD, we can optimize this into
// selector/equality filters on the individual array elements
- if (leftExpr.isSimpleExtraction()
- && !leftExpr.isArray()
- && (plannerContext.isUseBoundsAndSelectors() || leftExpr.isDirectColumnAccess())) {
+ if (leftExpr.isSimpleExtraction() && !leftExpr.isArray()) {
Expr expr = plannerContext.parseExpression(rightExpr.getExpression());
// To convert this expression filter into an And of Selector filters, we need to extract all array elements.
// For now, we can optimize only when rightExpr is a literal because there is no way to extract the array elements
@@ -125,10 +123,15 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper
if (plannerContext.isUseBoundsAndSelectors()) {
filters.add(newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(val)));
} else {
- // Cannot handle extractionFn here. We won't get one due to the isDirectColumnAccess check above.
+ final String column = leftExpr.isDirectColumnAccess()
+ ? leftExpr.getSimpleExtraction().getColumn()
+ : virtualColumnRegistry.getOrCreateVirtualColumnForExpression(
+ leftExpr,
+ leftExpr.getDruidType()
+ );
filters.add(
new EqualityFilter(
- leftExpr.getSimpleExtraction().getColumn(),
+ column,
ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type())),
val,
null
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
index af5c65d9f84..f979ba073c5 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
@@ -114,9 +114,7 @@ public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOpera
}
final Expr expr = plannerContext.parseExpression(complexExpr.getExpression());
- if (expr.isLiteral()
- && !simpleExtractionExpr.isArray()
- && (plannerContext.isUseBoundsAndSelectors() || simpleExtractionExpr.isDirectColumnAccess())) {
+ if (expr.isLiteral() && !simpleExtractionExpr.isArray()) {
// Evaluate the expression to take out the array elements.
// We can safely pass null if the expression is literal.
ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
@@ -131,9 +129,14 @@ public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOpera
if (plannerContext.isUseBoundsAndSelectors()) {
return newSelectorDimFilter(simpleExtractionExpr.getSimpleExtraction(), Evals.asString(arrayElements[0]));
} else {
- // Cannot handle extractionFn here. We won't get one due to the isDirectColumnAccess check above.
+ final String column = simpleExtractionExpr.isDirectColumnAccess()
+ ? simpleExtractionExpr.getSimpleExtraction().getColumn()
+ : virtualColumnRegistry.getOrCreateVirtualColumnForExpression(
+ simpleExtractionExpr,
+ simpleExtractionExpr.getDruidType()
+ );
return new EqualityFilter(
- simpleExtractionExpr.getSimpleExtraction().getColumn(),
+ column,
ExpressionType.toColumnType(exprEval.type()),
arrayElements[0],
null
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
index f6a88a2aa89..54e28f0b01f 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
@@ -957,9 +957,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
.dataSource(CalciteTests.DATASOURCE3)
.intervals(querySegmentSpec(Filtration.eternity()))
.filters(
- NullHandling.sqlCompatible()
- ? expressionFilter("array_overlap(substring(\"dim3\", 0, 1),array('a','b'))")
- : new InDimFilter("dim3", ImmutableList.of("a", "b"), new SubstringDimExtractionFn(0, 1))
+ new InDimFilter("dim3", ImmutableList.of("a", "b"), new SubstringDimExtractionFn(0, 1))
)
.columns("dim3")
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
@@ -1190,26 +1188,33 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
@Test
public void testArrayContainsFilterWithExtractionFn()
{
+ Druids.ScanQueryBuilder builder = newScanQueryBuilder()
+ .dataSource(CalciteTests.DATASOURCE3)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .columns("dim3")
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .limit(5)
+ .context(QUERY_CONTEXT_DEFAULT);
+
+ if (NullHandling.sqlCompatible()) {
+ builder = builder.virtualColumns(expressionVirtualColumn("v0", "substring(\"dim3\", 0, 1)", ColumnType.STRING))
+ .filters(
+ and(
+ equality("v0", "a", ColumnType.STRING),
+ equality("v0", "b", ColumnType.STRING)
+ )
+ );
+ } else {
+ builder = builder.filters(
+ and(
+ selector("dim3", "a", new SubstringDimExtractionFn(0, 1)),
+ selector("dim3", "b", new SubstringDimExtractionFn(0, 1))
+ )
+ );
+ }
testQuery(
"SELECT dim3 FROM druid.numfoo WHERE ARRAY_CONTAINS(SUBSTRING(dim3, 1, 1), ARRAY['a','b']) LIMIT 5",
- ImmutableList.of(
- newScanQueryBuilder()
- .dataSource(CalciteTests.DATASOURCE3)
- .intervals(querySegmentSpec(Filtration.eternity()))
- .filters(
- NullHandling.sqlCompatible()
- ? expressionFilter("array_contains(substring(\"dim3\", 0, 1),array('a','b'))")
- : and(
- selector("dim3", "a", new SubstringDimExtractionFn(0, 1)),
- selector("dim3", "b", new SubstringDimExtractionFn(0, 1))
- )
- )
- .columns("dim3")
- .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
- .limit(5)
- .context(QUERY_CONTEXT_DEFAULT)
- .build()
- ),
+ ImmutableList.of(builder.build()),
ImmutableList.of(
new Object[]{"[\"a\",\"b\"]"}
)
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExplainQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExplainQueryTest.java
index afbb3cda8e3..83b322140a3 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExplainQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExplainQueryTest.java
@@ -42,7 +42,7 @@ public class CalciteExplainQueryTest extends BaseCalciteQueryTest
final String query = "EXPLAIN PLAN FOR SELECT COUNT(*) FROM view.aview WHERE dim1_firstchar <> 'z'";
final String legacyExplanation = NullHandling.replaceWithDefault()
? "DruidQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"filter\":{\"type\":\"and\",\"fields\":[{\"type\":\"selector\",\"dimension\":\"dim2\",\"value\":\"a\"},{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":\"z\",\"extractionFn\":{\"type\":\"subst [...]
- : "DruidQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"filter\":{\"type\":\"and\",\"fields\":[{\"type\":\"equals\",\"column\":\"dim2\",\"matchValueType\":\"STRING\",\"matchValue\":\"a\"},{\"type\":\"expression\",\"expression\":\"(substring(\\\"dim1\\\", 0, 1) != 'z')\"}]},\"granul [...]
+ : "DruidQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[{\"type\":\"expression\",\"name\":\"v0\",\"expression\":\"substring(\\\"dim1\\\", 0, 1)\",\"outputType\":\"STRING\"}],\"filter\":{\"type\":\"and\",\"fields\":[{\"type\":\"equals\",\"column\":\"dim2\",\"matchV [...]
final String explanation = NullHandling.replaceWithDefault()
? "[{"
+ "\"query\":{\"queryType\":\"timeseries\","
@@ -59,11 +59,13 @@ public class CalciteExplainQueryTest extends BaseCalciteQueryTest
+ "\"query\":{\"queryType\":\"timeseries\","
+ "\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},"
+ "\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},"
- + "\"filter\":{\"type\":\"and\",\"fields\":[{\"type\":\"equals\",\"column\":\"dim2\",\"matchValueType\":\"STRING\",\"matchValue\":\"a\"},{\"type\":\"expression\",\"expression\":\"(substring(\\\"dim1\\\", 0, 1) != 'z')\"}]},"
+ + "\"virtualColumns\":[{\"type\":\"expression\",\"name\":\"v0\",\"expression\":\"substring(\\\"dim1\\\", 0, 1)\",\"outputType\":\"STRING\"}],"
+ + "\"filter\":{\"type\":\"and\",\"fields\":[{\"type\":\"equals\",\"column\":\"dim2\",\"matchValueType\":\"STRING\",\"matchValue\":\"a\"},{\"type\":\"not\",\"field\":{\"type\":\"equals\",\"column\":\"v0\",\"matchValueType\":\"STRING\",\"matchValue\":\"z\"}}]},"
+ "\"granularity\":{\"type\":\"all\"},"
+ "\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],"
+ "\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}},"
- + "\"signature\":[{\"name\":\"a0\",\"type\":\"LONG\"}],\"columnMappings\":[{\"queryColumn\":\"a0\",\"outputColumn\":\"EXPR$0\"}]"
+ + "\"signature\":[{\"name\":\"a0\",\"type\":\"LONG\"}],"
+ + "\"columnMappings\":[{\"queryColumn\":\"a0\",\"outputColumn\":\"EXPR$0\"}]"
+ "}]";
final String resources = "[{\"name\":\"aview\",\"type\":\"VIEW\"}]";
final String attributes = "{\"statementType\":\"SELECT\"}";
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
index 4831e29492e..7226811dba0 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
@@ -34,6 +34,7 @@ import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.dimension.ExtractionDimensionSpec;
import org.apache.druid.query.expression.TestExprMacroTable;
+import org.apache.druid.query.extraction.SubstringDimExtractionFn;
import org.apache.druid.query.filter.InDimFilter;
import org.apache.druid.query.filter.LikeDimFilter;
import org.apache.druid.query.groupby.GroupByQuery;
@@ -2217,4 +2218,39 @@ public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest
}
+ @Test
+ public void testMvContainsFilterWithExtractionFn()
+ {
+ Druids.ScanQueryBuilder builder = newScanQueryBuilder()
+ .dataSource(CalciteTests.DATASOURCE3)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .columns("dim3")
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .limit(5)
+ .context(QUERY_CONTEXT_DEFAULT);
+
+ if (NullHandling.sqlCompatible()) {
+ builder = builder.virtualColumns(expressionVirtualColumn("v0", "substring(\"dim3\", 0, 1)", ColumnType.STRING))
+ .filters(
+ and(
+ equality("v0", "a", ColumnType.STRING),
+ equality("v0", "b", ColumnType.STRING)
+ )
+ );
+ } else {
+ builder = builder.filters(
+ and(
+ selector("dim3", "a", new SubstringDimExtractionFn(0, 1)),
+ selector("dim3", "b", new SubstringDimExtractionFn(0, 1))
+ )
+ );
+ }
+ testQuery(
+ "SELECT dim3 FROM druid.numfoo WHERE MV_CONTAINS(SUBSTRING(dim3, 1, 1), ARRAY['a','b']) LIMIT 5",
+ ImmutableList.of(builder.build()),
+ ImmutableList.of(
+ new Object[]{"[\"a\",\"b\"]"}
+ )
+ );
+ }
}
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
index e5d4105887d..6e918c65e5c 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
@@ -4234,24 +4234,33 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
// cannot vectorize due to substring expression
cannotVectorize();
}
+ Druids.TimeseriesQueryBuilder builder =
+ Druids.newTimeseriesQueryBuilder()
+ .dataSource(CalciteTests.DATASOURCE1)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .granularity(Granularities.ALL)
+ .aggregators(aggregators(new CountAggregatorFactory("a0")))
+ .context(QUERY_CONTEXT_DEFAULT);
+ if (NullHandling.sqlCompatible()) {
+ builder = builder.virtualColumns(expressionVirtualColumn("v0", "substring(\"dim1\", 0, 1)", ColumnType.STRING))
+ .filters(
+ and(
+ equality("dim2", "a", ColumnType.STRING),
+ not(equality("v0", "z", ColumnType.STRING))
+ )
+ );
+ } else {
+ builder = builder.filters(
+ and(
+ equality("dim2", "a", ColumnType.STRING),
+ not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))
+ )
+ );
+ }
testQuery(
"WITH beep (dim1_firstchar) AS (SELECT SUBSTRING(dim1, 1, 1) FROM foo WHERE dim2 = 'a')\n"
+ "SELECT COUNT(*) FROM beep WHERE dim1_firstchar <> 'z'",
- ImmutableList.of(
- Druids.newTimeseriesQueryBuilder()
- .dataSource(CalciteTests.DATASOURCE1)
- .intervals(querySegmentSpec(Filtration.eternity()))
- .filters(and(
- equality("dim2", "a", ColumnType.STRING),
- NullHandling.replaceWithDefault()
- ? not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))
- : expressionFilter("(substring(\"dim1\", 0, 1) != 'z')")
- ))
- .granularity(Granularities.ALL)
- .aggregators(aggregators(new CountAggregatorFactory("a0")))
- .context(QUERY_CONTEXT_DEFAULT)
- .build()
- ),
+ ImmutableList.of(builder.build()),
ImmutableList.of(
// in sql compatible mode, the expression filter causes us to correctly not match null
new Object[]{NullHandling.replaceWithDefault() ? 2L : 1L}
@@ -4266,23 +4275,33 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
// cannot vectorize due to substring expression
cannotVectorize();
}
+
+ Druids.TimeseriesQueryBuilder builder =
+ Druids.newTimeseriesQueryBuilder()
+ .dataSource(CalciteTests.DATASOURCE1)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .granularity(Granularities.ALL)
+ .aggregators(aggregators(new CountAggregatorFactory("a0")))
+ .context(QUERY_CONTEXT_DEFAULT);
+ if (NullHandling.sqlCompatible()) {
+ builder = builder.virtualColumns(expressionVirtualColumn("v0", "substring(\"dim1\", 0, 1)", ColumnType.STRING))
+ .filters(
+ and(
+ equality("dim2", "a", ColumnType.STRING),
+ not(equality("v0", "z", ColumnType.STRING))
+ )
+ );
+ } else {
+ builder = builder.filters(
+ and(
+ equality("dim2", "a", ColumnType.STRING),
+ not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))
+ )
+ );
+ }
testQuery(
"SELECT COUNT(*) FROM view.aview WHERE dim1_firstchar <> 'z'",
- ImmutableList.of(
- Druids.newTimeseriesQueryBuilder()
- .dataSource(CalciteTests.DATASOURCE1)
- .intervals(querySegmentSpec(Filtration.eternity()))
- .filters(and(
- equality("dim2", "a", ColumnType.STRING),
- NullHandling.replaceWithDefault()
- ? not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))
- : expressionFilter("(substring(\"dim1\", 0, 1) != 'z')")
- ))
- .granularity(Granularities.ALL)
- .aggregators(aggregators(new CountAggregatorFactory("a0")))
- .context(QUERY_CONTEXT_DEFAULT)
- .build()
- ),
+ ImmutableList.of(builder.build()),
ImmutableList.of(
// in sql compatible mode, the expression filter causes us to correctly not match null
new Object[]{NullHandling.replaceWithDefault() ? 2L : 1L}
@@ -4297,23 +4316,34 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
// cannot vectorize due to substring expression
cannotVectorize();
}
+ Druids.TimeseriesQueryBuilder builder =
+ Druids.newTimeseriesQueryBuilder()
+ .dataSource(CalciteTests.DATASOURCE1)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .granularity(Granularities.ALL)
+ .aggregators(aggregators(new CountAggregatorFactory("a0")))
+ .context(QUERY_CONTEXT_DEFAULT);
+ if (NullHandling.sqlCompatible()) {
+ builder = builder.virtualColumns(
+ expressionVirtualColumn("v0", "substring(\"dim1\", 0, 1)", ColumnType.STRING)
+ )
+ .filters(
+ and(
+ equality("dim2", "a", ColumnType.STRING),
+ not(equality("v0", "z", ColumnType.STRING))
+ )
+ );
+ } else {
+ builder = builder.filters(
+ and(
+ equality("dim2", "a", ColumnType.STRING),
+ not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))
+ )
+ );
+ }
testQuery(
"SELECT COUNT(*) FROM view.dview as druid WHERE druid.numfoo <> 'z'",
- ImmutableList.of(
- Druids.newTimeseriesQueryBuilder()
- .dataSource(CalciteTests.DATASOURCE1)
- .intervals(querySegmentSpec(Filtration.eternity()))
- .filters(and(
- equality("dim2", "a", ColumnType.STRING),
- NullHandling.replaceWithDefault()
- ? not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))
- : expressionFilter("(substring(\"dim1\", 0, 1) != 'z')")
- ))
- .granularity(Granularities.ALL)
- .aggregators(aggregators(new CountAggregatorFactory("a0")))
- .context(QUERY_CONTEXT_DEFAULT)
- .build()
- ),
+ ImmutableList.of(builder.build()),
ImmutableList.of(
// in sql compatible mode, the expression filter causes us to correctly not match null
new Object[]{NullHandling.replaceWithDefault() ? 2L : 1L}
@@ -4890,6 +4920,151 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
// cannot vectorize due to expression filter
cannotVectorize();
}
+ Druids.TimeseriesQueryBuilder builder =
+ Druids.newTimeseriesQueryBuilder()
+ .dataSource(CalciteTests.DATASOURCE1)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .granularity(Granularities.ALL)
+ .context(QUERY_CONTEXT_DEFAULT);
+ if (NullHandling.sqlCompatible()) {
+ builder = builder.virtualColumns(expressionVirtualColumn("v0", "substring(\"dim1\", 0, 1)", ColumnType.STRING))
+ .aggregators(
+ aggregators(
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a0", "cnt"),
+ equality("dim1", "abc", ColumnType.STRING)
+ ),
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a1", "cnt"),
+ not(istrue(equality("dim1", "abc", ColumnType.STRING)))
+ ),
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a2", "cnt"),
+ equality("v0", "a", ColumnType.STRING)
+ ),
+ new FilteredAggregatorFactory(
+ new CountAggregatorFactory("a3"),
+ and(
+ notNull("dim2"),
+ not(equality("dim1", "1", ColumnType.STRING))
+ )
+ ),
+ new FilteredAggregatorFactory(
+ new CountAggregatorFactory("a4"),
+ not(equality("dim1", "1", ColumnType.STRING))
+ ),
+ new FilteredAggregatorFactory(
+ new CountAggregatorFactory("a5"),
+ not(equality("dim1", "1", ColumnType.STRING))
+ ),
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a6", "cnt"),
+ equality("dim2", "a", ColumnType.STRING)
+ ),
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a7", "cnt"),
+ and(
+ equality("dim2", "a", ColumnType.STRING),
+ not(equality("dim1", "1", ColumnType.STRING))
+ )
+ ),
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a8", "cnt"),
+ not(equality("dim1", "1", ColumnType.STRING))
+ ),
+ new FilteredAggregatorFactory(
+ new LongMaxAggregatorFactory("a9", "cnt"),
+ not(equality("dim1", "1", ColumnType.STRING))
+ ),
+ new FilteredAggregatorFactory(
+ new CardinalityAggregatorFactory(
+ "a10",
+ null,
+ dimensions(new DefaultDimensionSpec("m1", "m1", ColumnType.FLOAT)),
+ false,
+ true
+ ),
+ not(equality("dim1", "1", ColumnType.STRING))
+ ),
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a11", "cnt"),
+ and(
+ equality("dim2", "a", ColumnType.STRING),
+ equality("dim1", "b", ColumnType.STRING)
+ )
+ )
+ )
+ );
+ } else {
+ builder = builder.aggregators(
+ aggregators(
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a0", "cnt"),
+ equality("dim1", "abc", ColumnType.STRING)
+ ),
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a1", "cnt"),
+ not(selector("dim1", "abc"))
+ ),
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a2", "cnt"),
+ selector("dim1", "a", new SubstringDimExtractionFn(0, 1))
+
+ ),
+ new FilteredAggregatorFactory(
+ new CountAggregatorFactory("a3"),
+ and(
+ notNull("dim2"),
+ not(equality("dim1", "1", ColumnType.STRING))
+ )
+ ),
+ new FilteredAggregatorFactory(
+ new CountAggregatorFactory("a4"),
+ not(equality("dim1", "1", ColumnType.STRING))
+ ),
+ new FilteredAggregatorFactory(
+ new CountAggregatorFactory("a5"),
+ not(equality("dim1", "1", ColumnType.STRING))
+ ),
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a6", "cnt"),
+ equality("dim2", "a", ColumnType.STRING)
+ ),
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a7", "cnt"),
+ and(
+ equality("dim2", "a", ColumnType.STRING),
+ not(equality("dim1", "1", ColumnType.STRING))
+ )
+ ),
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a8", "cnt"),
+ not(equality("dim1", "1", ColumnType.STRING))
+ ),
+ new FilteredAggregatorFactory(
+ new LongMaxAggregatorFactory("a9", "cnt"),
+ not(equality("dim1", "1", ColumnType.STRING))
+ ),
+ new FilteredAggregatorFactory(
+ new CardinalityAggregatorFactory(
+ "a10",
+ null,
+ dimensions(new DefaultDimensionSpec("m1", "m1", ColumnType.FLOAT)),
+ false,
+ true
+ ),
+ not(equality("dim1", "1", ColumnType.STRING))
+ ),
+ new FilteredAggregatorFactory(
+ new LongSumAggregatorFactory("a11", "cnt"),
+ and(
+ equality("dim2", "a", ColumnType.STRING),
+ equality("dim1", "b", ColumnType.STRING)
+ )
+ )
+ )
+ );
+ }
testQuery(
"SELECT "
+ "SUM(case dim1 when 'abc' then cnt end), "
@@ -4905,84 +5080,7 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
+ "COUNT(DISTINCT CASE WHEN dim1 <> '1' THEN m1 END), "
+ "SUM(cnt) filter(WHERE dim2 = 'a' AND dim1 = 'b') "
+ "FROM druid.foo",
- ImmutableList.of(
- Druids.newTimeseriesQueryBuilder()
- .dataSource(CalciteTests.DATASOURCE1)
- .intervals(querySegmentSpec(Filtration.eternity()))
- .granularity(Granularities.ALL)
- .aggregators(aggregators(
- new FilteredAggregatorFactory(
- new LongSumAggregatorFactory("a0", "cnt"),
- equality("dim1", "abc", ColumnType.STRING)
- ),
- new FilteredAggregatorFactory(
- new LongSumAggregatorFactory("a1", "cnt"),
- NullHandling.sqlCompatible()
- ? not(istrue(equality("dim1", "abc", ColumnType.STRING)))
- : not(selector("dim1", "abc"))
- ),
- new FilteredAggregatorFactory(
- new LongSumAggregatorFactory("a2", "cnt"),
- NullHandling.sqlCompatible()
- ? expressionFilter("(substring(\"dim1\", 0, 1) == 'a')")
- : selector("dim1", "a", new SubstringDimExtractionFn(0, 1))
-
- ),
- new FilteredAggregatorFactory(
- new CountAggregatorFactory("a3"),
- and(
- notNull("dim2"),
- not(equality("dim1", "1", ColumnType.STRING))
- )
- ),
- new FilteredAggregatorFactory(
- new CountAggregatorFactory("a4"),
- not(equality("dim1", "1", ColumnType.STRING))
- ),
- new FilteredAggregatorFactory(
- new CountAggregatorFactory("a5"),
- not(equality("dim1", "1", ColumnType.STRING))
- ),
- new FilteredAggregatorFactory(
- new LongSumAggregatorFactory("a6", "cnt"),
- equality("dim2", "a", ColumnType.STRING)
- ),
- new FilteredAggregatorFactory(
- new LongSumAggregatorFactory("a7", "cnt"),
- and(
- equality("dim2", "a", ColumnType.STRING),
- not(equality("dim1", "1", ColumnType.STRING))
- )
- ),
- new FilteredAggregatorFactory(
- new LongSumAggregatorFactory("a8", "cnt"),
- not(equality("dim1", "1", ColumnType.STRING))
- ),
- new FilteredAggregatorFactory(
- new LongMaxAggregatorFactory("a9", "cnt"),
- not(equality("dim1", "1", ColumnType.STRING))
- ),
- new FilteredAggregatorFactory(
- new CardinalityAggregatorFactory(
- "a10",
- null,
- dimensions(new DefaultDimensionSpec("m1", "m1", ColumnType.FLOAT)),
- false,
- true
- ),
- not(equality("dim1", "1", ColumnType.STRING))
- ),
- new FilteredAggregatorFactory(
- new LongSumAggregatorFactory("a11", "cnt"),
- and(
- equality("dim2", "a", ColumnType.STRING),
- equality("dim1", "b", ColumnType.STRING)
- )
- )
- ))
- .context(QUERY_CONTEXT_DEFAULT)
- .build()
- ),
+ ImmutableList.of(builder.build()),
NullHandling.replaceWithDefault() ?
ImmutableList.of(
new Object[]{1L, 5L, 1L, 2L, 5L, 5L, 2L, 1L, 5L, 1L, 5L, 0L}
@@ -7673,39 +7771,46 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
// Cannot vectorize due to extractionFn in dimension spec.
cannotVectorize();
+ GroupByQuery.Builder builder =
+ GroupByQuery.builder()
+ .setDataSource(CalciteTests.DATASOURCE1)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(
+ dimensions(
+ new ExtractionDimensionSpec(
+ "dim1",
+ "d0",
+ new RegexDimExtractionFn("^.", 0, true, null)
+ ),
+ new ExtractionDimensionSpec(
+ "dim1",
+ "d1",
+ new RegexDimExtractionFn("^(.)", 1, true, null)
+ )
+ )
+ )
+ .setContext(QUERY_CONTEXT_DEFAULT);
+ if (NullHandling.sqlCompatible()) {
+ builder = builder.setVirtualColumns(
+ expressionVirtualColumn("v0", "regexp_extract(\"dim1\",'^(.)',1)", ColumnType.STRING)
+ )
+ .setDimFilter(
+ not(equality("v0", "x", ColumnType.STRING))
+ );
+ } else {
+ builder = builder.setDimFilter(
+ not(selector("dim1", "x", new RegexDimExtractionFn("^(.)", 1, true, null)))
+ );
+ }
+
testQuery(
"SELECT DISTINCT\n"
+ " REGEXP_EXTRACT(dim1, '^.'),\n"
+ " REGEXP_EXTRACT(dim1, '^(.)', 1)\n"
+ "FROM foo\n"
+ "WHERE REGEXP_EXTRACT(dim1, '^(.)', 1) <> 'x'",
- ImmutableList.of(
- GroupByQuery.builder()
- .setDataSource(CalciteTests.DATASOURCE1)
- .setInterval(querySegmentSpec(Filtration.eternity()))
- .setGranularity(Granularities.ALL)
- .setDimFilter(
- NullHandling.replaceWithDefault()
- ? not(selector("dim1", "x", new RegexDimExtractionFn("^(.)", 1, true, null)))
- : expressionFilter("(regexp_extract(\"dim1\",'^(.)',1) != 'x')")
- )
- .setDimensions(
- dimensions(
- new ExtractionDimensionSpec(
- "dim1",
- "d0",
- new RegexDimExtractionFn("^.", 0, true, null)
- ),
- new ExtractionDimensionSpec(
- "dim1",
- "d1",
- new RegexDimExtractionFn("^(.)", 1, true, null)
- )
- )
- )
- .setContext(QUERY_CONTEXT_DEFAULT)
- .build()
- ),
+ ImmutableList.of(builder.build()),
NullHandling.replaceWithDefault()
? ImmutableList.of(
new Object[]{NULL_STRING, NULL_STRING},
@@ -7750,30 +7855,44 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
{
// Cannot vectorize due to extractionFn in dimension spec.
cannotVectorize();
+ Druids.TimeseriesQueryBuilder builder =
+ Druids.newTimeseriesQueryBuilder()
+ .dataSource(CalciteTests.DATASOURCE1)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .granularity(Granularities.ALL)
+ .aggregators(new CountAggregatorFactory("a0"))
+ .context(QUERY_CONTEXT_DEFAULT);
+ if (NullHandling.sqlCompatible()) {
+ builder = builder.virtualColumns(
+ expressionVirtualColumn("v0", "regexp_extract(\"dim1\",'^1')", ColumnType.STRING),
+ expressionVirtualColumn("v1", "regexp_extract(concat('Z',\"dim1\"),'^Z2')", ColumnType.STRING)
+ )
+ .filters(
+ or(
+ notNull("v0"),
+ notNull("v1")
+ )
+ );
+ } else {
+ builder = builder.virtualColumns(
+ expressionVirtualColumn("v0", "regexp_extract(concat('Z',\"dim1\"),'^Z2')", ColumnType.STRING)
+ )
+ .filters(
+ or(
+ not(selector(
+ "dim1",
+ NullHandling.defaultStringValue(),
+ new RegexDimExtractionFn("^1", 0, true, null)
+ )),
+ notNull("v0")
+ )
+ );
+ }
testQuery(
"SELECT COUNT(*)\n"
+ "FROM foo\n"
+ "WHERE REGEXP_EXTRACT(dim1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || dim1, '^Z2') IS NOT NULL",
- ImmutableList.of(
- Druids.newTimeseriesQueryBuilder()
- .dataSource(CalciteTests.DATASOURCE1)
- .intervals(querySegmentSpec(Filtration.eternity()))
- .granularity(Granularities.ALL)
- .virtualColumns(
- expressionVirtualColumn("v0", "regexp_extract(concat('Z',\"dim1\"),'^Z2')", ColumnType.STRING)
- )
- .filters(
- or(
- NullHandling.replaceWithDefault()
- ? not(selector("dim1", NullHandling.defaultStringValue(), new RegexDimExtractionFn("^1", 0, true, null)))
- : expressionFilter("notnull(regexp_extract(\"dim1\",'^1'))"),
- notNull("v0")
- )
- )
- .aggregators(new CountAggregatorFactory("a0"))
- .context(QUERY_CONTEXT_DEFAULT)
- .build()
- ),
+ ImmutableList.of(builder.build()),
ImmutableList.of(
new Object[]{3L}
)
@@ -8687,39 +8806,44 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
null,
true
);
-
+ GroupByQuery.Builder builder =
+ GroupByQuery.builder()
+ .setDataSource(CalciteTests.DATASOURCE1)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(
+ dimensions(
+ new ExtractionDimensionSpec(
+ "dim1",
+ "d0",
+ ColumnType.STRING,
+ extractionFn
+ )
+ )
+ )
+ .setAggregatorSpecs(
+ aggregators(
+ new CountAggregatorFactory("a0")
+ )
+ )
+ .setContext(QUERY_CONTEXT_DEFAULT);
+ if (NullHandling.sqlCompatible()) {
+ builder = builder.setVirtualColumns(
+ expressionVirtualColumn("v0", "lookup(\"dim1\",'lookyloo')", ColumnType.STRING)
+ )
+ .setDimFilter(
+ not(equality("v0", "xxx", ColumnType.STRING))
+ );
+ } else {
+ builder = builder.setDimFilter(
+ not(selector("dim1", "xxx", extractionFn))
+ );
+ }
testQuery(
"SELECT LOOKUP(dim1, 'lookyloo'), COUNT(*) FROM foo\n"
+ "WHERE LOOKUP(dim1, 'lookyloo') <> 'xxx'\n"
+ "GROUP BY LOOKUP(dim1, 'lookyloo')",
- ImmutableList.of(
- GroupByQuery.builder()
- .setDataSource(CalciteTests.DATASOURCE1)
- .setInterval(querySegmentSpec(Filtration.eternity()))
- .setGranularity(Granularities.ALL)
- .setDimFilter(
- NullHandling.replaceWithDefault()
- ? not(selector("dim1", "xxx", extractionFn))
- : expressionFilter("(lookup(\"dim1\",'lookyloo') != 'xxx')")
- )
- .setDimensions(
- dimensions(
- new ExtractionDimensionSpec(
- "dim1",
- "d0",
- ColumnType.STRING,
- extractionFn
- )
- )
- )
- .setAggregatorSpecs(
- aggregators(
- new CountAggregatorFactory("a0")
- )
- )
- .setContext(QUERY_CONTEXT_DEFAULT)
- .build()
- ),
+ ImmutableList.of(builder.build()),
NullHandling.replaceWithDefault()
? ImmutableList.of(
new Object[]{NULL_STRING, 5L},
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java
index 1e20577b532..460c0dd7638 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java
@@ -43,6 +43,7 @@ import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.topn.DimensionTopNMetricSpec;
import org.apache.druid.query.topn.InvertedTopNMetricSpec;
import org.apache.druid.query.topn.TopNQueryBuilder;
+import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
@@ -623,10 +624,17 @@ public class CalciteSelectQueryTest extends BaseCalciteQueryTest
"SELECT distinct dim1 FROM druid.foo WHERE substring(substring(dim1, 2), 1, 1) = 'e' OR dim2 = 'a'",
ImmutableList.of(
GroupByQuery.builder()
- .setDataSource(CalciteTests.DATASOURCE1)
- .setInterval(querySegmentSpec(Filtration.eternity()))
- .setGranularity(Granularities.ALL)
- .setDimensions(dimensions(new DefaultDimensionSpec("dim1", "d0")))
+ .setDataSource(CalciteTests.DATASOURCE1)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(dimensions(new DefaultDimensionSpec("dim1", "d0")))
+ .setVirtualColumns(
+ NullHandling.replaceWithDefault()
+ ? VirtualColumns.EMPTY
+ : VirtualColumns.create(
+ expressionVirtualColumn("v0", "substring(substring(\"dim1\", 1, -1), 0, 1)", ColumnType.STRING)
+ )
+ )
.setDimFilter(
or(
NullHandling.replaceWithDefault()
@@ -638,7 +646,7 @@ public class CalciteSelectQueryTest extends BaseCalciteQueryTest
new SubstringDimExtractionFn(0, 1)
)
)
- : expressionFilter("(substring(substring(\"dim1\", 1, -1), 0, 1) == 'e')"),
+ : equality("v0", "e", ColumnType.STRING),
equality("dim2", "a", ColumnType.STRING)
)
)
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java
index 404d3e02767..0f7d86eb411 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java
@@ -51,6 +51,7 @@ import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.topn.DimensionTopNMetricSpec;
import org.apache.druid.query.topn.TopNQueryBuilder;
+import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.join.JoinType;
import org.apache.druid.sql.calcite.expression.DruidExpression;
@@ -343,10 +344,17 @@ public class CalciteSubqueryTest extends BaseCalciteQueryTest
)
)
.intervals(querySegmentSpec(Filtration.eternity()))
+ .virtualColumns(
+ NullHandling.replaceWithDefault()
+ ? VirtualColumns.EMPTY
+ : VirtualColumns.create(
+ expressionVirtualColumn("v0", "substring(\"dim1\", 0, 1)", ColumnType.STRING)
+ )
+ )
.filters(
NullHandling.replaceWithDefault()
? not(selector("dim1", "z", new SubstringDimExtractionFn(0, 1)))
- : expressionFilter("(substring(\"dim1\", 0, 1) != 'z')")
+ : not(equality("v0", "z", ColumnType.STRING))
)
.granularity(Granularities.ALL)
.aggregators(aggregators(new CountAggregatorFactory("a0")))
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org