You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by jo...@apache.org on 2020/09/30 23:58:25 UTC
[druid] branch 0.20.0 updated: vectorize constant expressions with
optimized selectors (#10440) (#10457)
This is an automated email from the ASF dual-hosted git repository.
jonwei pushed a commit to branch 0.20.0
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/0.20.0 by this push:
new 51a4b1c vectorize constant expressions with optimized selectors (#10440) (#10457)
51a4b1c is described below
commit 51a4b1cde69a8eb6fa523aba7c7e82042ba89254
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Wed Sep 30 16:58:01 2020 -0700
vectorize constant expressions with optimized selectors (#10440) (#10457)
---
.../segment/vector/ConstantVectorSelectors.java | 172 +++++++++++++++++++++
.../druid/segment/virtual/ExpressionPlan.java | 5 +
.../segment/virtual/ExpressionVectorSelectors.java | 34 ++++
.../segment/virtual/ExpressionVirtualColumn.java | 24 ++-
.../virtual/ExpressionVectorSelectorsTest.java | 97 +++++++-----
.../calcite/SqlVectorizedExpressionSanityTest.java | 1 +
6 files changed, 297 insertions(+), 36 deletions(-)
diff --git a/processing/src/main/java/org/apache/druid/segment/vector/ConstantVectorSelectors.java b/processing/src/main/java/org/apache/druid/segment/vector/ConstantVectorSelectors.java
new file mode 100644
index 0000000..c1e3c3b
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/vector/ConstantVectorSelectors.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.vector;
+
+import org.apache.druid.segment.IdLookup;
+
+import javax.annotation.Nullable;
+import java.util.Arrays;
+
+public class ConstantVectorSelectors
+{ // Static factory methods that build vector selectors backed by a single constant value.
+ public static VectorValueSelector vectorValueSelector(VectorSizeInspector inspector, @Nullable Number constant)
+ { // Returns a value selector whose long/float/double vectors are pre-filled with the constant.
+ if (constant == null) { // a null constant is handed off to NilVectorSelector instead
+ return NilVectorSelector.create(inspector);
+ }
+ final long[] longVector = new long[inspector.getMaxVectorSize()]; // allocated once here, returned as-is by the getter
+ final float[] floatVector = new float[inspector.getMaxVectorSize()];
+ final double[] doubleVector = new double[inspector.getMaxVectorSize()];
+ Arrays.fill(longVector, constant.longValue()); // each array gets the constant converted via the matching Number accessor
+ Arrays.fill(floatVector, constant.floatValue());
+ Arrays.fill(doubleVector, constant.doubleValue());
+ return new VectorValueSelector()
+ {
+ @Override
+ public long[] getLongVector()
+ {
+ return longVector;
+ }
+
+ @Override
+ public float[] getFloatVector()
+ {
+ return floatVector;
+ }
+
+ @Override
+ public double[] getDoubleVector()
+ {
+ return doubleVector;
+ }
+
+ @Nullable
+ @Override
+ public boolean[] getNullVector()
+ {
+ return null; // never supplies a null vector; the null-constant case already returned above
+ }
+
+ @Override
+ public int getMaxVectorSize()
+ {
+ return inspector.getMaxVectorSize(); // size queries are forwarded to the supplied inspector
+ }
+
+ @Override
+ public int getCurrentVectorSize()
+ {
+ return inspector.getCurrentVectorSize();
+ }
+ };
+ }
+
+ public static VectorObjectSelector vectorObjectSelector(
+ VectorSizeInspector inspector,
+ @Nullable Object object
+ )
+ { // Returns an object selector whose vector holds the given object in every slot.
+ if (object == null) { // a null object is handed off to NilVectorSelector instead
+ return NilVectorSelector.create(inspector);
+ }
+
+ final Object[] objects = new Object[inspector.getMaxVectorSize()];
+ Arrays.fill(objects, object); // every slot references the same instance
+
+ return new VectorObjectSelector()
+ {
+ @Override
+ public Object[] getObjectVector()
+ {
+ return objects;
+ }
+
+ @Override
+ public int getMaxVectorSize()
+ {
+ return inspector.getMaxVectorSize(); // size queries are forwarded to the supplied inspector
+ }
+
+ @Override
+ public int getCurrentVectorSize()
+ {
+ return inspector.getCurrentVectorSize();
+ }
+ };
+ }
+
+ public static SingleValueDimensionVectorSelector singleValueDimensionVectorSelector(
+ VectorSizeInspector inspector,
+ @Nullable String value
+ )
+ { // Returns a dimension selector with cardinality 1 whose lookupName always yields the given value.
+ if (value == null) { // a null value is handed off to NilVectorSelector instead
+ return NilVectorSelector.create(inspector);
+ }
+
+ final int[] row = new int[inspector.getMaxVectorSize()]; // never written to, so every entry stays at the default 0
+ return new SingleValueDimensionVectorSelector()
+ {
+ @Override
+ public int[] getRowVector()
+ {
+ return row;
+ }
+
+ @Override
+ public int getValueCardinality()
+ {
+ return 1;
+ }
+
+ @Nullable
+ @Override
+ public String lookupName(int id)
+ {
+ return value; // the id argument is not consulted
+ }
+
+ @Override
+ public boolean nameLookupPossibleInAdvance()
+ {
+ return true;
+ }
+
+ @Nullable
+ @Override
+ public IdLookup idLookup()
+ {
+ return null; // no IdLookup is offered by this selector
+ }
+
+ @Override
+ public int getMaxVectorSize()
+ {
+ return inspector.getMaxVectorSize(); // size queries are forwarded to the supplied inspector
+ }
+
+ @Override
+ public int getCurrentVectorSize()
+ {
+ return inspector.getCurrentVectorSize();
+ }
+ };
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java
index 07e6ce3..7dd0d5d 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java
@@ -100,6 +100,11 @@ public class ExpressionPlan
this.unappliedInputs = unappliedInputs;
}
+ public boolean isConstant()
+ { // Reports whether the analyzed expression has no required bindings.
+ return analysis.getRequiredBindings().isEmpty();
+ }
+
public Expr getExpression()
{
return expression;
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java
index 25f2f22..3cb46e8 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java
@@ -23,8 +23,11 @@ import com.google.common.base.Preconditions;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprType;
import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.query.expression.ExprUtils;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.vector.ConstantVectorSelectors;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
import org.apache.druid.segment.vector.VectorObjectSelector;
import org.apache.druid.segment.vector.VectorValueSelector;
@@ -38,6 +41,22 @@ public class ExpressionVectorSelectors
// No instantiation.
}
+ public static SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(
+ VectorColumnSelectorFactory factory,
+ Expr expression
+ )
+ { // Builds a dimension vector selector for the expression; throws IllegalStateException unless the plan is constant.
+ final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression);
+ Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE)); // plans without the VECTORIZABLE trait are rejected
+ // only constant expressions are currently supported, nothing else should get here
+
+ if (plan.isConstant()) {
+ String constant = plan.getExpression().eval(ExprUtils.nilBindings()).asString(); // evaluated once, against nil bindings
+ return ConstantVectorSelectors.singleValueDimensionVectorSelector(factory.getVectorSizeInspector(), constant);
+ }
+ throw new IllegalStateException("Only constant expressions currently support dimension selectors");
+ }
+
public static VectorValueSelector makeVectorValueSelector(
VectorColumnSelectorFactory factory,
Expr expression
@@ -45,6 +64,13 @@ public class ExpressionVectorSelectors
{
final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression);
Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE));
+
+ if (plan.isConstant()) {
+ return ConstantVectorSelectors.vectorValueSelector(
+ factory.getVectorSizeInspector(),
+ (Number) plan.getExpression().eval(ExprUtils.nilBindings()).value()
+ );
+ }
final Expr.VectorInputBinding bindings = createVectorBindings(plan.getAnalysis(), factory);
final ExprVectorProcessor<?> processor = plan.getExpression().buildVectorized(bindings);
return new ExpressionVectorValueSelector(processor, bindings);
@@ -57,6 +83,14 @@ public class ExpressionVectorSelectors
{
final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression);
Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE));
+
+ if (plan.isConstant()) {
+ return ConstantVectorSelectors.vectorObjectSelector(
+ factory.getVectorSizeInspector(),
+ plan.getExpression().eval(ExprUtils.nilBindings()).value()
+ );
+ }
+
final Expr.VectorInputBinding bindings = createVectorBindings(plan.getAnalysis(), factory);
final ExprVectorProcessor<?> processor = plan.getExpression().buildVectorized(bindings);
return new ExpressionVectorObjectSelector(processor, bindings);
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
index 8ad46d6..3260afa 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
@@ -42,6 +42,7 @@ import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
import org.apache.druid.segment.vector.VectorObjectSelector;
import org.apache.druid.segment.vector.VectorValueSelector;
@@ -147,6 +148,15 @@ public class ExpressionVirtualColumn implements VirtualColumn
}
@Override
+ public SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector(
+ DimensionSpec dimensionSpec,
+ VectorColumnSelectorFactory factory
+ )
+ { // Delegates to ExpressionVectorSelectors with the parsed expression; dimensionSpec is not used in this body.
+ return ExpressionVectorSelectors.makeSingleValueDimensionVectorSelector(factory, parsedExpression.get());
+ }
+
+ @Override
public VectorValueSelector makeVectorValueSelector(String columnName, VectorColumnSelectorFactory factory)
{
return ExpressionVectorSelectors.makeVectorValueSelector(factory, parsedExpression.get());
@@ -200,6 +210,9 @@ public class ExpressionVirtualColumn implements VirtualColumn
return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(outputType);
}
+ // array types shouldn't escape the expression system currently, so coerce anything past this point into some
+ // style of string
+
// we don't have to check for unknown input here because output type is unable to be inferred if we don't know
// the complete set of input types
if (plan.any(ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.NEEDS_APPLIED)) {
@@ -207,7 +220,16 @@ public class ExpressionVirtualColumn implements VirtualColumn
return new ColumnCapabilitiesImpl().setType(ValueType.STRING).setHasMultipleValues(true);
}
- // if we got here, lets call it single value string output
+ // constant strings are supported as dimension selectors, set them as dictionary encoded and unique
+ if (plan.isConstant()) {
+ return new ColumnCapabilitiesImpl().setType(ValueType.STRING)
+ .setDictionaryEncoded(true)
+ .setDictionaryValuesUnique(true)
+ .setDictionaryValuesSorted(true)
+ .setHasMultipleValues(false);
+ }
+
+ // if we got here, let's call it single value string output, non-dictionary encoded
return new ColumnCapabilitiesImpl().setType(ValueType.STRING)
.setHasMultipleValues(false)
.setDictionaryEncoded(false);
diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java
index 9c7926f..9e0a7b6 100644
--- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java
@@ -28,6 +28,7 @@ import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.ExprType;
import org.apache.druid.math.expr.Parser;
+import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.ColumnValueSelector;
@@ -39,6 +40,7 @@ import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
import org.apache.druid.segment.vector.VectorCursor;
import org.apache.druid.segment.vector.VectorObjectSelector;
import org.apache.druid.segment.vector.VectorValueSelector;
@@ -75,7 +77,11 @@ public class ExpressionVectorSelectorsTest
"parse_long(string1)",
"parse_long(string1) * double3",
"parse_long(string5) * parse_long(string1)",
- "parse_long(string5) * parse_long(string1) * double3"
+ "parse_long(string5) * parse_long(string1) * double3",
+ "'string constant'",
+ "1",
+ "192412.24124",
+ "null"
);
private static final int ROWS_PER_SEGMENT = 100_000;
@@ -167,7 +173,8 @@ public class ExpressionVectorSelectorsTest
)
)
);
- VectorCursor cursor = new QueryableIndexStorageAdapter(index).makeVectorCursor(
+ final QueryableIndexStorageAdapter storageAdapter = new QueryableIndexStorageAdapter(index);
+ VectorCursor cursor = storageAdapter.makeVectorCursor(
null,
index.getDataInterval(),
virtualColumns,
@@ -176,40 +183,55 @@ public class ExpressionVectorSelectorsTest
null
);
- VectorValueSelector selector = null;
- VectorObjectSelector objectSelector = null;
- if (outputType.isNumeric()) {
- selector = cursor.getColumnSelectorFactory().makeValueSelector("v");
- } else {
- objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v");
- }
+ ColumnCapabilities capabilities = virtualColumns.getColumnCapabilities(storageAdapter, "v");
+
int rowCount = 0;
- while (!cursor.isDone()) {
- boolean[] nulls;
- switch (outputType) {
- case LONG:
- nulls = selector.getNullVector();
- long[] longs = selector.getLongVector();
- for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
- results.add(nulls != null && nulls[i] ? null : longs[i]);
- }
- break;
- case DOUBLE:
- nulls = selector.getNullVector();
- double[] doubles = selector.getDoubleVector();
- for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
- results.add(nulls != null && nulls[i] ? null : doubles[i]);
- }
- break;
- case STRING:
- Object[] objects = objectSelector.getObjectVector();
- for (int i = 0; i < objectSelector.getCurrentVectorSize(); i++, rowCount++) {
- results.add(objects[i]);
- }
- break;
+ if (capabilities.isDictionaryEncoded().isTrue()) {
+ SingleValueDimensionVectorSelector selector = cursor.getColumnSelectorFactory().makeSingleValueDimensionSelector(
+ DefaultDimensionSpec.of("v")
+ );
+ while (!cursor.isDone()) {
+ int[] row = selector.getRowVector();
+ for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
+ results.add(selector.lookupName(row[i]));
+ }
+ cursor.advance();
+ }
+ } else {
+ VectorValueSelector selector = null;
+ VectorObjectSelector objectSelector = null;
+ if (outputType.isNumeric()) {
+ selector = cursor.getColumnSelectorFactory().makeValueSelector("v");
+ } else {
+ objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v");
}
+ while (!cursor.isDone()) {
+ boolean[] nulls;
+ switch (outputType) {
+ case LONG:
+ nulls = selector.getNullVector();
+ long[] longs = selector.getLongVector();
+ for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
+ results.add(nulls != null && nulls[i] ? null : longs[i]);
+ }
+ break;
+ case DOUBLE:
+ nulls = selector.getNullVector();
+ double[] doubles = selector.getDoubleVector();
+ for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
+ results.add(nulls != null && nulls[i] ? null : doubles[i]);
+ }
+ break;
+ case STRING:
+ Object[] objects = objectSelector.getObjectVector();
+ for (int i = 0; i < objectSelector.getCurrentVectorSize(); i++, rowCount++) {
+ results.add(objects[i]);
+ }
+ break;
+ }
- cursor.advance();
+ cursor.advance();
+ }
}
closer.register(cursor);
@@ -224,10 +246,15 @@ public class ExpressionVectorSelectorsTest
int rowCountCursor = cursors
.map(nonVectorized -> {
- final ColumnValueSelector nonSelector = nonVectorized.getColumnSelectorFactory().makeColumnValueSelector("v");
+ final ColumnValueSelector nonSelector = nonVectorized.getColumnSelectorFactory()
+ .makeColumnValueSelector("v");
int rows = 0;
while (!nonVectorized.isDone()) {
- Assert.assertEquals(StringUtils.format("Failed at row %s", rows), nonSelector.getObject(), results.get(rows));
+ Assert.assertEquals(
+ StringUtils.format("Failed at row %s", rows),
+ nonSelector.getObject(),
+ results.get(rows)
+ );
rows++;
nonVectorized.advance();
}
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
index 874e745..4717937 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
@@ -92,6 +92,7 @@ public class SqlVectorizedExpressionSanityTest extends InitializedNullHandlingTe
"SELECT SUM(PARSE_LONG(string1)) FROM foo",
"SELECT SUM(PARSE_LONG(string3)) FROM foo",
"SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
+ "SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(long1 * double4) FROM foo WHERE string2 = '10' GROUP BY 1,2 ORDER BY 3",
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1",
"SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2",
"SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1",
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org