You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by jo...@apache.org on 2020/09/30 23:58:25 UTC

[druid] branch 0.20.0 updated: vectorize constant expressions with optimized selectors (#10440) (#10457)

This is an automated email from the ASF dual-hosted git repository.

jonwei pushed a commit to branch 0.20.0
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/0.20.0 by this push:
     new 51a4b1c  vectorize constant expressions with optimized selectors (#10440) (#10457)
51a4b1c is described below

commit 51a4b1cde69a8eb6fa523aba7c7e82042ba89254
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Wed Sep 30 16:58:01 2020 -0700

    vectorize constant expressions with optimized selectors (#10440) (#10457)
---
 .../segment/vector/ConstantVectorSelectors.java    | 172 +++++++++++++++++++++
 .../druid/segment/virtual/ExpressionPlan.java      |   5 +
 .../segment/virtual/ExpressionVectorSelectors.java |  34 ++++
 .../segment/virtual/ExpressionVirtualColumn.java   |  24 ++-
 .../virtual/ExpressionVectorSelectorsTest.java     |  97 +++++++-----
 .../calcite/SqlVectorizedExpressionSanityTest.java |   1 +
 6 files changed, 297 insertions(+), 36 deletions(-)

diff --git a/processing/src/main/java/org/apache/druid/segment/vector/ConstantVectorSelectors.java b/processing/src/main/java/org/apache/druid/segment/vector/ConstantVectorSelectors.java
new file mode 100644
index 0000000..c1e3c3b
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/segment/vector/ConstantVectorSelectors.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.segment.vector;
+
+import org.apache.druid.segment.IdLookup;
+
+import javax.annotation.Nullable;
+import java.util.Arrays;
+
+public class ConstantVectorSelectors // factories for vector selectors that yield one constant for every row
+{
+  public static VectorValueSelector vectorValueSelector(VectorSizeInspector inspector, @Nullable Number constant)
+  { // numeric flavor: the constant is exposed as long, float and double vectors
+    if (constant == null) { // nothing to repeat: delegate to NilVectorSelector instead
+      return NilVectorSelector.create(inspector);
+    }
+    final long[] longVector = new long[inspector.getMaxVectorSize()]; // sized for the largest possible vector
+    final float[] floatVector = new float[inspector.getMaxVectorSize()];
+    final double[] doubleVector = new double[inspector.getMaxVectorSize()];
+    Arrays.fill(longVector, constant.longValue()); // filled once here; the getters below only return the arrays
+    Arrays.fill(floatVector, constant.floatValue());
+    Arrays.fill(doubleVector, constant.doubleValue());
+    return new VectorValueSelector()
+    {
+      @Override
+      public long[] getLongVector()
+      {
+        return longVector; // same array instance on every call
+      }
+
+      @Override
+      public float[] getFloatVector()
+      {
+        return floatVector; // same array instance on every call
+      }
+
+      @Override
+      public double[] getDoubleVector()
+      {
+        return doubleVector; // same array instance on every call
+      }
+
+      @Nullable
+      @Override
+      public boolean[] getNullVector()
+      {
+        return null; // constant is non-null on this path, so no null mask is supplied
+      }
+
+      @Override
+      public int getMaxVectorSize()
+      {
+        return inspector.getMaxVectorSize(); // sizes are always read through the inspector
+      }
+
+      @Override
+      public int getCurrentVectorSize()
+      {
+        return inspector.getCurrentVectorSize(); // callers should read only this many leading slots
+      }
+    };
+  }
+
+  public static VectorObjectSelector vectorObjectSelector(
+      VectorSizeInspector inspector,
+      @Nullable Object object
+  )
+  { // object flavor: every slot of the object vector references the same instance
+    if (object == null) { // nothing to repeat: delegate to NilVectorSelector instead
+      return NilVectorSelector.create(inspector);
+    }
+
+    final Object[] objects = new Object[inspector.getMaxVectorSize()]; // sized for the largest possible vector
+    Arrays.fill(objects, object); // filled once here; getObjectVector() only returns the array
+
+    return new VectorObjectSelector()
+    {
+      @Override
+      public Object[] getObjectVector()
+      {
+        return objects; // same array instance on every call
+      }
+
+      @Override
+      public int getMaxVectorSize()
+      {
+        return inspector.getMaxVectorSize(); // sizes are always read through the inspector
+      }
+
+      @Override
+      public int getCurrentVectorSize()
+      {
+        return inspector.getCurrentVectorSize(); // callers should read only this many leading slots
+      }
+    };
+  }
+
+  public static SingleValueDimensionVectorSelector singleValueDimensionVectorSelector(
+      VectorSizeInspector inspector,
+      @Nullable String value
+  )
+  { // dimension flavor: every row has id 0 and every id looks up to the constant
+    if (value == null) { // nothing to repeat: delegate to NilVectorSelector instead
+      return NilVectorSelector.create(inspector);
+    }
+
+    final int[] row = new int[inspector.getMaxVectorSize()]; // never written after allocation, so all ids stay 0
+    return new SingleValueDimensionVectorSelector()
+    {
+      @Override
+      public int[] getRowVector()
+      {
+        return row; // same all-zero array on every call
+      }
+
+      @Override
+      public int getValueCardinality()
+      {
+        return 1; // the constant is the only distinct value
+      }
+
+      @Nullable
+      @Override
+      public String lookupName(int id)
+      {
+        return value; // id is ignored; any id resolves to the constant
+      }
+
+      @Override
+      public boolean nameLookupPossibleInAdvance()
+      {
+        return true; // lookupName always returns the same value
+      }
+
+      @Nullable
+      @Override
+      public IdLookup idLookup()
+      {
+        return null; // no IdLookup is offered by this selector
+      }
+
+      @Override
+      public int getMaxVectorSize()
+      {
+        return inspector.getMaxVectorSize(); // sizes are always read through the inspector
+      }
+
+      @Override
+      public int getCurrentVectorSize()
+      {
+        return inspector.getCurrentVectorSize(); // callers should read only this many leading slots
+      }
+    };
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java
index 07e6ce3..7dd0d5d 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java
@@ -100,6 +100,11 @@ public class ExpressionPlan
     this.unappliedInputs = unappliedInputs;
   }
 
+  public boolean isConstant()
+  { // "constant" here means the analyzed expression requires no input bindings
+    return analysis.getRequiredBindings().isEmpty();
+  }
+
   public Expr getExpression()
   {
     return expression;
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java
index 25f2f22..3cb46e8 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java
@@ -23,8 +23,11 @@ import com.google.common.base.Preconditions;
 import org.apache.druid.math.expr.Expr;
 import org.apache.druid.math.expr.ExprType;
 import org.apache.druid.math.expr.vector.ExprVectorProcessor;
+import org.apache.druid.query.expression.ExprUtils;
 import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.vector.ConstantVectorSelectors;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
 import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
 import org.apache.druid.segment.vector.VectorObjectSelector;
 import org.apache.druid.segment.vector.VectorValueSelector;
@@ -38,6 +41,22 @@ public class ExpressionVectorSelectors
     // No instantiation.
   }
 
+  public static SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(
+      VectorColumnSelectorFactory factory,
+      Expr expression
+  )
+  { // plans the expression, then builds a dimension selector for the constant case only
+    final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression);
+    Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE)); // throws if not vectorizable
+    // only constant expressions are currently supported, nothing else should get here
+
+    if (plan.isConstant()) { // evaluate once up front and wrap the result in a constant selector
+      String constant = plan.getExpression().eval(ExprUtils.nilBindings()).asString(); // may be null
+      return ConstantVectorSelectors.singleValueDimensionVectorSelector(factory.getVectorSizeInspector(), constant);
+    }
+    throw new IllegalStateException("Only constant expressions currently support dimension selectors");
+  }
+
   public static VectorValueSelector makeVectorValueSelector(
       VectorColumnSelectorFactory factory,
       Expr expression
@@ -45,6 +64,13 @@ public class ExpressionVectorSelectors
   {
     final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression);
     Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE));
+
+    if (plan.isConstant()) {
+      return ConstantVectorSelectors.vectorValueSelector(
+          factory.getVectorSizeInspector(),
+          (Number) plan.getExpression().eval(ExprUtils.nilBindings()).value()
+      );
+    }
     final Expr.VectorInputBinding bindings = createVectorBindings(plan.getAnalysis(), factory);
     final ExprVectorProcessor<?> processor = plan.getExpression().buildVectorized(bindings);
     return new ExpressionVectorValueSelector(processor, bindings);
@@ -57,6 +83,14 @@ public class ExpressionVectorSelectors
   {
     final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression);
     Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE));
+
+    if (plan.isConstant()) {
+      return ConstantVectorSelectors.vectorObjectSelector(
+          factory.getVectorSizeInspector(),
+          plan.getExpression().eval(ExprUtils.nilBindings()).value()
+      );
+    }
+
     final Expr.VectorInputBinding bindings = createVectorBindings(plan.getAnalysis(), factory);
     final ExprVectorProcessor<?> processor = plan.getExpression().buildVectorized(bindings);
     return new ExpressionVectorObjectSelector(processor, bindings);
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
index 8ad46d6..3260afa 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
@@ -42,6 +42,7 @@ import org.apache.druid.segment.VirtualColumn;
 import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
 import org.apache.druid.segment.column.ValueType;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
 import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
 import org.apache.druid.segment.vector.VectorObjectSelector;
 import org.apache.druid.segment.vector.VectorValueSelector;
@@ -147,6 +148,15 @@ public class ExpressionVirtualColumn implements VirtualColumn
   }
 
   @Override
+  public SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector(
+      DimensionSpec dimensionSpec,
+      VectorColumnSelectorFactory factory
+  )
+  { // dimensionSpec is not consulted here; the selector is built from the parsed expression alone
+    return ExpressionVectorSelectors.makeSingleValueDimensionVectorSelector(factory, parsedExpression.get());
+  }
+
+  @Override
   public VectorValueSelector makeVectorValueSelector(String columnName, VectorColumnSelectorFactory factory)
   {
     return ExpressionVectorSelectors.makeVectorValueSelector(factory, parsedExpression.get());
@@ -200,6 +210,9 @@ public class ExpressionVirtualColumn implements VirtualColumn
         return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(outputType);
       }
 
+      // array types shouldn't escape the expression system currently, so coerce anything past this point into some
+      // style of string
+
       // we don't have to check for unknown input here because output type is unable to be inferred if we don't know
       // the complete set of input types
       if (plan.any(ExpressionPlan.Trait.NON_SCALAR_OUTPUT, ExpressionPlan.Trait.NEEDS_APPLIED)) {
@@ -207,7 +220,16 @@ public class ExpressionVirtualColumn implements VirtualColumn
         return new ColumnCapabilitiesImpl().setType(ValueType.STRING).setHasMultipleValues(true);
       }
 
-      // if we got here, lets call it single value string output
+      // constant strings are supported as dimension selectors, set them as dictionary encoded and unique
+      if (plan.isConstant()) {
+        return new ColumnCapabilitiesImpl().setType(ValueType.STRING)
+                                           .setDictionaryEncoded(true)
+                                           .setDictionaryValuesUnique(true)
+                                           .setDictionaryValuesSorted(true)
+                                           .setHasMultipleValues(false);
+      }
+
+      // if we got here, let's call it single value string output, non-dictionary encoded
       return new ColumnCapabilitiesImpl().setType(ValueType.STRING)
                                          .setHasMultipleValues(false)
                                          .setDictionaryEncoded(false);
diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java
index 9c7926f..9e0a7b6 100644
--- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java
@@ -28,6 +28,7 @@ import org.apache.druid.math.expr.Expr;
 import org.apache.druid.math.expr.ExprMacroTable;
 import org.apache.druid.math.expr.ExprType;
 import org.apache.druid.math.expr.Parser;
+import org.apache.druid.query.dimension.DefaultDimensionSpec;
 import org.apache.druid.query.expression.TestExprMacroTable;
 import org.apache.druid.segment.ColumnInspector;
 import org.apache.druid.segment.ColumnValueSelector;
@@ -39,6 +40,7 @@ import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.generator.GeneratorBasicSchemas;
 import org.apache.druid.segment.generator.GeneratorSchemaInfo;
 import org.apache.druid.segment.generator.SegmentGenerator;
+import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
 import org.apache.druid.segment.vector.VectorCursor;
 import org.apache.druid.segment.vector.VectorObjectSelector;
 import org.apache.druid.segment.vector.VectorValueSelector;
@@ -75,7 +77,11 @@ public class ExpressionVectorSelectorsTest
       "parse_long(string1)",
       "parse_long(string1) * double3",
       "parse_long(string5) * parse_long(string1)",
-      "parse_long(string5) * parse_long(string1) * double3"
+      "parse_long(string5) * parse_long(string1) * double3",
+      "'string constant'",
+      "1",
+      "192412.24124",
+      "null"
   );
 
   private static final int ROWS_PER_SEGMENT = 100_000;
@@ -167,7 +173,8 @@ public class ExpressionVectorSelectorsTest
             )
         )
     );
-    VectorCursor cursor = new QueryableIndexStorageAdapter(index).makeVectorCursor(
+    final QueryableIndexStorageAdapter storageAdapter = new QueryableIndexStorageAdapter(index);
+    VectorCursor cursor = storageAdapter.makeVectorCursor(
         null,
         index.getDataInterval(),
         virtualColumns,
@@ -176,40 +183,55 @@ public class ExpressionVectorSelectorsTest
         null
     );
 
-    VectorValueSelector selector = null;
-    VectorObjectSelector objectSelector = null;
-    if (outputType.isNumeric()) {
-      selector = cursor.getColumnSelectorFactory().makeValueSelector("v");
-    } else {
-      objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v");
-    }
+    ColumnCapabilities capabilities = virtualColumns.getColumnCapabilities(storageAdapter, "v");
+
     int rowCount = 0;
-    while (!cursor.isDone()) {
-      boolean[] nulls;
-      switch (outputType) {
-        case LONG:
-          nulls = selector.getNullVector();
-          long[] longs = selector.getLongVector();
-          for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
-            results.add(nulls != null && nulls[i] ? null : longs[i]);
-          }
-          break;
-        case DOUBLE:
-          nulls = selector.getNullVector();
-          double[] doubles = selector.getDoubleVector();
-          for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
-            results.add(nulls != null && nulls[i] ? null : doubles[i]);
-          }
-          break;
-        case STRING:
-          Object[] objects = objectSelector.getObjectVector();
-          for (int i = 0; i < objectSelector.getCurrentVectorSize(); i++, rowCount++) {
-            results.add(objects[i]);
-          }
-          break;
+    if (capabilities.isDictionaryEncoded().isTrue()) {
+      SingleValueDimensionVectorSelector selector = cursor.getColumnSelectorFactory().makeSingleValueDimensionSelector(
+          DefaultDimensionSpec.of("v")
+      );
+      while (!cursor.isDone()) {
+        int[] row = selector.getRowVector();
+        for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
+          results.add(selector.lookupName(row[i]));
+        }
+        cursor.advance();
+      }
+    } else {
+      VectorValueSelector selector = null;
+      VectorObjectSelector objectSelector = null;
+      if (outputType.isNumeric()) {
+        selector = cursor.getColumnSelectorFactory().makeValueSelector("v");
+      } else {
+        objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v");
       }
+      while (!cursor.isDone()) {
+        boolean[] nulls;
+        switch (outputType) {
+          case LONG:
+            nulls = selector.getNullVector();
+            long[] longs = selector.getLongVector();
+            for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
+              results.add(nulls != null && nulls[i] ? null : longs[i]);
+            }
+            break;
+          case DOUBLE:
+            nulls = selector.getNullVector();
+            double[] doubles = selector.getDoubleVector();
+            for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
+              results.add(nulls != null && nulls[i] ? null : doubles[i]);
+            }
+            break;
+          case STRING:
+            Object[] objects = objectSelector.getObjectVector();
+            for (int i = 0; i < objectSelector.getCurrentVectorSize(); i++, rowCount++) {
+              results.add(objects[i]);
+            }
+            break;
+        }
 
-      cursor.advance();
+        cursor.advance();
+      }
     }
     closer.register(cursor);
 
@@ -224,10 +246,15 @@ public class ExpressionVectorSelectorsTest
 
     int rowCountCursor = cursors
         .map(nonVectorized -> {
-          final ColumnValueSelector nonSelector = nonVectorized.getColumnSelectorFactory().makeColumnValueSelector("v");
+          final ColumnValueSelector nonSelector = nonVectorized.getColumnSelectorFactory()
+                                                               .makeColumnValueSelector("v");
           int rows = 0;
           while (!nonVectorized.isDone()) {
-            Assert.assertEquals(StringUtils.format("Failed at row %s", rows), nonSelector.getObject(), results.get(rows));
+            Assert.assertEquals(
+                StringUtils.format("Failed at row %s", rows),
+                nonSelector.getObject(),
+                results.get(rows)
+            );
             rows++;
             nonVectorized.advance();
           }
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
index 874e745..4717937 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
@@ -92,6 +92,7 @@ public class SqlVectorizedExpressionSanityTest extends InitializedNullHandlingTe
       "SELECT SUM(PARSE_LONG(string1)) FROM foo",
       "SELECT SUM(PARSE_LONG(string3)) FROM foo",
       "SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
+      "SELECT TIME_FLOOR(__time, 'PT1H'), string2, SUM(long1 * double4) FROM foo WHERE string2 = '10' GROUP BY 1,2 ORDER BY 3",
       "SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1",
       "SELECT TIME_FLOOR(__time, 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 2",
       "SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1H'), SUM(long1 * long4) FROM foo GROUP BY 1 ORDER BY 1",


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org