You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by GitBox <gi...@apache.org> on 2018/11/13 17:36:34 UTC

[GitHub] gianm closed pull request #6599: Optimization for expressions that hit a single long column.

gianm closed pull request #6599: Optimization for expressions that hit a single long column.
URL: https://github.com/apache/incubator-druid/pull/6599
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java
index 5b6d8a3d9c7..9953c0e3a40 100644
--- a/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java
+++ b/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java
@@ -80,7 +80,16 @@ public void setup()
   {
     final BenchmarkSchemaInfo schemaInfo = new BenchmarkSchemaInfo(
         ImmutableList.of(
-            BenchmarkColumnSchema.makeNormal("n", ValueType.LONG, false, 1, 0d, 0d, 10000d, false),
+            BenchmarkColumnSchema.makeZipf(
+                "n",
+                ValueType.LONG,
+                false,
+                1,
+                0d,
+                1000,
+                10000,
+                3d
+            ),
             BenchmarkColumnSchema.makeZipf(
                 "s",
                 ValueType.STRING,
@@ -146,10 +155,7 @@ public void timeFloorUsingExpression(Blackhole blackhole)
     final List<?> results = cursors
         .map(cursor -> {
           final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
-          while (!cursor.isDone()) {
-            blackhole.consume(selector.getLong());
-            cursor.advance();
-          }
+          consumeLong(cursor, selector, blackhole);
           return null;
         })
         .toList();
@@ -219,6 +225,71 @@ public void timeFloorUsingCursor(Blackhole blackhole)
     blackhole.consume(count);
   }
 
+  @Benchmark
+  public void timeFormatUsingExpression(Blackhole blackhole)
+  {
+    final Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
+        null,
+        index.getDataInterval(),
+        VirtualColumns.create(
+            ImmutableList.of(
+                new ExpressionVirtualColumn(
+                    "v",
+                    "timestamp_format(__time, 'yyyy-MM-dd')",
+                    ValueType.STRING,
+                    TestExprMacroTable.INSTANCE
+                )
+            )
+        ),
+        Granularities.ALL,
+        false,
+        null
+    );
+
+    final List<?> results = cursors
+        .map(cursor -> {
+          final DimensionSelector selector = cursor.getColumnSelectorFactory().makeDimensionSelector(
+              DefaultDimensionSpec.of("v")
+          );
+          consumeDimension(cursor, selector, blackhole);
+          return null;
+        })
+        .toList();
+
+    blackhole.consume(results);
+  }
+
+  @Benchmark
+  public void timeFormatUsingExtractionFn(Blackhole blackhole)
+  {
+    final Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
+        null,
+        index.getDataInterval(),
+        VirtualColumns.EMPTY,
+        Granularities.ALL,
+        false,
+        null
+    );
+
+    final List<?> results = cursors
+        .map(cursor -> {
+          final DimensionSelector selector = cursor
+              .getColumnSelectorFactory()
+              .makeDimensionSelector(
+                  new ExtractionDimensionSpec(
+                      ColumnHolder.TIME_COLUMN_NAME,
+                      "v",
+                      new TimeFormatExtractionFn("yyyy-MM-dd", null, null, null, false)
+                  )
+              );
+          consumeDimension(cursor, selector, blackhole);
+          return null;
+        })
+        .toList();
+
+    blackhole.consume(results);
+  }
+
   @Benchmark
   public void strlenUsingExpressionAsLong(Blackhole blackhole)
   {
@@ -312,6 +383,70 @@ public void strlenUsingExtractionFn(Blackhole blackhole)
     blackhole.consume(results);
   }
 
+  @Benchmark
+  public void arithmeticOnLong(Blackhole blackhole)
+  {
+    final Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
+        null,
+        index.getDataInterval(),
+        VirtualColumns.create(
+            ImmutableList.of(
+                new ExpressionVirtualColumn(
+                    "v",
+                    "n + 1",
+                    ValueType.LONG,
+                    TestExprMacroTable.INSTANCE
+                )
+            )
+        ),
+        Granularities.ALL,
+        false,
+        null
+    );
+
+    final List<?> results = cursors
+        .map(cursor -> {
+          final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
+          consumeLong(cursor, selector, blackhole);
+          return null;
+        })
+        .toList();
+
+    blackhole.consume(results);
+  }
+
+  @Benchmark
+  public void stringConcatAndCompareOnLong(Blackhole blackhole)
+  {
+    final Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
+        null,
+        index.getDataInterval(),
+        VirtualColumns.create(
+            ImmutableList.of(
+                new ExpressionVirtualColumn(
+                    "v",
+                    "concat(n, ' is my favorite number') == '3 is my favorite number'",
+                    ValueType.LONG,
+                    TestExprMacroTable.INSTANCE
+                )
+            )
+        ),
+        Granularities.ALL,
+        false,
+        null
+    );
+
+    final List<?> results = cursors
+        .map(cursor -> {
+          final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
+          consumeLong(cursor, selector, blackhole);
+          return null;
+        })
+        .toList();
+
+    blackhole.consume(results);
+  }
+
   private void consumeDimension(final Cursor cursor, final DimensionSelector selector, final Blackhole blackhole)
   {
     if (selector.getValueCardinality() >= 0) {
diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java
index 1fb709ec44c..46904205d25 100644
--- a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java
+++ b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java
@@ -27,6 +27,7 @@
 import org.apache.druid.data.input.InputRow;
 import org.apache.druid.data.input.impl.DimensionSchema;
 import org.apache.druid.data.input.impl.DimensionsSpec;
+import org.apache.druid.data.input.impl.DoubleDimensionSchema;
 import org.apache.druid.data.input.impl.FloatDimensionSchema;
 import org.apache.druid.data.input.impl.LongDimensionSchema;
 import org.apache.druid.data.input.impl.StringDimensionSchema;
@@ -99,6 +100,9 @@ public QueryableIndex generate(
           case LONG:
             dimensions.add(new LongDimensionSchema(columnSchema.getName()));
             break;
+          case DOUBLE:
+            dimensions.add(new DoubleDimensionSchema(columnSchema.getName()));
+            break;
           case FLOAT:
             dimensions.add(new FloatDimensionSchema(columnSchema.getName()));
             break;
diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java
index 60009122de1..2b649d47135 100644
--- a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java
+++ b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java
@@ -30,6 +30,10 @@
  */
 public abstract class ExprEval<T>
 {
+  // Cached String values. Protected so they can be used by subclasses.
+  private boolean stringValueValid = false;
+  private String stringValue;
+
   public static ExprEval ofLong(@Nullable Number longValue)
   {
     return new LongExprEval(longValue);
@@ -89,7 +93,7 @@ public static ExprEval bestEffortOf(@Nullable Object val)
   @Nullable
   final T value;
 
-  private ExprEval(T value)
+  private ExprEval(@Nullable T value)
   {
     this.value = value;
   }
@@ -115,7 +119,17 @@ public Object value()
   @Nullable
   public String asString()
   {
-    return value == null ? null : String.valueOf(value);
+    if (!stringValueValid) {
+      if (value == null) {
+        stringValue = null;
+      } else {
+        stringValue = String.valueOf(value);
+      }
+
+      stringValueValid = true;
+    }
+
+    return stringValue;
   }
 
   public abstract boolean asBoolean();
@@ -126,7 +140,6 @@ public String asString()
 
   private abstract static class NumericExprEval extends ExprEval<Number>
   {
-
     private NumericExprEval(@Nullable Number value)
     {
       super(value);
@@ -247,6 +260,16 @@ public Expr toExpr()
 
   private static class StringExprEval extends ExprEval<String>
   {
+    // Cached primitive values.
+    private boolean intValueValid = false;
+    private boolean longValueValid = false;
+    private boolean doubleValueValid = false;
+    private boolean booleanValueValid = false;
+    private int intValue;
+    private long longValue;
+    private double doubleValue;
+    private boolean booleanValue;
+
     private static final StringExprEval OF_NULL = new StringExprEval(null);
 
     private Number numericVal;
@@ -263,9 +286,48 @@ public final ExprType type()
     }
 
     @Override
-    public final int asInt()
+    public int asInt()
+    {
+      if (!intValueValid) {
+        intValue = computeInt();
+        intValueValid = true;
+      }
+
+      return intValue;
+    }
+
+    @Override
+    public long asLong()
+    {
+      if (!longValueValid) {
+        longValue = computeLong();
+        longValueValid = true;
+      }
+
+      return longValue;
+    }
+
+    @Override
+    public double asDouble()
+    {
+      if (!doubleValueValid) {
+        doubleValue = computeDouble();
+        doubleValueValid = true;
+      }
+
+      return doubleValue;
+    }
+
+    @Nullable
+    @Override
+    public String asString()
     {
-      Number number = asNumber();
+      return value;
+    }
+
+    private int computeInt()
+    {
+      Number number = computeNumber();
       if (number == null) {
         assert NullHandling.replaceWithDefault();
         return 0;
@@ -273,10 +335,9 @@ public final int asInt()
       return number.intValue();
     }
 
-    @Override
-    public final long asLong()
+    private long computeLong()
     {
-      Number number = asNumber();
+      Number number = computeNumber();
       if (number == null) {
         assert NullHandling.replaceWithDefault();
         return 0L;
@@ -284,10 +345,9 @@ public final long asLong()
       return number.longValue();
     }
 
-    @Override
-    public final double asDouble()
+    private double computeDouble()
     {
-      Number number = asNumber();
+      Number number = computeNumber();
       if (number == null) {
         assert NullHandling.replaceWithDefault();
         return 0.0d;
@@ -296,7 +356,7 @@ public final double asDouble()
     }
 
     @Nullable
-    private Number asNumber()
+    private Number computeNumber()
     {
       if (value == null) {
         return null;
@@ -321,13 +381,18 @@ private Number asNumber()
     @Override
     public boolean isNumericNull()
     {
-      return asNumber() == null;
+      return computeNumber() == null;
     }
 
     @Override
     public final boolean asBoolean()
     {
-      return Evals.asBoolean(value);
+      if (!booleanValueValid) {
+        booleanValue = Evals.asBoolean(value);
+        booleanValueValid = true;
+      }
+
+      return booleanValue;
     }
 
     @Override
@@ -335,9 +400,9 @@ public final ExprEval castTo(ExprType castTo)
     {
       switch (castTo) {
         case DOUBLE:
-          return ExprEval.ofDouble(asNumber());
+          return ExprEval.ofDouble(computeNumber());
         case LONG:
-          return ExprEval.ofLong(asNumber());
+          return ExprEval.ofLong(computeNumber());
         case STRING:
           return this;
       }
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
index 14429032ceb..b0d49bad47b 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
@@ -137,12 +137,12 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
       final String column = Iterables.getOnlyElement(columns);
       final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column);
 
-      if (column.equals(ColumnHolder.TIME_COLUMN_NAME)) {
-        // Optimization for expressions that hit the __time column and nothing else.
-        // May be worth applying this optimization to all long columns?
+      if (capabilities != null && capabilities.getType() == ValueType.LONG) {
+        // Optimization for expressions that hit one long column and nothing else.
         return new SingleLongInputCachingExpressionColumnValueSelector(
-            columnSelectorFactory.makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME),
-            expression
+            columnSelectorFactory.makeColumnValueSelector(column),
+            expression,
+            !ColumnHolder.TIME_COLUMN_NAME.equals(column) // __time doesn't need an LRU cache since it is sorted.
         );
       } else if (capabilities != null
                  && capabilities.getType() == ValueType.STRING
diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java
index f3ffe831861..fa002c02203 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java
@@ -20,6 +20,7 @@
 package org.apache.druid.segment.virtual;
 
 import com.google.common.base.Preconditions;
+import it.unimi.dsi.fastutil.longs.Long2ObjectLinkedOpenHashMap;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.math.expr.Expr;
 import org.apache.druid.math.expr.ExprEval;
@@ -28,6 +29,7 @@
 import org.apache.druid.segment.ColumnValueSelector;
 
 import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
 
 /**
  * Like {@link ExpressionColumnValueSelector}, but caches the most recently computed value and re-uses it in the case
@@ -35,30 +37,26 @@
  */
 public class SingleLongInputCachingExpressionColumnValueSelector implements ColumnValueSelector<ExprEval>
 {
-  enum Validity
-  {
-    NONE,
-    DOUBLE,
-    LONG,
-    EVAL
-  }
+  private static final int CACHE_SIZE = 1000;
 
   private final ColumnValueSelector selector;
   private final Expr expression;
   private final SingleInputBindings bindings = new SingleInputBindings();
 
-  // Last read input value
+  @Nullable
+  private final LruEvalCache lruEvalCache;
+
+  // Last read input value.
   private long lastInput;
 
-  // Last computed output values (validity determined by "validity" field)
-  private Validity validity = Validity.NONE;
-  private double lastDoubleOutput;
-  private long lastLongOutput;
-  private ExprEval lastEvalOutput;
+  // Last computed output value, or null if there is none.
+  @Nullable
+  private ExprEval lastOutput;
 
   public SingleLongInputCachingExpressionColumnValueSelector(
       final ColumnValueSelector selector,
-      final Expr expression
+      final Expr expression,
+      final boolean useLruCache
   )
   {
     // Verify expression has just one binding.
@@ -68,6 +66,7 @@ public SingleLongInputCachingExpressionColumnValueSelector(
 
     this.selector = Preconditions.checkNotNull(selector, "selector");
     this.expression = Preconditions.checkNotNull(expression, "expression");
+    this.lruEvalCache = useLruCache ? new LruEvalCache() : null;
   }
 
   @Override
@@ -80,59 +79,41 @@ public void inspectRuntimeShape(final RuntimeShapeInspector inspector)
   @Override
   public double getDouble()
   {
-    // No Assert for null handling as delegate selector already have it.
-    final long currentInput = selector.getLong();
-
-    if (lastInput == currentInput && validity == Validity.DOUBLE) {
-      return lastDoubleOutput;
-    } else {
-      final double output = eval(currentInput).asDouble();
-      lastInput = currentInput;
-      lastDoubleOutput = output;
-      validity = Validity.DOUBLE;
-      return output;
-    }
+    return getObject().asDouble();
   }
 
   @Override
   public float getFloat()
   {
-    // No Assert for null handling as delegate selector already have it.
-    return (float) getDouble();
+    return (float) getObject().asDouble();
   }
 
   @Override
   public long getLong()
   {
-    // No Assert for null handling as delegate selector already have it.
-    final long currentInput = selector.getLong();
-
-    if (lastInput == currentInput && validity == Validity.LONG) {
-      return lastLongOutput;
-    } else {
-      final long output = eval(currentInput).asLong();
-      lastInput = currentInput;
-      lastLongOutput = output;
-      validity = Validity.LONG;
-      return output;
-    }
+    return getObject().asLong();
   }
 
   @Nonnull
   @Override
   public ExprEval getObject()
   {
-    final long currentInput = selector.getLong();
-
-    if (lastInput == currentInput && validity == Validity.EVAL) {
-      return lastEvalOutput;
-    } else {
-      final ExprEval output = eval(currentInput);
-      lastInput = currentInput;
-      lastEvalOutput = output;
-      validity = Validity.EVAL;
-      return output;
+    // No assert for null handling, as the delegate selector already has it.
+    final long input = selector.getLong();
+    final boolean cached = input == lastInput && lastOutput != null;
+
+    if (!cached) {
+      if (lruEvalCache == null) {
+        bindings.set(input);
+        lastOutput = expression.eval(bindings);
+      } else {
+        lastOutput = lruEvalCache.compute(input);
+      }
+
+      lastInput = input;
     }
+
+    return lastOutput;
   }
 
   @Override
@@ -141,12 +122,6 @@ public ExprEval getObject()
     return ExprEval.class;
   }
 
-  private ExprEval eval(final long value)
-  {
-    bindings.set(value);
-    return expression.eval(bindings);
-  }
-
   @Override
   public boolean isNull()
   {
@@ -155,4 +130,26 @@ public boolean isNull()
     // ExprEval.isNumericNull checks whether the parsed primitive value is null or not.
     return getObject().isNumericNull();
   }
+
+  public class LruEvalCache
+  {
+    private final Long2ObjectLinkedOpenHashMap<ExprEval> m = new Long2ObjectLinkedOpenHashMap<>();
+
+    public ExprEval compute(final long n)
+    {
+      ExprEval value = m.getAndMoveToFirst(n);
+
+      if (value == null) {
+        bindings.set(n);
+        value = expression.eval(bindings);
+        m.putAndMoveToFirst(n, value);
+
+        if (m.size() > CACHE_SIZE) {
+          m.removeLast();
+        }
+      }
+
+      return value;
+    }
+  }
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org