You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by li...@apache.org on 2022/03/04 13:24:24 UTC

[calcite] 20/41: [CALCITE-4994] SQL-to-RelNode conversion is slow if table contains hundreds of fields

This is an automated email from the ASF dual-hosted git repository.

liyafan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git

commit bb89b92b6aa78f777fb385b9ba28ec27bd5c1974
Author: Jay Narale <ja...@dremio.com>
AuthorDate: Tue Jan 25 18:14:42 2022 +0900

    [CALCITE-4994] SQL-to-RelNode conversion is slow if table contains hundreds of fields
    
    If a table contains hundreds or thousands of fields,
    SQL-to-RelNode conversion is slow because the operation to
    lookup a field within a record type is O(n) in the number of
    fields and is called O(n) times. This manifests in the method
    SqlToRelConverter.Blackboard.lookupExp.
    
    The solution we adopted is to add a map in RelRecordType
    from field names (case-sensitive) to fields (Julian Hyde).
    
    We hope that this map will improve performance in other
    parts of the planning process besides SQL-to-RelNode (e.g.
    validation and rewrite rules).
    
    If the record has 20 (RelRecordType.THRESHOLD) or fewer
    fields, the map is not populated. We believe that this saves
    memory and effort for the common case, small to medium-sized
    records.
    
    In SqlToRelConverter.Blackboard, change the contract of
    method lookupExp. It would previously return null if a field
    was not found, whereupon the caller would throw; now the
    method throws, and is declared not-nullable. The method
    previously returned a Map from field names to field ordinals,
    and now returns a Function that can convert a field name to
    an expression accessing that field; the new contract is
    easier to implement efficiently with available knowledge.
    
    Add a benchmark, RelNodeConversionBenchmark, that
    demonstrates improvement for tables with 100 and 1,000
    fields (Jay Narale).
    
    Close apache/calcite#2701
    
    Co-authored-by: Julian Hyde <jh...@apache.org>
---
 .../calcite/rel/type/RelDataTypeFieldImpl.java     |   8 +-
 .../apache/calcite/rel/type/RelDataTypeImpl.java   |  44 ++++-
 .../org/apache/calcite/rel/type/RelRecordType.java |  28 +++
 .../apache/calcite/sql2rel/SqlToRelConverter.java  |  51 +++--
 ubenchmark/README.md                               |  16 +-
 .../benchmarks/RelNodeConversionBenchmark.java     | 208 +++++++++++++++++++++
 6 files changed, 307 insertions(+), 48 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeFieldImpl.java b/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeFieldImpl.java
index 8ea986b..f11f9ec 100644
--- a/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeFieldImpl.java
+++ b/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeFieldImpl.java
@@ -23,6 +23,8 @@ import org.checkerframework.checker.nullness.qual.Nullable;
 import java.io.Serializable;
 import java.util.Objects;
 
+import static java.util.Objects.requireNonNull;
+
 /**
  * Default implementation of {@link RelDataTypeField}.
  */
@@ -42,11 +44,9 @@ public class RelDataTypeFieldImpl implements RelDataTypeField, Serializable {
       String name,
       int index,
       RelDataType type) {
-    assert name != null;
-    assert type != null;
-    this.name = name;
+    this.name = requireNonNull(name, "name");
     this.index = index;
-    this.type = type;
+    this.type = requireNonNull(type, "type");
   }
 
   //~ Methods ----------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeImpl.java b/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeImpl.java
index 181cf53..0b0bbac 100644
--- a/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeImpl.java
+++ b/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeImpl.java
@@ -35,6 +35,7 @@ import java.io.Serializable;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.Objects;
 
 import static org.apache.calcite.linq4j.Nullness.castNonNull;
@@ -91,16 +92,24 @@ public abstract class RelDataTypeImpl
 
   //~ Methods ----------------------------------------------------------------
 
-  @Override public @Nullable RelDataTypeField getField(String fieldName, boolean caseSensitive,
-      boolean elideRecord) {
+  @Override public @Nullable RelDataTypeField getField(String fieldName,
+      boolean caseSensitive, boolean elideRecord) {
     if (fieldList == null) {
       throw new IllegalStateException("Trying to access field " + fieldName
           + " in a type with no fields: " + this);
     }
-    for (RelDataTypeField field : fieldList) {
-      if (Util.matches(caseSensitive, field.getName(), fieldName)) {
+    final Map<String, RelDataTypeField> fieldMap = getFieldMap();
+    if (caseSensitive && fieldMap != null) {
+      RelDataTypeField field = fieldMap.get(fieldName);
+      if (field != null) {
         return field;
       }
+    } else {
+      for (RelDataTypeField field : fieldList) {
+        if (Util.matches(caseSensitive, field.getName(), fieldName)) {
+          return field;
+        }
+      }
     }
     if (elideRecord) {
       final List<Slot> slots = new ArrayList<>();
@@ -127,16 +136,35 @@ public abstract class RelDataTypeImpl
     }
 
     // a dynamic * field will match any field name.
-    for (RelDataTypeField field : fieldList) {
-      if (field.isDynamicStar()) {
-        // the requested field could be in the unresolved star
-        return field;
+    if (fieldMap != null) {
+      return fieldMap.get("");
+    } else {
+      for (RelDataTypeField field : fieldList) {
+        if (field.isDynamicStar()) {
+          // the requested field could be in the unresolved star
+          return field;
+        }
       }
     }
 
     return null;
   }
 
+  /** Returns a map from field names to fields.
+   *
+   * <p>Matching is case-sensitive.
+   *
+   * <p>If several fields have the same name, the map contains the first.
+   *
+   * <p>A {@link RelDataTypeField#isDynamicStar() dynamic star field} is indexed
+   * under its own name and "" (the empty string).
+   *
+   * <p>If the map is null, the type must do lookup the long way.
+   */
+  protected @Nullable Map<String, RelDataTypeField> getFieldMap() {
+    return null;
+  }
+
   private static void getFieldRecurse(List<Slot> slots, RelDataType type,
       int depth, String fieldName, boolean caseSensitive) {
     while (slots.size() <= depth) {
diff --git a/core/src/main/java/org/apache/calcite/rel/type/RelRecordType.java b/core/src/main/java/org/apache/calcite/rel/type/RelRecordType.java
index c6cd873..7fc4ebb 100644
--- a/core/src/main/java/org/apache/calcite/rel/type/RelRecordType.java
+++ b/core/src/main/java/org/apache/calcite/rel/type/RelRecordType.java
@@ -19,8 +19,14 @@ package org.apache.calcite.rel.type;
 import org.apache.calcite.linq4j.Ord;
 import org.apache.calcite.sql.type.SqlTypeName;
 
+import com.google.common.collect.ImmutableMap;
+
+import org.checkerframework.checker.nullness.qual.Nullable;
+
 import java.io.Serializable;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 import static java.util.Objects.requireNonNull;
 
@@ -31,6 +37,11 @@ public class RelRecordType extends RelDataTypeImpl implements Serializable {
   /** Name resolution policy; usually {@link StructKind#FULLY_QUALIFIED}. */
   private final StructKind kind;
   private final boolean nullable;
+  private final @Nullable Map<String, RelDataTypeField> fieldNameMap;
+
+  /** Minimum number of fields where it is worth populating {@link #fieldNameMap}
+   * to accelerate lookups by field name. */
+  private static final int THRESHOLD = 20;
 
   //~ Constructors -----------------------------------------------------------
 
@@ -45,6 +56,19 @@ public class RelRecordType extends RelDataTypeImpl implements Serializable {
     super(fields);
     this.nullable = nullable;
     this.kind = requireNonNull(kind, "kind");
+    if (fields.size() > THRESHOLD) {
+      final Map<String, RelDataTypeField> map = new HashMap<>();
+      for (RelDataTypeField f : fields) {
+        map.putIfAbsent(f.getName(), f);
+        if (f.isDynamicStar()) {
+          // the first dynamic star field is cached with a special name
+          map.putIfAbsent("", f);
+        }
+      }
+      this.fieldNameMap = ImmutableMap.copyOf(map);
+    } else {
+      this.fieldNameMap = null;
+    }
     computeDigest();
   }
 
@@ -85,6 +109,10 @@ public class RelRecordType extends RelDataTypeImpl implements Serializable {
     return kind;
   }
 
+  @Override protected @Nullable Map<String, RelDataTypeField> getFieldMap() {
+    return fieldNameMap;
+  }
+
   @Override protected void generateTypeString(StringBuilder sb, boolean withDetail) {
     sb.append("RecordType");
     switch (kind) {
diff --git a/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java b/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java
index 673c6ac..9d06f37 100644
--- a/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java
+++ b/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java
@@ -4106,16 +4106,12 @@ public class SqlToRelConverter {
     } else {
       qualified = SqlQualified.create(null, 1, null, identifier);
     }
-    final Pair<RexNode, @Nullable Map<String, Integer>> e0 = requireNonNull(
-        bb.lookupExp(qualified),
-        () -> "no expression found for " + qualified);
+    final Pair<RexNode, @Nullable BiFunction<RexNode, String, RexNode>> e0 =
+        bb.lookupExp(qualified);
     RexNode e = e0.left;
     for (String name : qualified.suffix()) {
       if (e == e0.left && e0.right != null) {
-        Integer i = requireNonNull(
-            e0.right.get(name),
-            () -> "e0.right.get(name) produced null for " + name);
-        e = rexBuilder.makeFieldAccess(e, i);
+        e = e0.right.apply(e, name);
       } else {
         final boolean caseSensitive = true; // name already fully-qualified
         if (identifier.isStar() && bb.scope instanceof MatchRecognizeScope) {
@@ -4779,13 +4775,14 @@ public class SqlToRelConverter {
     }
 
     /**
-     * Returns an expression with which to reference a from-list item.
+     * Returns an expression with which to reference a from-list item;
+     * throws if not found.
      *
-     * @param qualified the alias of the from item
-     * @return a {@link RexFieldAccess} or {@link RexRangeRef}, or null if
-     * not found
+     * @param qualified The alias of the FROM item
+     * @return a {@link RexFieldAccess} or {@link RexRangeRef}, never null
      */
-    @Nullable Pair<RexNode, @Nullable Map<String, Integer>> lookupExp(SqlQualified qualified) {
+    Pair<RexNode, @Nullable BiFunction<RexNode, String, RexNode>> lookupExp(
+        SqlQualified qualified) {
       if (nameToNodeMap != null && qualified.prefixLength == 1) {
         RexNode node = nameToNodeMap.get(qualified.identifier.names.get(0));
         if (node == null) {
@@ -4799,8 +4796,9 @@ public class SqlToRelConverter {
       final SqlValidatorScope.ResolvedImpl resolved =
           new SqlValidatorScope.ResolvedImpl();
       scope().resolve(qualified.prefix(), nameMatcher, false, resolved);
-      if (!(resolved.count() == 1)) {
-        return null;
+      if (resolved.count() != 1) {
+        throw new AssertionError("no unique expression found for " + qualified
+            + "; count is " + resolved.count());
       }
       final SqlValidatorScope.Resolve resolve = resolved.only();
       final RelDataType rowType = resolve.rowType();
@@ -4814,18 +4812,13 @@ public class SqlToRelConverter {
         final LookupContext rels =
             new LookupContext(this, inputs, systemFieldList.size());
         final RexNode node = lookup(resolve.path.steps().get(0).i, rels);
-        if (node == null) {
-          return null;
-        } else {
-          final Map<String, Integer> fieldOffsets = new HashMap<>();
-          for (RelDataTypeField f : resolve.rowType().getFieldList()) {
-            if (!fieldOffsets.containsKey(f.getName())) {
-              fieldOffsets.put(f.getName(), f.getIndex());
-            }
-          }
-          final Map<String, Integer> map = ImmutableMap.copyOf(fieldOffsets);
-          return Pair.of(node, map);
-        }
+        assert node != null;
+        return Pair.of(node, (e, fieldName) -> {
+          final RelDataTypeField field =
+              requireNonNull(rowType.getField(fieldName, true, false),
+                  () -> "field " + fieldName);
+          return rexBuilder.makeFieldAccess(e, field.getIndex());
+        });
       } else {
         // We're referencing a relational expression which has not been
         // converted yet. This occurs when from items are correlated,
@@ -4857,7 +4850,11 @@ public class SqlToRelConverter {
           }
           final RexNode c =
               rexBuilder.makeCorrel(builder.uniquify().build(), correlId);
-          return Pair.of(c, fields.build());
+          final ImmutableMap<String, Integer> fieldMap = fields.build();
+          return Pair.of(c, (e, fieldName) -> {
+            final int j = requireNonNull(fieldMap.get(fieldName), "field " + fieldName);
+            return rexBuilder.makeFieldAccess(e, j);
+          });
         }
       }
     }
diff --git a/ubenchmark/README.md b/ubenchmark/README.md
index e7673ab..17b1163 100644
--- a/ubenchmark/README.md
+++ b/ubenchmark/README.md
@@ -28,27 +28,23 @@ Calcite artifacts. (Besides, jmh's license does not allow that.)
 
 To run all benchmarks:
 
-{noformat}bash
-$ cd calcite
-$ ./gradlew :ubenchmark:jmh
-{noformat}
+    cd calcite
+    ./gradlew :ubenchmark:jmh
 
 ## Running one benchmark from the command line
 
 To run just one benchmark, modify `ubenchmark/build.gradle.kts` and add the
 following task:
 
-{noformat}kotlin
+```kotlin
 jmh {
     include = listOf("removeAllVertices.*Benchmark")
 }
-{noformat}
+```
 
 and run
 
-{noformat}bash
-$ ./gradlew :ubenchmark:jmh
-{noformat}
+    ./gradlew :ubenchmark:jmh
 
 as before. In this case, `removeAllVertices.*Benchmark` is a
 regular expression that matches a few methods -- benchmarks -- in
@@ -74,3 +70,5 @@ case and link them here:
   [3836](https://issues.apache.org/jira/browse/CALCITE-3836)
 * ReflectVisitorDispatcherTest:
   [3873](https://issues.apache.org/jira/browse/CALCITE-3873)
+* RelNodeConversionBenchmark:
+  [4994](https://issues.apache.org/jira/browse/CALCITE-4994)
diff --git a/ubenchmark/src/jmh/java/org/apache/calcite/benchmarks/RelNodeConversionBenchmark.java b/ubenchmark/src/jmh/java/org/apache/calcite/benchmarks/RelNodeConversionBenchmark.java
new file mode 100644
index 0000000..a6efb55
--- /dev/null
+++ b/ubenchmark/src/jmh/java/org/apache/calcite/benchmarks/RelNodeConversionBenchmark.java
@@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.benchmarks;
+
+import org.apache.calcite.adapter.java.AbstractQueryableTable;
+import org.apache.calcite.config.Lex;
+import org.apache.calcite.linq4j.Enumerable;
+import org.apache.calcite.linq4j.Linq4j;
+import org.apache.calcite.linq4j.QueryProvider;
+import org.apache.calcite.linq4j.Queryable;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.schema.impl.AbstractTable;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.parser.SqlParser;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.tools.FrameworkConfig;
+import org.apache.calcite.tools.Frameworks;
+import org.apache.calcite.tools.Planner;
+import org.apache.calcite.tools.Programs;
+
+import com.google.common.collect.ImmutableList;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.profile.GCProfiler;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.util.List;
+import java.util.Locale;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Benchmarks Conversion of Sql To RelNode and conversion of SqlNode to RelNode.
+ */
+@Fork(value = 1, jvmArgsPrepend = "-Xmx2048m")
+@Measurement(iterations = 10, time = 100, timeUnit = TimeUnit.MILLISECONDS)
+@Warmup(iterations = 10, time = 100, timeUnit = TimeUnit.MILLISECONDS)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Benchmark)
+@Threads(1)
+public class RelNodeConversionBenchmark {
+
+  /**
+   * A common state needed for this benchmark.
+   */
+  public abstract static class RelNodeConversionBenchmarkState {
+    String sql;
+    Planner p;
+
+    public void setup(int length, int columnLength) {
+      // Create Sql
+      StringBuilder sb = new StringBuilder();
+      sb.append("select 1 ");
+      Random rnd = new Random();
+      rnd.setSeed(424242);
+      for (int i = 0; i < length; i++) {
+        sb.append(", ");
+        sb.append(
+            String.format(Locale.ROOT, "c%s / CASE WHEN c%s > %d THEN c%s ELSE c%s END ",
+                String.valueOf(rnd.nextInt(columnLength)), String.valueOf(i % columnLength),
+                rnd.nextInt(columnLength), String.valueOf(rnd.nextInt(columnLength)),
+                String.valueOf(rnd.nextInt(columnLength)))
+        );
+      }
+      sb.append(" FROM test1");
+      sql = sb.toString();
+
+      // Create Schema and Table
+
+      AbstractTable t = new AbstractQueryableTable(Integer.class) {
+        List<Integer> items = ImmutableList.of();
+        final Enumerable<Integer> enumerable = Linq4j.asEnumerable(items);
+
+        @Override public <E> Queryable<E> asQueryable(
+            QueryProvider queryProvider, SchemaPlus schema, String tableName) {
+          return (Queryable<E>) enumerable.asQueryable();
+        }
+
+        @Override public RelDataType getRowType(RelDataTypeFactory typeFactory) {
+          RelDataTypeFactory.Builder builder = typeFactory.builder();
+          for (int i = 0; i < columnLength; i++) {
+            builder.add(String.format(Locale.ROOT, "c%d", i), SqlTypeName.INTEGER);
+          }
+          return builder.build();
+        }
+      };
+
+      // Create Planner
+      final SchemaPlus schema = Frameworks.createRootSchema(true);
+      schema.add("test1", t);
+
+      final FrameworkConfig config = Frameworks.newConfigBuilder()
+          .parserConfig(SqlParser.config().withLex(Lex.MYSQL))
+          .defaultSchema(schema)
+          .programs(Programs.ofRules(Programs.RULE_SET))
+          .build();
+      p = Frameworks.getPlanner(config);
+    }
+  }
+
+  /**
+   * A state holding information needed to parse.
+   */
+  @State(Scope.Thread)
+  public static class SqlToRelNodeBenchmarkState extends RelNodeConversionBenchmarkState {
+    @Param({"10000"})
+    int length;
+
+    @Param({"10", "100", "1000"})
+    int columnLength;
+
+    @Setup(Level.Iteration)
+    public void setUp() {
+      super.setup(length, columnLength);
+    }
+
+    public RelNode parse() throws Exception {
+      SqlNode n = p.parse(sql);
+      n = p.validate(n);
+      RelNode rel = p.rel(n).project();
+      p.close();
+      p.reset();
+      return rel;
+    }
+  }
+
+  @Benchmark
+  public RelNode parse(SqlToRelNodeBenchmarkState state) throws Exception {
+    return state.parse();
+  }
+
+  /**
+   * A state holding information needed to convert To Rel.
+   */
+  @State(Scope.Thread)
+  public static class SqlNodeToRelNodeBenchmarkState extends RelNodeConversionBenchmarkState {
+    @Param({"10000"})
+    int length;
+
+    @Param({"10", "100", "1000"})
+    int columnLength;
+    SqlNode sqlNode;
+
+    @Setup(Level.Iteration)
+    public void setUp() {
+      super.setup(length, columnLength);
+      try {
+        sqlNode = p.validate(p.parse(sql));
+      } catch (Exception e) {
+        e.printStackTrace();
+      }
+    }
+
+    public RelNode convertToRel() throws Exception {
+      return p.rel(sqlNode).project();
+    }
+  }
+
+  @Benchmark
+  public RelNode convertToRel(SqlNodeToRelNodeBenchmarkState state) throws Exception {
+    return state.convertToRel();
+  }
+
+  public static void main(String[] args) throws RunnerException {
+    Options opt = new OptionsBuilder()
+        .include(RelNodeConversionBenchmark.class.getSimpleName())
+        .addProfiler(GCProfiler.class)
+        .addProfiler(FlightRecorderProfiler.class)
+        .detectJvmArgs()
+        .build();
+
+    new Runner(opt).run();
+  }
+
+}