You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2020/03/11 23:09:09 UTC
[hive] branch master updated: HIVE-22962: Reuse HiveRelFieldTrimmer
instance across queries (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 0884899 HIVE-22962: Reuse HiveRelFieldTrimmer instance across queries (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
0884899 is described below
commit 0884899a7ba3c6b488a78a95428188df5c611bee
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Mon Mar 2 17:09:59 2020 -0800
HIVE-22962: Reuse HiveRelFieldTrimmer instance across queries (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
Close apache/hive#943
---
.../java/org/apache/hadoop/hive/cli/CliDriver.java | 4 +-
.../hive/benchmark/calcite/FieldTrimmerBench.java | 242 +++++
.../calcite/HiveDefaultRelMetadataProvider.java | 57 +-
.../ql/optimizer/calcite/HiveRelFactories.java | 3 +-
.../calcite/rules/HiveFieldTrimmerRule.java | 5 +-
.../optimizer/calcite/rules/HiveReflectUtil.java | 334 ++++++
.../calcite/rules/HiveRelFieldTrimmer.java | 132 ++-
.../optimizer/calcite/rules/RelFieldTrimmer.java | 1130 ++++++++++++++++++++
.../hadoop/hive/ql/parse/CalcitePlanner.java | 69 +-
.../calcite/TestCBORuleFiredOnlyOnce.java | 2 +-
.../apache/hive/service/server/HiveServer2.java | 4 +-
11 files changed, 1895 insertions(+), 87 deletions(-)
diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
index cfea602..0475264 100644
--- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
+++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
@@ -788,8 +788,8 @@ public class CliDriver {
ss.updateThreadName();
- // Initialize metadata provider class
- CalcitePlanner.initializeMetadataProviderClass();
+ // Initialize metadata provider class and trimmer
+ CalcitePlanner.warmup();
// Create views registry
HiveMaterializedViewsRegistry.get().init();
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/calcite/FieldTrimmerBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/calcite/FieldTrimmerBench.java
new file mode 100644
index 0000000..d98e251
--- /dev/null
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/calcite/FieldTrimmerBench.java
@@ -0,0 +1,242 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.benchmark.calcite;
+
+import com.google.common.collect.Lists;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.AbstractRelNode;
+import org.apache.calcite.rel.BiRel;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.SingleRel;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelRecordType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer;
+import org.apache.hadoop.hive.ql.parse.CalcitePlanner;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * This benchmark measures the performance of the field trimmer.
+ * <p>
+ * It uses the JMH framework for benchmarking.
+ * You may execute this benchmark tool from the JMH command line in different ways:
+ * <p>
+ * To use the settings shown in the main() function, use:
+ * $ java -cp target/benchmarks.jar org.apache.hive.benchmark.calcite.FieldTrimmerBench
+ * <p>
+ * To use the default settings used by JMH, use:
+ * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.calcite.FieldTrimmerBench
+ */
+@State(Scope.Thread)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@Fork(1)
+public class FieldTrimmerBench {
+
+ RelOptCluster relOptCluster;
+ RelBuilder relBuilder;
+ RelNode root;
+ org.apache.calcite.sql2rel.RelFieldTrimmer cft;
+ HiveRelFieldTrimmer ft;
+ HiveRelFieldTrimmer hft;
+
+ @Setup(Level.Trial)
+ public void initTrial() {
+ // Init cluster and builder: Hive planner, Hive type system and the Hive RelBuilder factory
+ final RelOptPlanner planner = CalcitePlanner.createPlanner(new HiveConf());
+ final RexBuilder rexBuilder = new RexBuilder(
+ new JavaTypeFactoryImpl(new HiveTypeSystemImpl()));
+ relOptCluster = RelOptCluster.create(planner, rexBuilder);
+ relBuilder = HiveRelFactories.HIVE_BUILDER.create(relOptCluster, null);
+ // Create operator tree: leaves d0-d3, single-input d4-d7 over them, binary d8/d9, and a binary root
+ DummyNode0 d0 = new DummyNode0(relOptCluster, relOptCluster.traitSet());
+ DummyNode1 d1 = new DummyNode1(relOptCluster, relOptCluster.traitSet());
+ DummyNode2 d2 = new DummyNode2(relOptCluster, relOptCluster.traitSet());
+ DummyNode3 d3 = new DummyNode3(relOptCluster, relOptCluster.traitSet());
+ DummyNode4 d4 = new DummyNode4(relOptCluster, relOptCluster.traitSet(), d0);
+ DummyNode5 d5 = new DummyNode5(relOptCluster, relOptCluster.traitSet(), d1);
+ DummyNode6 d6 = new DummyNode6(relOptCluster, relOptCluster.traitSet(), d2);
+ DummyNode7 d7 = new DummyNode7(relOptCluster, relOptCluster.traitSet(), d3);
+ DummyNode8 d8 = new DummyNode8(relOptCluster, relOptCluster.traitSet(), d4, d5);
+ DummyNode9 d9 = new DummyNode9(relOptCluster, relOptCluster.traitSet(), d6, d7);
+ root = new DummyNode9(relOptCluster, relOptCluster.traitSet(), d8, d9);
+ }
+
+ @Benchmark
+ @BenchmarkMode({Mode.Throughput, Mode.AverageTime})
+ @Warmup(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 10, time = 2, timeUnit = TimeUnit.SECONDS)
+ public void baseRelFieldTrimmer() {
+ // Baseline: a new Calcite field trimmer is constructed on every execution of the benchmark
+ cft = new org.apache.calcite.sql2rel.RelFieldTrimmer(null, relBuilder);
+ cft.trim(root);
+ cft = null;
+ }
+
+ @Benchmark
+ @BenchmarkMode({Mode.Throughput, Mode.AverageTime})
+ @Warmup(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 10, time = 2, timeUnit = TimeUnit.SECONDS)
+ public void modBaseRelFieldTrimmer() {
+ // No trimmer is created here: get(false, false) returns the shared no-stats instance built without the LMF-based dispatcher
+ ft = HiveRelFieldTrimmer.get(false, false);
+ ft.trim(relBuilder, root);
+ ft = null;
+ }
+
+ @Benchmark
+ @BenchmarkMode({Mode.Throughput, Mode.AverageTime})
+ @Warmup(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 10, time = 2, timeUnit = TimeUnit.SECONDS)
+ public void hiveRelFieldTrimmer() {
+ // No trimmer is created here: get(false) returns the shared no-stats instance that uses the LMF-based dispatcher
+ hft = HiveRelFieldTrimmer.get(false);
+ hft.trim(relBuilder, root);
+ hft = null;
+ }
+
+ /**
+  * Runs every benchmark of this class with the JMH runner.
+  *
+  * @param args command line arguments (unused)
+  * @throws RunnerException if the JMH runner fails
+  */
+ public static void main(String[] args) throws RunnerException {
+   // Regular expression that selects all benchmark methods declared in this class
+   final String benchmarkPattern = ".*" + FieldTrimmerBench.class.getSimpleName() + ".*";
+   final Options options = new OptionsBuilder().include(benchmarkPattern).build();
+   new Runner(options).run();
+ }
+
+ // ~ 10 rel node classes to use in the benchmark. They are static nested classes
+ // because none of them reads state from the enclosing benchmark instance, and
+ // each constructor forwards the trait set it receives instead of ignoring it.
+
+ /** Leaf node (no inputs) that exposes an empty row type. */
+ private static class DummyNode0 extends AbstractRelNode {
+   protected DummyNode0(RelOptCluster cluster, RelTraitSet traits) {
+     super(cluster, traits);
+   }
+
+   @Override
+   protected RelDataType deriveRowType() {
+     return new RelRecordType(Lists.newArrayList());
+   }
+ }
+
+ /** Leaf node (no inputs) that exposes an empty row type. */
+ private static class DummyNode1 extends AbstractRelNode {
+   protected DummyNode1(RelOptCluster cluster, RelTraitSet traits) {
+     super(cluster, traits);
+   }
+
+   @Override
+   protected RelDataType deriveRowType() {
+     return new RelRecordType(Lists.newArrayList());
+   }
+ }
+
+ /** Leaf node (no inputs) that exposes an empty row type. */
+ private static class DummyNode2 extends AbstractRelNode {
+   protected DummyNode2(RelOptCluster cluster, RelTraitSet traits) {
+     super(cluster, traits);
+   }
+
+   @Override
+   protected RelDataType deriveRowType() {
+     return new RelRecordType(Lists.newArrayList());
+   }
+ }
+
+ /** Leaf node (no inputs) that exposes an empty row type. */
+ private static class DummyNode3 extends AbstractRelNode {
+   protected DummyNode3(RelOptCluster cluster, RelTraitSet traits) {
+     super(cluster, traits);
+   }
+
+   @Override
+   protected RelDataType deriveRowType() {
+     return new RelRecordType(Lists.newArrayList());
+   }
+ }
+
+ /** Single-input node that exposes an empty row type. */
+ private static class DummyNode4 extends SingleRel {
+   protected DummyNode4(RelOptCluster cluster, RelTraitSet traits, RelNode input) {
+     super(cluster, traits, input);
+   }
+
+   @Override
+   protected RelDataType deriveRowType() {
+     return new RelRecordType(Lists.newArrayList());
+   }
+ }
+
+ /** Single-input node that exposes an empty row type. */
+ private static class DummyNode5 extends SingleRel {
+   protected DummyNode5(RelOptCluster cluster, RelTraitSet traits, RelNode input) {
+     super(cluster, traits, input);
+   }
+
+   @Override
+   protected RelDataType deriveRowType() {
+     return new RelRecordType(Lists.newArrayList());
+   }
+ }
+
+ /** Single-input node that exposes an empty row type. */
+ private static class DummyNode6 extends SingleRel {
+   protected DummyNode6(RelOptCluster cluster, RelTraitSet traits, RelNode input) {
+     super(cluster, traits, input);
+   }
+
+   @Override
+   protected RelDataType deriveRowType() {
+     return new RelRecordType(Lists.newArrayList());
+   }
+ }
+
+ /** Single-input node that exposes an empty row type. */
+ private static class DummyNode7 extends SingleRel {
+   protected DummyNode7(RelOptCluster cluster, RelTraitSet traits, RelNode input) {
+     super(cluster, traits, input);
+   }
+
+   @Override
+   protected RelDataType deriveRowType() {
+     return new RelRecordType(Lists.newArrayList());
+   }
+ }
+
+ /** Two-input node that exposes an empty row type. */
+ private static class DummyNode8 extends BiRel {
+   protected DummyNode8(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right) {
+     super(cluster, traits, left, right);
+   }
+
+   @Override
+   protected RelDataType deriveRowType() {
+     return new RelRecordType(Lists.newArrayList());
+   }
+ }
+
+ /** Two-input node that exposes an empty row type; also used as the root of the tree. */
+ private static class DummyNode9 extends BiRel {
+   protected DummyNode9(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right) {
+     super(cluster, traits, left, right);
+   }
+
+   @Override
+   protected RelDataType deriveRowType() {
+     return new RelRecordType(Lists.newArrayList());
+   }
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
index 7d55f64..7ad3214 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
@@ -94,53 +94,14 @@ public class HiveDefaultRelMetadataProvider {
HiveRelMdPredicates.SOURCE,
JaninoRelMetadataProvider.DEFAULT)));
- /**
- * This is the list of operators that are specifically used in Hive and
- * should be loaded by the metadata providers.
- */
- private static final List<Class<? extends RelNode>> HIVE_REL_NODE_CLASSES =
- ImmutableList.of(
- RelNode.class,
- AbstractRelNode.class,
- RelSubset.class,
- HepRelVertex.class,
- ConverterImpl.class,
- AbstractConverter.class,
-
- HiveTableScan.class,
- HiveAggregate.class,
- HiveExcept.class,
- HiveFilter.class,
- HiveIntersect.class,
- HiveJoin.class,
- HiveMultiJoin.class,
- HiveProject.class,
- HiveRelNode.class,
- HiveSemiJoin.class,
- HiveSortExchange.class,
- HiveSortLimit.class,
- HiveTableFunctionScan.class,
- HiveUnion.class,
-
- DruidQuery.class,
-
- HiveJdbcConverter.class,
- JdbcHiveTableScan.class,
- JdbcAggregate.class,
- JdbcFilter.class,
- JdbcJoin.class,
- JdbcProject.class,
- JdbcSort.class,
- JdbcUnion.class);
-
private final RelMetadataProvider metadataProvider;
- public HiveDefaultRelMetadataProvider(HiveConf hiveConf) {
- this.metadataProvider = init(hiveConf);
+ public HiveDefaultRelMetadataProvider(HiveConf hiveConf, List<Class<? extends RelNode>> nodeClasses) {
+ this.metadataProvider = init(hiveConf, nodeClasses);
}
- private RelMetadataProvider init(HiveConf hiveConf) {
+ private RelMetadataProvider init(HiveConf hiveConf, List<Class<? extends RelNode>> nodeClasses) {
// Create cost metadata provider
if (HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
&& HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_EXTENDED_COST_MODEL)) {
@@ -167,7 +128,10 @@ public class HiveDefaultRelMetadataProvider {
HiveRelMdPredicates.SOURCE,
JaninoRelMetadataProvider.DEFAULT)));
- metadataProvider.register(HIVE_REL_NODE_CLASSES);
+ if (nodeClasses != null) {
+ // If classes were passed, pre-register them
+ metadataProvider.register(nodeClasses);
+ }
return metadataProvider;
}
@@ -184,11 +148,10 @@ public class HiveDefaultRelMetadataProvider {
* additional Hive classes (compared to Calcite core classes) that may
* be visited during the planning phase.
*/
- public static void initializeMetadataProviderClass() {
+ public static void initializeMetadataProviderClass(List<Class<? extends RelNode>> nodeClasses) {
// This will register the classes in the default Janino implementation
- JaninoRelMetadataProvider.DEFAULT.register(
- HiveDefaultRelMetadataProvider.HIVE_REL_NODE_CLASSES);
+ JaninoRelMetadataProvider.DEFAULT.register(nodeClasses);
// This will register the classes in the default Hive implementation
- DEFAULT.register(HiveDefaultRelMetadataProvider.HIVE_REL_NODE_CLASSES);
+ DEFAULT.register(nodeClasses);
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
index f71d3f0..04b3888 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
@@ -79,7 +79,8 @@ public class HiveRelFactories {
public static final RelBuilderFactory HIVE_BUILDER =
HiveRelBuilder.proto(
- Contexts.of(HIVE_PROJECT_FACTORY,
+ Contexts.of(
+ HIVE_PROJECT_FACTORY,
HIVE_FILTER_FACTORY,
HIVE_JOIN_FACTORY,
HIVE_SEMI_JOIN_FACTORY,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java
index ac050df..73ff1bc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java
@@ -63,9 +63,8 @@ public class HiveFieldTrimmerRule extends RelOptRule {
final HepPlanner tmpPlanner = new HepPlanner(PROGRAM);
tmpPlanner.setRoot(node);
node = tmpPlanner.findBestExp();
- final HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
- relBuilderFactory.create(node.getCluster(), null), fetchStats);
- call.transformTo(fieldTrimmer.trim(node));
+ call.transformTo(
+ HiveRelFieldTrimmer.get(fetchStats).trim(call.builder(), node));
triggered = true;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReflectUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReflectUtil.java
new file mode 100644
index 0000000..5e327da
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReflectUtil.java
@@ -0,0 +1,334 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import com.google.common.collect.ImmutableList;
+import java.lang.invoke.CallSite;
+import java.lang.invoke.LambdaMetafactory;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import org.apache.calcite.util.ReflectUtil;
+import org.apache.calcite.util.ReflectUtil.MethodDispatcher;
+import org.apache.calcite.util.ReflectiveVisitor;
+
+/**
+ * Static utilities for Java reflection. This is based on Calcite
+ * {@link ReflectUtil}. It contains methods to wrap a Calcite dispatcher
+ * (based on reflection) into a Hive dispatcher as well as a Hive
+ * dispatcher implementation based on {@link LambdaMetafactory}.
+ */
+public class HiveReflectUtil {
+
+ /**
+ * Creates a Hive dispatcher that wraps a Calcite one; register() on the wrapper is the inherited no-op.
+ */
+ protected static <T, E> MethodDispatcherWrapper<T, E> createCalciteMethodDispatcherWrapper(
+ final MethodDispatcher<T> methodDispatcher) {
+ return new MethodDispatcherWrapper<>(methodDispatcher);
+ }
+
+ /**
+ * Creates a dispatcher for calls to a single multi-method on a particular
+ * object.
+ *
+ * <p>Calls to that multi-method are resolved by looking for a method on
+ * the runtime type of that object, with the required name, and with
+ * the correct type or a subclass for the first argument, and precisely the
+ * same types for other arguments.
+ *
+ * <p>For instance, a dispatcher created for the method
+ *
+ * <blockquote>String foo(Vehicle, int, List)</blockquote>
+ *
+ * <p>could be used to call the methods
+ *
+ * <blockquote>String foo(Car, int, List)<br>
+ * String foo(Bus, int, List)</blockquote>
+ *
+ * <p>(because Car and Bus are subclasses of Vehicle, and they occur in the
+ * polymorphic first argument) but not the method
+ *
+ * <blockquote>String foo(Car, int, ArrayList)</blockquote>
+ *
+ * <p>(only the first argument is polymorphic).
+ *
+ * <p>No method lookup is performed at creation time; a missing method only
+ * surfaces when the returned dispatcher is invoked or classes are registered on it.
+ *
+ * @param returnClazz Return type of method
+ * @param visitor Object on which to invoke the method
+ * @param methodName Name of method
+ * @param arg0Clazz Base type of argument zero
+ * @param otherArgClasses Types of remaining arguments
+ */
+ protected static <E, T> HiveMethodDispatcher<T, E> createMethodDispatcher(
+ final Class<T> returnClazz,
+ final ReflectiveVisitor visitor,
+ final String methodName,
+ final Class<E> arg0Clazz,
+ final Class... otherArgClasses) {
+ final List<Class> otherArgClassList =
+ ImmutableList.copyOf(otherArgClasses);
+ final VisitDispatcher<ReflectiveVisitor, E> dispatcher = // every call builds a dispatcher with its own, initially empty, cache
+ createDispatcher((Class<ReflectiveVisitor>) visitor.getClass(), arg0Clazz);
+ return new HiveMethodDispatcher<>(dispatcher, returnClazz, visitor, methodName,
+ arg0Clazz, otherArgClassList);
+ }
+
+ /**
+ * Creates a dispatcher for calls to {@link VisitDispatcher#lookupVisitFunc}. The
+ * dispatcher caches methods between invocations and it is thread-safe.
+ *
+ * @param visitorBaseClazz Visitor base class (only checked by assertion and used for type inference)
+ * @param visiteeBaseClazz Visitee base class (only checked by assertion and used for type inference)
+ * @return new, initially empty, cache of methods
+ */
+ private static <R extends ReflectiveVisitor, E> VisitDispatcher<R, E> createDispatcher(
+ final Class<R> visitorBaseClazz,
+ final Class<E> visiteeBaseClazz) {
+ assert ReflectiveVisitor.class.isAssignableFrom(visitorBaseClazz);
+ assert Object.class.isAssignableFrom(visiteeBaseClazz);
+ return new VisitDispatcher<>();
+ }
+
+ /**
+  * Returns the functional interface that models a function with the given
+  * number of arguments.
+  *
+  * @param length number of arguments of the function (1 to 4)
+  * @return matching VarArgsFunc sub-interface
+  * @throws RuntimeException if there is no interface for that number of arguments
+  */
+ private static Class<? extends VarArgsFunc> getVarArgsFuncClass(int length) {
+   if (length == 1) {
+     return VarArgsFunc1.class;
+   }
+   if (length == 2) {
+     return VarArgsFunc2.class;
+   }
+   if (length == 3) {
+     return VarArgsFunc3.class;
+   }
+   if (length == 4) {
+     return VarArgsFunc4.class;
+   }
+   throw new RuntimeException("Unsupported function with length " + length);
+ }
+
+ private static VarArgsFunc getVarArgsFunc(int length, CallSite site) throws Throwable {
+ switch (length) { // invokeExact is signature-polymorphic: each cast below fixes the call's return type, so the cases cannot be merged
+ case 1:
+ return (VarArgsFunc1) site.getTarget().invokeExact();
+ case 2:
+ return (VarArgsFunc2) site.getTarget().invokeExact();
+ case 3:
+ return (VarArgsFunc3) site.getTarget().invokeExact();
+ case 4:
+ return (VarArgsFunc4) site.getTarget().invokeExact();
+ default: // must stay in sync with getVarArgsFuncClass, which picks the interface the call site returns
+ throw new RuntimeException("Unsupported function with length " + length);
+ }
+ }
+
+ /**
+  * Cache of generated visit functions, keyed by visitor class, visitee class,
+  * method name and additional parameter types.
+  */
+ protected static class VisitDispatcher<R extends ReflectiveVisitor, E> {
+   // ConcurrentHashMap rejects null values, so only successful lookups can be cached
+   final Map<List<Object>, VarArgsFunc> map = new ConcurrentHashMap<>();
+
+   /**
+    * Returns the function bound to the visit method that applies to the given
+    * classes, generating it through {@link LambdaMetafactory} on first use.
+    *
+    * @param visitorClass class of the visitor that declares the method
+    * @param visiteeClass class of the object being visited
+    * @param visitMethodName name of the visit method
+    * @param additionalParameterTypes types of the arguments after the visitee
+    * @return cached or newly generated function, or null if no applicable
+    *     method was found
+    * @throws Throwable if the method handle or the call site cannot be created
+    */
+   public VarArgsFunc lookupVisitFunc(
+       Class<? extends R> visitorClass,
+       Class<? extends E> visiteeClass,
+       String visitMethodName,
+       List<Class> additionalParameterTypes)
+       throws Throwable {
+     final List<Object> key = ImmutableList.of(
+         visitorClass, visiteeClass, visitMethodName, additionalParameterTypes);
+     VarArgsFunc method = map.get(key);
+     if (method != null) {
+       return method;
+     }
+     final Method method1 = ReflectUtil.lookupVisitMethod(
+         visitorClass, visiteeClass, visitMethodName, additionalParameterTypes);
+     if (method1 == null) {
+       // Nothing to bind. Returning null lets the callers report "Method not found"
+       // instead of failing with a NullPointerException in unreflect below.
+       return null;
+     }
+     final MethodHandles.Lookup lookup = MethodHandles.lookup();
+     final MethodHandle methodHandle = lookup.unreflect(method1);
+     // The visitor is passed as the first argument of the generated function
+     final int argsLength = 1 + method1.getParameterTypes().length;
+     final MethodType invokedType = MethodType.methodType(getVarArgsFuncClass(argsLength));
+     final MethodType functionMethodType = MethodType.methodType(
+         method1.getReturnType(), visitorClass, method1.getParameterTypes());
+     final CallSite site = LambdaMetafactory.metafactory(
+         lookup,
+         "apply",
+         invokedType,
+         functionMethodType.generic(),
+         methodHandle,
+         methodHandle.type());
+     method = getVarArgsFunc(argsLength, site);
+     map.put(key, method);
+     return method;
+   }
+ }
+
+ /**
+  * Dispatcher that resolves the visit method from the runtime class of the
+  * first argument and calls it through a function obtained from a
+  * {@link VisitDispatcher}.
+  */
+ protected static class HiveMethodDispatcher<T, E> implements ClassMethodDispatcher<T, E> {
+
+   private final VisitDispatcher<ReflectiveVisitor, E> dispatcher;
+   private final Class<T> returnClazz;
+   private final ReflectiveVisitor visitor;
+   private final String methodName;
+   private final Class<E> arg0Clazz;
+   private final List<Class> otherArgClassList;
+
+   public HiveMethodDispatcher(
+       final VisitDispatcher<ReflectiveVisitor, E> dispatcher,
+       final Class<T> returnClazz,
+       final ReflectiveVisitor visitor,
+       final String methodName,
+       final Class<E> arg0Clazz,
+       final List<Class> otherArgClassList) {
+     this.dispatcher = dispatcher;
+     this.returnClazz = returnClazz;
+     this.visitor = visitor;
+     this.methodName = methodName;
+     this.arg0Clazz = arg0Clazz;
+     this.otherArgClassList = otherArgClassList;
+   }
+
+   /**
+    * Invokes the visit method that matches the runtime class of {@code args[0]}.
+    * The visitor is prepended to {@code args}, so the call is not tied to a
+    * fixed number of additional arguments.
+    *
+    * @param args visitee followed by the remaining method arguments
+    * @return result of the visit method, cast to the declared return class
+    * @throws RuntimeException wrapping any failure raised during lookup or invocation
+    */
+   @Override
+   public T invoke(Object... args) {
+     VarArgsFunc method = null;
+     try {
+       method = lookupVisitFunc(args[0]);
+       // Build (visitor, args...) rather than assuming exactly three arguments
+       final Object[] callArgs = new Object[args.length + 1];
+       callArgs[0] = visitor;
+       System.arraycopy(args, 0, callArgs, 1, args.length);
+       final Object o = method.apply(callArgs);
+       return returnClazz.cast(o);
+     } catch (Throwable e) {
+       throw new RuntimeException("While invoking method " +
+           (method != null ? "'" + method + "'" : ""),
+           e);
+     }
+   }
+
+   /**
+    * Finds the function for the runtime class of the given visitee.
+    *
+    * @throws IllegalArgumentException if the visitee is not an instance of the
+    *     base argument class or no method is found
+    */
+   private VarArgsFunc lookupVisitFunc(final Object arg0) throws Throwable {
+     if (!arg0Clazz.isInstance(arg0)) {
+       throw new IllegalArgumentException();
+     }
+     final VarArgsFunc method =
+         dispatcher.lookupVisitFunc(
+             visitor.getClass(),
+             (Class<? extends E>) arg0.getClass(),
+             methodName,
+             otherArgClassList);
+     if (method == null) {
+       throw methodNotFound();
+     }
+     return method;
+   }
+
+   /**
+    * Resolves the visit method for each of the given classes up front.
+    *
+    * @throws IllegalArgumentException if no method is found for one of the classes
+    */
+   @Override
+   public void register(Iterable<Class<? extends E>> classes)
+       throws Throwable {
+     for (Class<? extends E> c : classes) {
+       final VarArgsFunc method =
+           dispatcher.lookupVisitFunc(
+               visitor.getClass(),
+               c,
+               methodName,
+               otherArgClassList);
+       if (method == null) {
+         throw methodNotFound();
+       }
+     }
+   }
+
+   /** Builds the exception that reports the base signature that could not be resolved. */
+   private IllegalArgumentException methodNotFound() {
+     final List<Class> classList = new ArrayList<>();
+     classList.add(arg0Clazz);
+     classList.addAll(otherArgClassList);
+     return new IllegalArgumentException("Method not found: " + methodName
+         + "(" + classList + ")");
+   }
+ }
+
+ /**
+  * Adapter that exposes a Calcite {@link MethodDispatcher} through the Hive
+  * dispatcher interface by forwarding every invocation to it.
+  */
+ private static class MethodDispatcherWrapper<T, E> implements ClassMethodDispatcher<T, E> {
+
+   private final MethodDispatcher<T> delegate;
+
+   public MethodDispatcherWrapper(final MethodDispatcher<T> delegate) {
+     this.delegate = delegate;
+   }
+
+   @Override
+   public T invoke(Object... args) {
+     return delegate.invoke(args);
+   }
+ }
+
+ public interface ClassMethodDispatcher<T, E> extends MethodDispatcher<T> {
+ default void register(Iterable<Class<? extends E>> classes) throws Throwable {
+ // Do nothing by default; HiveMethodDispatcher overrides this to resolve the visit method of each class up front
+ }
+ }
+
+ @FunctionalInterface
+ private interface VarArgsFunc1<T, R> extends VarArgsFunc<R> { // adapter for one-argument functions
+ default R apply(Object... args) {
+ return apply((T) args[0]); // unchecked cast; extra elements of args are ignored
+ }
+
+ R apply(T t);
+ }
+
+ @FunctionalInterface
+ private interface VarArgsFunc2<T, U, R> extends VarArgsFunc<R> { // adapter for two-argument functions
+ default R apply(Object... args) {
+ return apply((T) args[0], (U) args[1]); // unchecked casts; extra elements of args are ignored
+ }
+
+ R apply(T t, U u);
+ }
+
+ @FunctionalInterface
+ private interface VarArgsFunc3<T, U, V, R> extends VarArgsFunc<R> { // adapter for three-argument functions
+ default R apply(Object... args) {
+ return apply((T) args[0], (U) args[1], (V) args[2]); // unchecked casts; extra elements of args are ignored
+ }
+
+ R apply(T t, U u, V v);
+ }
+
+ @FunctionalInterface
+ private interface VarArgsFunc4<T, U, V, W, R> extends VarArgsFunc<R> { // adapter for four-argument functions
+ default R apply(Object... args) {
+ return apply((T) args[0], (U) args[1], (V) args[2], (W) args[3]); // unchecked casts; extra elements of args are ignored
+ }
+
+ R apply(T t, U u, V v, W w);
+ }
+
+ private interface VarArgsFunc<R> { // common varargs entry point that HiveMethodDispatcher.invoke calls
+ default R apply(Object... args) {
+ throw new UnsupportedOperationException(); // each arity-specific sub-interface above overrides this
+ }
+ }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index d218fac..53d68e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -52,9 +52,7 @@ import org.apache.calcite.rex.RexTableInputRef;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexVisitor;
import org.apache.calcite.sql.SqlKind;
-import org.apache.calcite.sql.validate.SqlValidator;
import org.apache.calcite.sql2rel.CorrelationReferenceFinder;
-import org.apache.calcite.sql2rel.RelFieldTrimmer;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Pair;
@@ -81,29 +79,92 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
protected static final Logger LOG = LoggerFactory.getLogger(HiveRelFieldTrimmer.class);
- private ColumnAccessInfo columnAccessInfo;
- private Map<HiveProject, Table> viewProjectToTableSchema;
- private final RelBuilder relBuilder;
+ // We initialize the field trimmer statically here and we will reuse it across
+ // queries. The reason is that otherwise we will create a new dispatcher with
+ // each instantiation, thus effectively removing the caching mechanism that is
+ // built within the dispatcher.
+ private static final HiveRelFieldTrimmer FIELD_TRIMMER_STATS =
+ new HiveRelFieldTrimmer(true);
+ private static final HiveRelFieldTrimmer FIELD_TRIMMER_NO_STATS =
+ new HiveRelFieldTrimmer(false);
+ // For testing
+ private static final HiveRelFieldTrimmer FIELD_TRIMMER_STATS_METHOD_DISPATCHER =
+ new HiveRelFieldTrimmer(true, false);
+ private static final HiveRelFieldTrimmer FIELD_TRIMMER_NO_STATS_METHOD_DISPATCHER =
+ new HiveRelFieldTrimmer(false, false);
+
private final boolean fetchStats;
- public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder) {
- this(validator, relBuilder, false);
- }
+ private static final ThreadLocal<ColumnAccessInfo> COLUMN_ACCESS_INFO =
+ new ThreadLocal<>();
+ private static final ThreadLocal<Map<HiveProject, Table>> VIEW_PROJECT_TO_TABLE_SCHEMA =
+ new ThreadLocal<>();
- public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder,
- ColumnAccessInfo columnAccessInfo, Map<HiveProject, Table> viewToTableSchema) {
- this(validator, relBuilder, false);
- this.columnAccessInfo = columnAccessInfo;
- this.viewProjectToTableSchema = viewToTableSchema;
+
+ private HiveRelFieldTrimmer(boolean fetchStats) {
+ this(fetchStats, true);
}
- public HiveRelFieldTrimmer(SqlValidator validator, RelBuilder relBuilder, boolean fetchStats) {
- super(validator, relBuilder);
- this.relBuilder = relBuilder;
+ private HiveRelFieldTrimmer(boolean fetchStats, boolean useLMFBasedDispatcher) {
+ super(useLMFBasedDispatcher);
this.fetchStats = fetchStats;
}
/**
+ * Returns a HiveRelFieldTrimmer instance that does not retrieve
+ * stats.
+ */
+ public static HiveRelFieldTrimmer get() {
+ return get(false);
+ }
+
+ /**
+ * Returns a HiveRelFieldTrimmer instance that can retrieve stats.
+ */
+ public static HiveRelFieldTrimmer get(boolean fetchStats) {
+ return get(fetchStats, true);
+ }
+
+ /**
+  * Returns the shared HiveRelFieldTrimmer instance for the requested
+  * combination of stats retrieval and dispatcher implementation.
+  *
+  * @param fetchStats whether the returned trimmer can retrieve stats
+  * @param useLMFBasedDispatcher whether the returned trimmer was created with
+  *     the LMF-based dispatcher enabled
+  */
+ public static HiveRelFieldTrimmer get(boolean fetchStats, boolean useLMFBasedDispatcher) {
+   if (fetchStats) {
+     if (useLMFBasedDispatcher) {
+       return FIELD_TRIMMER_STATS;
+     }
+     return FIELD_TRIMMER_STATS_METHOD_DISPATCHER;
+   }
+   if (useLMFBasedDispatcher) {
+     return FIELD_TRIMMER_NO_STATS;
+   }
+   return FIELD_TRIMMER_NO_STATS_METHOD_DISPATCHER;
+ }
+
+ /**
+ * Trims unused fields from a relational expression.
+ *
+ * <p>We presume that all fields of the relational expression are wanted by
+ * its consumer, so only trim fields that are not used within the tree.
+ *
+ * @param root Root node of relational expression
+ * @return Trimmed relational expression
+ */
+ @Override
+ public RelNode trim(RelBuilder relBuilder, RelNode root) {
+ return trim(relBuilder, root, null, null);
+ }
+
+ public RelNode trim(RelBuilder relBuilder, RelNode root,
+ ColumnAccessInfo columnAccessInfo, Map<HiveProject, Table> viewToTableSchema) {
+ try {
+ // Set local thread variables; the trimmer instances are shared statics, so per-call state is not kept in fields
+ COLUMN_ACCESS_INFO.set(columnAccessInfo);
+ VIEW_PROJECT_TO_TABLE_SCHEMA.set(viewToTableSchema);
+ // Execute pruning; trimFields(Project, ...) reads both thread locals and skips the access tracking when either is null
+ return super.trim(relBuilder, root);
+ } finally {
+ // Always remove the local thread variables to avoid leaks. NOTE(review): a nested trim() on the same thread would also clear the outer call's values - confirm trim() is never re-entered
+ COLUMN_ACCESS_INFO.remove();
+ VIEW_PROJECT_TO_TABLE_SCHEMA.remove();
+ }
+ }
+
+ /**
* Trims the fields of an input relational expression.
*
* @param rel Relational expression
@@ -251,7 +312,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
return trimFields(
(RelNode) dq, fieldsUsed, extraFields);
}
- final RelNode newTableAccessRel = project(dq, fieldsUsed, extraFields, relBuilder);
+ final RelNode newTableAccessRel = project(dq, fieldsUsed, extraFields, REL_BUILDER.get());
// Some parts of the system can't handle rows with zero fields, so
// pretend that one field is used.
@@ -512,6 +573,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
newProjects.add(rexBuilder.makeInputRef(input, i));
}
}
+ final RelBuilder relBuilder = REL_BUILDER.get();
relBuilder.push(input);
relBuilder.project(newProjects);
Aggregate newAggregate = new HiveAggregate(aggregate.getCluster(), aggregate.getTraitSet(), relBuilder.build(),
@@ -641,6 +703,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
}
// Now create new agg calls, and populate mapping for them.
+ final RelBuilder relBuilder = REL_BUILDER.get();
relBuilder.push(newInput);
final List<RelBuilder.AggCall> newAggCallList = new ArrayList<>();
j = originalGroupCount; // because lookup in fieldsUsed is done using original group count
@@ -675,12 +738,14 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
// set columnAccessInfo for ViewColumnAuthorization
- if (this.columnAccessInfo != null && this.viewProjectToTableSchema != null
- && this.viewProjectToTableSchema.containsKey(project)) {
+ final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
+ final Map<HiveProject, Table> viewProjectToTableSchema = VIEW_PROJECT_TO_TABLE_SCHEMA.get();
+ if (columnAccessInfo != null && viewProjectToTableSchema != null
+ && viewProjectToTableSchema.containsKey(project)) {
for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
if (fieldsUsed.get(ord.i)) {
- Table tab = this.viewProjectToTableSchema.get(project);
- this.columnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName());
+ Table tab = viewProjectToTableSchema.get(project);
+ columnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName());
}
}
}
@@ -690,7 +755,8 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
public TrimResult trimFields(HiveTableScan tableAccessRel, ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
final TrimResult result = super.trimFields(tableAccessRel, fieldsUsed, extraFields);
- if (this.columnAccessInfo != null) {
+ final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
+ if (columnAccessInfo != null) {
// Store information about column accessed by the table so it can be used
// to send only this information for column masking
final RelOptHiveTable tab = (RelOptHiveTable) tableAccessRel.getTable();
@@ -789,4 +855,26 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
Mapping mapping = Mappings.createIdentity(fieldCount);
return result(newTabFun, mapping);
}
+
+ /**
+ * This method can be called to pre-register all the classes that may be
+ * visited during the planning phase.
+ */
+ protected void register(List<Class<? extends RelNode>> nodeClasses) throws Throwable {
+ this.trimFieldsDispatcher.register(nodeClasses);
+ }
+
+ /**
+ * This method can be called at startup time to pre-register all the
+ * Hive classes that may be visited during the planning phase.
+ */
+ public static void initializeFieldTrimmerClass(List<Class<? extends RelNode>> nodeClasses) {
+ try {
+ FIELD_TRIMMER_STATS.register(nodeClasses);
+ FIELD_TRIMMER_NO_STATS.register(nodeClasses);
+ } catch (Throwable t) {
+ // LOG it but do not fail
+ LOG.warn("Error initializing field trimmer instance", t);
+ }
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java
new file mode 100644
index 0000000..77cf75b
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java
@@ -0,0 +1,1130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.linq4j.Ord;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.core.CorrelationId;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.SetOp;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.logical.LogicalTableFunctionScan;
+import org.apache.calcite.rel.logical.LogicalTableModify;
+import org.apache.calcite.rel.logical.LogicalValues;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rel.type.RelDataTypeImpl;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCorrelVariable;
+import org.apache.calcite.rex.RexDynamicParam;
+import org.apache.calcite.rex.RexFieldAccess;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexPermuteInputsShuttle;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.rex.RexVisitor;
+import org.apache.calcite.sql.SqlExplainFormat;
+import org.apache.calcite.sql.SqlExplainLevel;
+import org.apache.calcite.sql2rel.CorrelationReferenceFinder;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.Bug;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
+import org.apache.calcite.util.ReflectUtil;
+import org.apache.calcite.util.ReflectiveVisitor;
+import org.apache.calcite.util.Util;
+import org.apache.calcite.util.mapping.IntPair;
+import org.apache.calcite.util.mapping.Mapping;
+import org.apache.calcite.util.mapping.MappingType;
+import org.apache.calcite.util.mapping.Mappings;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class comes from Calcite almost as-is. The only change concerns
+ * the dispatcher and the builder, so the trimmer is thread-safe and can
+ * be reused across different queries. Definition follows.
+ *
+ * <p>Transformer that walks over a tree of relational expressions, replacing each
+ * {@link RelNode} with a 'slimmed down' relational expression that projects
+ * only the columns required by its consumer.
+ *
+ * <p>Uses multi-methods to fire the right rule for each type of relational
+ * expression. This allows the transformer to be extended without having to
+ * add a new method to RelNode, and without requiring a collection of rule
+ * classes scattered to the four winds.
+ */
+public class RelFieldTrimmer implements ReflectiveVisitor {
+ //~ Static fields/initializers ---------------------------------------------
+
+ private static final Logger LOG = LoggerFactory.getLogger(RelFieldTrimmer.class);
+
+ protected static final ThreadLocal<RelBuilder> REL_BUILDER =
+ new ThreadLocal<>();
+
+ //~ Instance fields --------------------------------------------------------
+
+ protected final HiveReflectUtil.ClassMethodDispatcher<TrimResult, RelNode> trimFieldsDispatcher;
+
+ //~ Constructors -----------------------------------------------------------
+
+ /**
+ * Creates a RelFieldTrimmer.
+ * @param useLMFBasedDispatcher True if we want to create a dispatcher based on
+ * {@link java.lang.invoke.LambdaMetafactory} that is
+ * thread-safe, or false if we want to create a
+ * Calcite dispatcher based on reflection that is not
+ * thread-safe. False should only be used for
+ * testing/benchmarking purposes
+ */
+ protected RelFieldTrimmer(boolean useLMFBasedDispatcher) {
+ if (useLMFBasedDispatcher) {
+ this.trimFieldsDispatcher =
+ HiveReflectUtil.createMethodDispatcher(
+ TrimResult.class,
+ this,
+ "trimFields",
+ RelNode.class,
+ ImmutableBitSet.class,
+ Set.class);
+ } else {
+ this.trimFieldsDispatcher =
+ HiveReflectUtil.createCalciteMethodDispatcherWrapper(
+ ReflectUtil.createMethodDispatcher(
+ TrimResult.class,
+ this,
+ "trimFields",
+ RelNode.class,
+ ImmutableBitSet.class,
+ Set.class));
+ }
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ /**
+ * Trims unused fields from a relational expression.
+ *
+ * <p>We presume that all fields of the relational expression are wanted by
+ * its consumer, so only trim fields that are not used within the tree.
+ *
+ * @param relBuilder Rel builder
+ * @param root Root node of relational expression
+ * @return Trimmed relational expression
+ */
+ public RelNode trim(RelBuilder relBuilder, RelNode root) {
+ try {
+ REL_BUILDER.set(relBuilder);
+ final int fieldCount = root.getRowType().getFieldCount();
+ final ImmutableBitSet fieldsUsed = ImmutableBitSet.range(fieldCount);
+ final Set<RelDataTypeField> extraFields = Collections.emptySet();
+ final TrimResult trimResult =
+ dispatchTrimFields(root, fieldsUsed, extraFields);
+ if (!trimResult.right.isIdentity()) {
+ throw new IllegalArgumentException();
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(
+ RelOptUtil.dumpPlan("Plan after trimming unused fields",
+ trimResult.left, SqlExplainFormat.TEXT,
+ SqlExplainLevel.EXPPLAN_ATTRIBUTES));
+ }
+ return trimResult.left;
+ } finally {
+ REL_BUILDER.remove();
+ }
+ }
+
+ /**
+ * Trims the fields of an input relational expression.
+ *
+ * @param rel Relational expression
+ * @param input Input relational expression, whose fields to trim
+ * @param fieldsUsed Bitmap of fields needed by the consumer
+ * @return New relational expression and its field mapping
+ */
+ protected TrimResult trimChild(
+ RelNode rel,
+ RelNode input,
+ final ImmutableBitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final ImmutableBitSet.Builder fieldsUsedBuilder = fieldsUsed.rebuild();
+
+ // Fields that define the collation cannot be discarded.
+ final RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
+ final ImmutableList<RelCollation> collations = mq.collations(input);
+ for (RelCollation collation : collations) {
+ for (RelFieldCollation fieldCollation : collation.getFieldCollations()) {
+ fieldsUsedBuilder.set(fieldCollation.getFieldIndex());
+ }
+ }
+
+ // Correlating variables are a means for other relational expressions to use
+ // fields.
+ for (final CorrelationId correlation : rel.getVariablesSet()) {
+ rel.accept(
+ new CorrelationReferenceFinder() {
+ protected RexNode handle(RexFieldAccess fieldAccess) {
+ final RexCorrelVariable v =
+ (RexCorrelVariable) fieldAccess.getReferenceExpr();
+ if (v.id.equals(correlation)) {
+ fieldsUsedBuilder.set(fieldAccess.getField().getIndex());
+ }
+ return fieldAccess;
+ }
+ });
+ }
+
+ return dispatchTrimFields(input, fieldsUsedBuilder.build(), extraFields);
+ }
+
+ /**
+ * Trims a child relational expression, then adds back a dummy project to
+ * restore the fields that were removed.
+ *
+ * <p>Sounds pointless? It causes unused fields to be removed
+ * further down the tree (towards the leaves), but it ensures that the
+ * consuming relational expression continues to see the same fields.
+ *
+ * @param rel Relational expression
+ * @param input Input relational expression, whose fields to trim
+ * @param fieldsUsed Bitmap of fields needed by the consumer
+ * @return New relational expression and its field mapping
+ */
+ protected TrimResult trimChildRestore(
+ RelNode rel,
+ RelNode input,
+ ImmutableBitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ TrimResult trimResult = trimChild(rel, input, fieldsUsed, extraFields);
+ if (trimResult.right.isIdentity()) {
+ return trimResult;
+ }
+ final RelDataType rowType = input.getRowType();
+ List<RelDataTypeField> fieldList = rowType.getFieldList();
+ final List<RexNode> exprList = new ArrayList<>();
+ final List<String> nameList = rowType.getFieldNames();
+ RexBuilder rexBuilder = rel.getCluster().getRexBuilder();
+ assert trimResult.right.getSourceCount() == fieldList.size();
+ for (int i = 0; i < fieldList.size(); i++) {
+ int source = trimResult.right.getTargetOpt(i);
+ RelDataTypeField field = fieldList.get(i);
+ exprList.add(
+ source < 0
+ ? rexBuilder.makeZeroLiteral(field.getType())
+ : rexBuilder.makeInputRef(field.getType(), source));
+ }
+ final RelBuilder relBuilder = REL_BUILDER.get();
+ relBuilder.push(trimResult.left)
+ .project(exprList, nameList);
+ return result(relBuilder.build(),
+ Mappings.createIdentity(fieldList.size()));
+ }
+
+ /**
+ * Invokes {@link #trimFields}, or the appropriate method for the type
+ * of the rel parameter, using multi-method dispatch.
+ *
+ * @param rel Relational expression
+ * @param fieldsUsed Bitmap of fields needed by the consumer
+ * @return New relational expression and its field mapping
+ */
+ protected final TrimResult dispatchTrimFields(
+ RelNode rel,
+ ImmutableBitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final TrimResult trimResult =
+ trimFieldsDispatcher.invoke(rel, fieldsUsed, extraFields);
+ final RelNode newRel = trimResult.left;
+ final Mapping mapping = trimResult.right;
+ final int fieldCount = rel.getRowType().getFieldCount();
+ assert mapping.getSourceCount() == fieldCount
+ : "source: " + mapping.getSourceCount() + " != " + fieldCount;
+ final int newFieldCount = newRel.getRowType().getFieldCount();
+ assert mapping.getTargetCount() + extraFields.size() == newFieldCount
+ || Bug.TODO_FIXED
+ : "target: " + mapping.getTargetCount()
+ + " + " + extraFields.size()
+ + " != " + newFieldCount;
+ if (Bug.TODO_FIXED) {
+ assert newFieldCount > 0 : "rel has no fields after trim: " + rel;
+ }
+ if (newRel.equals(rel)) {
+ return result(rel, mapping);
+ }
+ return trimResult;
+ }
+
+ protected TrimResult result(RelNode r, final Mapping mapping) {
+ final RelBuilder relBuilder = REL_BUILDER.get();
+ final RexBuilder rexBuilder = relBuilder.getRexBuilder();
+ for (final CorrelationId correlation : r.getVariablesSet()) {
+ r = r.accept(
+ new CorrelationReferenceFinder() {
+ protected RexNode handle(RexFieldAccess fieldAccess) {
+ final RexCorrelVariable v =
+ (RexCorrelVariable) fieldAccess.getReferenceExpr();
+ if (v.id.equals(correlation)
+ && v.getType().getFieldCount() == mapping.getSourceCount()) {
+ final int old = fieldAccess.getField().getIndex();
+ final int new_ = mapping.getTarget(old);
+ final RelDataTypeFactory.Builder typeBuilder =
+ relBuilder.getTypeFactory().builder();
+ for (int target : Util.range(mapping.getTargetCount())) {
+ typeBuilder.add(
+ v.getType().getFieldList().get(mapping.getSource(target)));
+ }
+ final RexNode newV =
+ rexBuilder.makeCorrel(typeBuilder.build(), v.id);
+ if (old != new_) {
+ return rexBuilder.makeFieldAccess(newV, new_);
+ }
+ }
+ return fieldAccess;
+ }
+ });
+ }
+ return new TrimResult(r, mapping);
+ }
+
+ /**
+ * Visit method, per {@link org.apache.calcite.util.ReflectiveVisitor}.
+ *
+ * <p>This method is invoked reflectively, so there may not be any apparent
+ * calls to it. The class (or derived classes) may contain overloads of
+ * this method with more specific types for the {@code rel} parameter.
+ *
+ * <p>Returns a pair: the relational expression created, and the mapping
+ * between the original fields and the fields of the newly created
+ * relational expression.
+ *
+ * @param rel Relational expression
+ * @param fieldsUsed Fields needed by the consumer
+ * @return relational expression and mapping
+ */
+ public TrimResult trimFields(
+ RelNode rel,
+ ImmutableBitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ // We don't know how to trim this kind of relational expression, so give
+ // it back intact.
+ Util.discard(fieldsUsed);
+ return result(rel,
+ Mappings.createIdentity(rel.getRowType().getFieldCount()));
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+ * {@link org.apache.calcite.rel.logical.LogicalProject}.
+ */
+ public TrimResult trimFields(
+ Project project,
+ ImmutableBitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final RelDataType rowType = project.getRowType();
+ final int fieldCount = rowType.getFieldCount();
+ final RelNode input = project.getInput();
+
+ // Which fields are required from the input?
+ final Set<RelDataTypeField> inputExtraFields =
+ new LinkedHashSet<>(extraFields);
+ RelOptUtil.InputFinder inputFinder =
+ new RelOptUtil.InputFinder(inputExtraFields);
+ for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
+ if (fieldsUsed.get(ord.i)) {
+ ord.e.accept(inputFinder);
+ }
+ }
+ ImmutableBitSet inputFieldsUsed = inputFinder.inputBitSet.build();
+
+ // Create input with trimmed columns.
+ TrimResult trimResult =
+ trimChild(project, input, inputFieldsUsed, inputExtraFields);
+ RelNode newInput = trimResult.left;
+ final Mapping inputMapping = trimResult.right;
+
+ // If the input is unchanged, and we need to project all columns,
+ // there's nothing we can do.
+ if (newInput == input
+ && fieldsUsed.cardinality() == fieldCount) {
+ return result(project, Mappings.createIdentity(fieldCount));
+ }
+
+ // Some parts of the system can't handle rows with zero fields, so
+ // pretend that one field is used.
+ if (fieldsUsed.cardinality() == 0) {
+ return dummyProject(fieldCount, newInput);
+ }
+
+ // Build new project expressions, and populate the mapping.
+ final List<RexNode> newProjects = new ArrayList<>();
+ final RexVisitor<RexNode> shuttle =
+ new RexPermuteInputsShuttle(
+ inputMapping, newInput);
+ final Mapping mapping =
+ Mappings.create(
+ MappingType.INVERSE_SURJECTION,
+ fieldCount,
+ fieldsUsed.cardinality());
+ for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
+ if (fieldsUsed.get(ord.i)) {
+ mapping.set(ord.i, newProjects.size());
+ RexNode newProjectExpr = ord.e.accept(shuttle);
+ newProjects.add(newProjectExpr);
+ }
+ }
+
+ final RelDataType newRowType =
+ RelOptUtil.permute(project.getCluster().getTypeFactory(), rowType,
+ mapping);
+
+ final RelBuilder relBuilder = REL_BUILDER.get();
+ relBuilder.push(newInput);
+ relBuilder.project(newProjects, newRowType.getFieldNames());
+ return result(relBuilder.build(), mapping);
+ }
+
+ /** Creates a project with a dummy column, to protect the parts of the system
+ * that cannot handle a relational expression with no columns.
+ *
+ * @param fieldCount Number of fields in the original relational expression
+ * @param input Trimmed input
+ * @return Trim result wrapping the dummy project, or wrapping the input itself when it already has exactly one field
+ */
+ protected TrimResult dummyProject(int fieldCount, RelNode input) {
+ final RelOptCluster cluster = input.getCluster();
+ final Mapping mapping =
+ Mappings.create(MappingType.INVERSE_SURJECTION, fieldCount, 1);
+ if (input.getRowType().getFieldCount() == 1) {
+ // Input already has one field (and may in fact be a dummy project we
+ // created for the child). We can't do better.
+ return result(input, mapping);
+ }
+ final RexLiteral expr =
+ cluster.getRexBuilder().makeExactLiteral(BigDecimal.ZERO);
+ final RelBuilder relBuilder = REL_BUILDER.get();
+ relBuilder.push(input);
+ relBuilder.project(ImmutableList.<RexNode>of(expr), ImmutableList.of("DUMMY"));
+ return result(relBuilder.build(), mapping);
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+ * {@link org.apache.calcite.rel.logical.LogicalFilter}.
+ */
+ public TrimResult trimFields(
+ Filter filter,
+ ImmutableBitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final RelDataType rowType = filter.getRowType();
+ final int fieldCount = rowType.getFieldCount();
+ final RexNode conditionExpr = filter.getCondition();
+ final RelNode input = filter.getInput();
+
+ // We use the fields used by the consumer, plus any fields used in the
+ // filter.
+ final Set<RelDataTypeField> inputExtraFields =
+ new LinkedHashSet<>(extraFields);
+ RelOptUtil.InputFinder inputFinder =
+ new RelOptUtil.InputFinder(inputExtraFields);
+ inputFinder.inputBitSet.addAll(fieldsUsed);
+ conditionExpr.accept(inputFinder);
+ final ImmutableBitSet inputFieldsUsed = inputFinder.inputBitSet.build();
+
+ // Create input with trimmed columns.
+ TrimResult trimResult =
+ trimChild(filter, input, inputFieldsUsed, inputExtraFields);
+ RelNode newInput = trimResult.left;
+ final Mapping inputMapping = trimResult.right;
+
+ // If the input is unchanged, and we need to project all columns,
+ // there's nothing we can do.
+ if (newInput == input
+ && fieldsUsed.cardinality() == fieldCount) {
+ return result(filter, Mappings.createIdentity(fieldCount));
+ }
+
+ // Rewrite the filter condition in terms of the trimmed input's fields.
+ final RexVisitor<RexNode> shuttle =
+ new RexPermuteInputsShuttle(inputMapping, newInput);
+ RexNode newConditionExpr =
+ conditionExpr.accept(shuttle);
+
+ // Build new filter with trimmed input and condition.
+ final RelBuilder relBuilder = REL_BUILDER.get();
+ relBuilder.push(newInput)
+ .filter(filter.getVariablesSet(), newConditionExpr);
+
+ // The result has the same mapping as the input gave us. Sometimes we
+ // return fields that the consumer didn't ask for, because the filter
+ // needs them for its condition.
+ return result(relBuilder.build(), inputMapping);
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+ * {@link org.apache.calcite.rel.core.Sort}.
+ */
+ public TrimResult trimFields(
+ Sort sort,
+ ImmutableBitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final RelDataType rowType = sort.getRowType();
+ final int fieldCount = rowType.getFieldCount();
+ final RelCollation collation = sort.getCollation();
+ final RelNode input = sort.getInput();
+
+ // We use the fields used by the consumer, plus any fields used as sort
+ // keys.
+ final ImmutableBitSet.Builder inputFieldsUsed = fieldsUsed.rebuild();
+ for (RelFieldCollation field : collation.getFieldCollations()) {
+ inputFieldsUsed.set(field.getFieldIndex());
+ }
+
+ // Create input with trimmed columns.
+ final Set<RelDataTypeField> inputExtraFields = Collections.emptySet();
+ TrimResult trimResult =
+ trimChild(sort, input, inputFieldsUsed.build(), inputExtraFields);
+ RelNode newInput = trimResult.left;
+ final Mapping inputMapping = trimResult.right;
+
+ // If the input is unchanged, and we need to project all columns,
+ // there's nothing we can do.
+ if (newInput == input
+ && inputMapping.isIdentity()
+ && fieldsUsed.cardinality() == fieldCount) {
+ return result(sort, Mappings.createIdentity(fieldCount));
+ }
+
+ // leave the Sort unchanged in case we have dynamic limits
+ if (sort.offset instanceof RexDynamicParam
+ || sort.fetch instanceof RexDynamicParam) {
+ return result(sort, inputMapping);
+ }
+
+ final RelBuilder relBuilder = REL_BUILDER.get();
+ relBuilder.push(newInput);
+ final int offset =
+ sort.offset == null ? 0 : RexLiteral.intValue(sort.offset);
+ final int fetch =
+ sort.fetch == null ? -1 : RexLiteral.intValue(sort.fetch);
+ final ImmutableList<RexNode> fields =
+ relBuilder.fields(RexUtil.apply(inputMapping, collation));
+ relBuilder.sortLimit(offset, fetch, fields);
+
+ // The result has the same mapping as the input gave us. Sometimes we
+ // return fields that the consumer didn't ask for, because the sort
+ // needs them as sort keys.
+ return result(relBuilder.build(), inputMapping);
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+ * {@link org.apache.calcite.rel.logical.LogicalJoin}.
+ */
+ public TrimResult trimFields(
+ Join join,
+ ImmutableBitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final int fieldCount = join.getSystemFieldList().size()
+ + join.getLeft().getRowType().getFieldCount()
+ + join.getRight().getRowType().getFieldCount();
+ final RexNode conditionExpr = join.getCondition();
+ final int systemFieldCount = join.getSystemFieldList().size();
+
+ // Add in fields used in the condition.
+ final Set<RelDataTypeField> combinedInputExtraFields =
+ new LinkedHashSet<>(extraFields);
+ RelOptUtil.InputFinder inputFinder =
+ new RelOptUtil.InputFinder(combinedInputExtraFields);
+ inputFinder.inputBitSet.addAll(fieldsUsed);
+ conditionExpr.accept(inputFinder);
+ final ImmutableBitSet fieldsUsedPlus = inputFinder.inputBitSet.build();
+
+ // If no system fields are used, we can remove them.
+ int systemFieldUsedCount = 0;
+ for (int i = 0; i < systemFieldCount; ++i) {
+ if (fieldsUsed.get(i)) {
+ ++systemFieldUsedCount;
+ }
+ }
+ final int newSystemFieldCount;
+ if (systemFieldUsedCount == 0) {
+ newSystemFieldCount = 0;
+ } else {
+ newSystemFieldCount = systemFieldCount;
+ }
+
+ int offset = systemFieldCount;
+ int changeCount = 0;
+ int newFieldCount = newSystemFieldCount;
+ final List<RelNode> newInputs = new ArrayList<>(2);
+ final List<Mapping> inputMappings = new ArrayList<>();
+ final List<Integer> inputExtraFieldCounts = new ArrayList<>();
+ for (RelNode input : join.getInputs()) {
+ final RelDataType inputRowType = input.getRowType();
+ final int inputFieldCount = inputRowType.getFieldCount();
+
+ // Compute required mapping.
+ ImmutableBitSet.Builder inputFieldsUsed = ImmutableBitSet.builder();
+ for (int bit : fieldsUsedPlus) {
+ if (bit >= offset && bit < offset + inputFieldCount) {
+ inputFieldsUsed.set(bit - offset);
+ }
+ }
+
+ // If there are system fields, we automatically use the
+ // corresponding field in each input.
+ inputFieldsUsed.set(0, newSystemFieldCount);
+
+ // FIXME: We ought to collect extra fields for each input
+ // individually. For now, we assume that just one input has
+ // on-demand fields.
+ Set<RelDataTypeField> inputExtraFields =
+ RelDataTypeImpl.extra(inputRowType) == null
+ ? Collections.emptySet()
+ : combinedInputExtraFields;
+ inputExtraFieldCounts.add(inputExtraFields.size());
+ TrimResult trimResult =
+ trimChild(join, input, inputFieldsUsed.build(), inputExtraFields);
+ newInputs.add(trimResult.left);
+ if (trimResult.left != input) {
+ ++changeCount;
+ }
+
+ final Mapping inputMapping = trimResult.right;
+ inputMappings.add(inputMapping);
+
+ // Move offset to point to start of next input.
+ offset += inputFieldCount;
+ newFieldCount +=
+ inputMapping.getTargetCount() + inputExtraFields.size();
+ }
+
+ Mapping mapping =
+ Mappings.create(
+ MappingType.INVERSE_SURJECTION,
+ fieldCount,
+ newFieldCount);
+ for (int i = 0; i < newSystemFieldCount; ++i) {
+ mapping.set(i, i);
+ }
+ offset = systemFieldCount;
+ int newOffset = newSystemFieldCount;
+ for (int i = 0; i < inputMappings.size(); i++) {
+ Mapping inputMapping = inputMappings.get(i);
+ for (IntPair pair : inputMapping) {
+ mapping.set(pair.source + offset, pair.target + newOffset);
+ }
+ offset += inputMapping.getSourceCount();
+ newOffset += inputMapping.getTargetCount()
+ + inputExtraFieldCounts.get(i);
+ }
+
+ if (changeCount == 0
+ && mapping.isIdentity()) {
+ return result(join, Mappings.createIdentity(fieldCount));
+ }
+
+ // Build new join.
+ final RexVisitor<RexNode> shuttle =
+ new RexPermuteInputsShuttle(
+ mapping, newInputs.get(0), newInputs.get(1));
+ RexNode newConditionExpr =
+ conditionExpr.accept(shuttle);
+
+ final RelBuilder relBuilder = REL_BUILDER.get();
+ relBuilder.push(newInputs.get(0));
+ relBuilder.push(newInputs.get(1));
+
+ switch (join.getJoinType()) {
+ case SEMI:
+ case ANTI:
+ // For SemiJoins and AntiJoins only map fields from the left-side
+ if (join.getJoinType() == JoinRelType.SEMI) {
+ relBuilder.semiJoin(newConditionExpr);
+ } else {
+ relBuilder.antiJoin(newConditionExpr);
+ }
+ Mapping inputMapping = inputMappings.get(0);
+ mapping = Mappings.create(MappingType.INVERSE_SURJECTION,
+ join.getRowType().getFieldCount(),
+ newSystemFieldCount + inputMapping.getTargetCount());
+ for (int i = 0; i < newSystemFieldCount; ++i) {
+ mapping.set(i, i);
+ }
+ offset = systemFieldCount;
+ newOffset = newSystemFieldCount;
+ for (IntPair pair : inputMapping) {
+ mapping.set(pair.source + offset, pair.target + newOffset);
+ }
+ break;
+ default:
+ relBuilder.join(join.getJoinType(), newConditionExpr);
+ }
+
+ return result(relBuilder.build(), mapping);
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+ * {@link org.apache.calcite.rel.core.SetOp} (including UNION and UNION ALL).
+ */
+ public TrimResult trimFields(
+ SetOp setOp,
+ ImmutableBitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final RelDataType rowType = setOp.getRowType();
+ final int fieldCount = rowType.getFieldCount();
+ int changeCount = 0;
+
+ // Fennel abhors an empty row type, so pretend that the parent rel
+ // wants the last field. (The last field is the least likely to be a
+ // system field.)
+ if (fieldsUsed.isEmpty()) {
+ fieldsUsed = ImmutableBitSet.of(rowType.getFieldCount() - 1);
+ }
+
+ // Compute the desired field mapping. Give the consumer the fields they
+ // want, in the order that they appear in the bitset.
+ final Mapping mapping = createMapping(fieldsUsed, fieldCount);
+
+ // Create input with trimmed columns.
+ for (RelNode input : setOp.getInputs()) {
+ TrimResult trimResult =
+ trimChild(setOp, input, fieldsUsed, extraFields);
+
+ // We want "mapping", the input gave us "inputMapping", compute
+ // "remaining" mapping.
+ // | | |
+ // |---------------- mapping ---------->|
+ // |-- inputMapping -->| |
+ // | |-- remaining -->|
+ //
+ // For instance, suppose we have columns [a, b, c, d],
+ // the consumer asked for mapping = [b, d],
+ // and the transformed input has columns inputMapping = [d, a, b].
+ // remaining will permute [b, d] to [d, a, b].
+ Mapping remaining = Mappings.divide(mapping, trimResult.right);
+
+ // Create a projection; does nothing if remaining is identity.
+ final RelBuilder relBuilder = REL_BUILDER.get();
+ relBuilder.push(trimResult.left);
+ relBuilder.permute(remaining);
+
+ if (input != relBuilder.peek()) {
+ ++changeCount;
+ }
+ }
+
+ final RelBuilder relBuilder = REL_BUILDER.get();
+ // If the input is unchanged, and we need to project all columns,
+ // there's nothing to do.
+ if (changeCount == 0
+ && mapping.isIdentity()) {
+ for (RelNode input : setOp.getInputs()) {
+ relBuilder.build();
+ }
+ return result(setOp, mapping);
+ }
+
+ switch (setOp.kind) {
+ case UNION:
+ relBuilder.union(setOp.all, setOp.getInputs().size());
+ break;
+ case INTERSECT:
+ relBuilder.intersect(setOp.all, setOp.getInputs().size());
+ break;
+ case EXCEPT:
+ assert setOp.getInputs().size() == 2;
+ relBuilder.minus(setOp.all);
+ break;
+ default:
+ throw new AssertionError("unknown setOp " + setOp);
+ }
+ return result(relBuilder.build(), mapping);
+ }
+
+ /**
+  * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+  * {@link org.apache.calcite.rel.logical.LogicalAggregate}.
+  *
+  * <p>The input is trimmed to the fields referenced by the group set and by
+  * the arguments, filter and collation of every aggregate call. Group key
+  * columns are always kept in the output, even when the consumer did not ask
+  * for them; aggregate calls whose output column is not in
+  * {@code fieldsUsed} are dropped.
+  *
+  * @param aggregate   Aggregate to trim
+  * @param fieldsUsed  Output fields of the aggregate wanted by the consumer
+  * @param extraFields Not consulted by this variant; the input is always
+  *                    trimmed with an empty set of extra fields
+  * @return The (possibly rewritten) aggregate together with the mapping from
+  *         the original output fields to the fields of the returned
+  *         expression
+  */
+ public TrimResult trimFields(
+     Aggregate aggregate,
+     ImmutableBitSet fieldsUsed,
+     Set<RelDataTypeField> extraFields) {
+   // Fields:
+   //
+   // | sys fields | group fields | indicator fields | agg functions |
+   //
+   // Two kinds of trimming:
+   //
+   // 1. If agg rel has system fields but none of these are used, create an
+   // agg rel with no system fields.
+   //
+   // 2. If aggregate functions are not used, remove them.
+   //
+   // But group and indicator fields stay, even if they are not used.
+
+   final RelDataType rowType = aggregate.getRowType();
+
+   // Compute which input fields are used.
+   // 1. group fields are always used
+   final ImmutableBitSet.Builder inputFieldsUsed =
+       aggregate.getGroupSet().rebuild();
+   // 2. agg functions (arguments, optional filter, and ordering columns)
+   for (AggregateCall aggCall : aggregate.getAggCallList()) {
+     inputFieldsUsed.addAll(aggCall.getArgList());
+     if (aggCall.filterArg >= 0) {
+       inputFieldsUsed.set(aggCall.filterArg);
+     }
+     inputFieldsUsed.addAll(RelCollations.ordinals(aggCall.collation));
+   }
+
+   // Create input with trimmed columns.
+   final RelNode input = aggregate.getInput();
+   final Set<RelDataTypeField> inputExtraFields = Collections.emptySet();
+   final TrimResult trimResult =
+       trimChild(aggregate, input, inputFieldsUsed.build(), inputExtraFields);
+   final RelNode newInput = trimResult.left;
+   final Mapping inputMapping = trimResult.right;
+
+   // We have to return group keys and (if present) indicators.
+   // So, pretend that the consumer asked for them.
+   final int groupCount = aggregate.getGroupSet().cardinality();
+   fieldsUsed =
+       fieldsUsed.union(ImmutableBitSet.range(groupCount));
+
+   // If the input is unchanged, and we need to project all columns,
+   // there's nothing to do.
+   if (input == newInput
+       && fieldsUsed.equals(ImmutableBitSet.range(rowType.getFieldCount()))) {
+     return result(aggregate,
+         Mappings.createIdentity(rowType.getFieldCount()));
+   }
+
+   // Which agg calls are used by our consumer? Agg call i produces output
+   // field (groupCount + i).
+   int j = groupCount;
+   int usedAggCallCount = 0;
+   for (int i = 0; i < aggregate.getAggCallList().size(); i++) {
+     if (fieldsUsed.get(j++)) {
+       ++usedAggCallCount;
+     }
+   }
+
+   // Offset due to the number of system fields having changed.
+   Mapping mapping =
+       Mappings.create(
+           MappingType.INVERSE_SURJECTION,
+           rowType.getFieldCount(),
+           groupCount + usedAggCallCount);
+
+   // Re-express the grouping in terms of the trimmed input's ordinals.
+   final ImmutableBitSet newGroupSet =
+       Mappings.apply(inputMapping, aggregate.getGroupSet());
+
+   final ImmutableList<ImmutableBitSet> newGroupSets =
+       ImmutableList.copyOf(
+           Iterables.transform(aggregate.getGroupSets(),
+               input1 -> Mappings.apply(inputMapping, input1)));
+
+   // Populate mapping of where to find the fields. System, group key and
+   // indicator fields first.
+   for (j = 0; j < groupCount; j++) {
+     mapping.set(j, j);
+   }
+
+   // Now create new agg calls, and populate mapping for them.
+   final RelBuilder relBuilder = REL_BUILDER.get();
+   relBuilder.push(newInput);
+   final List<RelBuilder.AggCall> newAggCallList = new ArrayList<>();
+   j = groupCount;
+   for (AggregateCall aggCall : aggregate.getAggCallList()) {
+     if (fieldsUsed.get(j)) {
+       final ImmutableList<RexNode> args =
+           relBuilder.fields(
+               Mappings.apply2(inputMapping, aggCall.getArgList()));
+       final RexNode filterArg = aggCall.filterArg < 0 ? null
+           : relBuilder.field(Mappings.apply(inputMapping, aggCall.filterArg));
+       // The collation refers to ordinals of the ORIGINAL input; like the
+       // arguments and the filter, it must be translated to the ordinals of
+       // the trimmed input before field references are created for it.
+       RelBuilder.AggCall newAggCall =
+           relBuilder.aggregateCall(aggCall.getAggregation(), args)
+               .distinct(aggCall.isDistinct())
+               .filter(filterArg)
+               .approximate(aggCall.isApproximate())
+               .sort(
+                   relBuilder.fields(
+                       RelCollations.permute(aggCall.collation, inputMapping)))
+               .as(aggCall.name);
+       mapping.set(j, groupCount + newAggCallList.size());
+       newAggCallList.add(newAggCall);
+     }
+     ++j;
+   }
+
+   final RelBuilder.GroupKey groupKey =
+       relBuilder.groupKey(newGroupSet, newGroupSets);
+   relBuilder.aggregate(groupKey, newAggCallList);
+
+   return result(relBuilder.build(), mapping);
+ }
+
+ /**
+  * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+  * {@link org.apache.calcite.rel.logical.LogicalTableModify}.
+  *
+  * <p>The fields requested by the consumer are disregarded: every field of
+  * the input is requested, and the returned mapping is always the identity
+  * over the modifier's own row type.
+  */
+ public TrimResult trimFields(
+     LogicalTableModify modifier,
+     ImmutableBitSet fieldsUsed,
+     Set<RelDataTypeField> extraFields) {
+   // Whatever the consumer asked for, all columns are projected.
+   Util.discard(fieldsUsed);
+
+   final int outputFieldCount = modifier.getRowType().getFieldCount();
+   final RelNode child = modifier.getInput();
+
+   // Ask the child for every one of its fields.
+   final ImmutableBitSet allChildFields =
+       ImmutableBitSet.range(child.getRowType().getFieldCount());
+   final Set<RelDataTypeField> noExtraFields = Collections.emptySet();
+   final TrimResult childResult =
+       trimChild(modifier, child, allChildFields, noExtraFields);
+
+   final Mapping childMapping = childResult.right;
+   if (!childMapping.isIdentity()) {
+     // All fields were requested, so a permutation is unexpected.
+     throw new AssertionError(
+         "Expected identity mapping, got " + childMapping);
+   }
+
+   // Only copy the modifier when the child was actually replaced.
+   final RelNode trimmedChild = childResult.left;
+   final LogicalTableModify rewritten;
+   if (trimmedChild == child) {
+     rewritten = modifier;
+   } else {
+     rewritten =
+         modifier.copy(
+             modifier.getTraitSet(),
+             Collections.singletonList(trimmedChild));
+   }
+   assert rewritten.getClass() == modifier.getClass();
+
+   return result(rewritten, Mappings.createIdentity(outputFieldCount));
+ }
+
+ /**
+  * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+  * {@link org.apache.calcite.rel.logical.LogicalTableFunctionScan}.
+  *
+  * <p>Each input is trimmed via {@code trimChildRestore} with all of its
+  * fields requested; the table function itself always projects all of its
+  * fields, so the returned mapping is the identity.
+  */
+ public TrimResult trimFields(
+     LogicalTableFunctionScan tabFun,
+     ImmutableBitSet fieldsUsed,
+     Set<RelDataTypeField> extraFields) {
+   final int outputFieldCount = tabFun.getRowType().getFieldCount();
+
+   final List<RelNode> trimmedInputs = new ArrayList<>();
+   for (RelNode child : tabFun.getInputs()) {
+     // Request every field of this child, with no extra fields.
+     final ImmutableBitSet allChildFields =
+         ImmutableBitSet.range(child.getRowType().getFieldCount());
+     final Set<RelDataTypeField> noExtraFields = Collections.emptySet();
+     final TrimResult childResult =
+         trimChildRestore(tabFun, child, allChildFields, noExtraFields);
+     assert childResult.right.isIdentity();
+     trimmedInputs.add(childResult.left);
+   }
+
+   // Re-create the scan only if at least one input differs.
+   final LogicalTableFunctionScan rewritten;
+   if (tabFun.getInputs().equals(trimmedInputs)) {
+     rewritten = tabFun;
+   } else {
+     rewritten = tabFun.copy(tabFun.getTraitSet(), trimmedInputs,
+         tabFun.getCall(), tabFun.getElementType(), tabFun.getRowType(),
+         tabFun.getColumnMappings());
+   }
+   assert rewritten.getClass() == tabFun.getClass();
+
+   // All fields are always projected.
+   return result(rewritten, Mappings.createIdentity(outputFieldCount));
+ }
+
+ /**
+ * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+ * {@link org.apache.calcite.rel.logical.LogicalValues}.
+ */
+ public TrimResult trimFields(
+ LogicalValues values,
+ ImmutableBitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ final RelDataType rowType = values.getRowType();
+ final int fieldCount = rowType.getFieldCount();
+
+ // If they are asking for no fields, we can't give them what they want,
+ // because zero-column records are illegal. Give them the last field,
+ // which is unlikely to be a system field.
+ if (fieldsUsed.isEmpty()) {
+ fieldsUsed = ImmutableBitSet.range(fieldCount - 1, fieldCount);
+ }
+
+ // If all fields are used, return unchanged.
+ if (fieldsUsed.equals(ImmutableBitSet.range(fieldCount))) {
+ Mapping mapping = Mappings.createIdentity(fieldCount);
+ return result(values, mapping);
+ }
+
+ final ImmutableList.Builder<ImmutableList<RexLiteral>> newTuples =
+ ImmutableList.builder();
+ for (ImmutableList<RexLiteral> tuple : values.getTuples()) {
+ ImmutableList.Builder<RexLiteral> newTuple = ImmutableList.builder();
+ for (int field : fieldsUsed) {
+ newTuple.add(tuple.get(field));
+ }
+ newTuples.add(newTuple.build());
+ }
+
+ final Mapping mapping = createMapping(fieldsUsed, fieldCount);
+ final RelDataType newRowType =
+ RelOptUtil.permute(values.getCluster().getTypeFactory(), rowType,
+ mapping);
+ final LogicalValues newValues =
+ LogicalValues.create(values.getCluster(), newRowType,
+ newTuples.build());
+ return result(newValues, mapping);
+ }
+
+ /**
+  * Creates an {@code INVERSE_SURJECTION} mapping that sends each bit set in
+  * {@code fieldsUsed} to consecutive targets 0, 1, 2, ... in ascending bit
+  * order.
+  *
+  * @param fieldsUsed Source fields to keep
+  * @param fieldCount Number of source fields
+  * @return Mapping with {@code fieldCount} sources and
+  *         {@code fieldsUsed.cardinality()} targets
+  */
+ protected Mapping createMapping(ImmutableBitSet fieldsUsed, int fieldCount) {
+   final Mapping fieldMapping =
+       Mappings.create(
+           MappingType.INVERSE_SURJECTION,
+           fieldCount,
+           fieldsUsed.cardinality());
+   int target = 0;
+   for (int source = fieldsUsed.nextSetBit(0);
+       source >= 0;
+       source = fieldsUsed.nextSetBit(source + 1)) {
+     fieldMapping.set(source, target);
+     target++;
+   }
+   return fieldMapping;
+ }
+
+ /**
+  * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
+  * {@link org.apache.calcite.rel.logical.LogicalTableScan}.
+  *
+  * <p>Delegates to the generic {@code RelNode} variant when every field is
+  * requested and there are no extra fields; otherwise asks the scan to
+  * project the requested fields. A request for zero fields is answered with
+  * a dummy projection.
+  */
+ public TrimResult trimFields(
+     final TableScan tableAccessRel,
+     ImmutableBitSet fieldsUsed,
+     Set<RelDataTypeField> extraFields) {
+   final int fieldCount = tableAccessRel.getRowType().getFieldCount();
+
+   // Projecting everything (and nothing extra) needs no additional RelNode.
+   final boolean allFieldsRequested =
+       fieldsUsed.equals(ImmutableBitSet.range(fieldCount));
+   if (allFieldsRequested && extraFields.isEmpty()) {
+     return trimFields(
+         (RelNode) tableAccessRel, fieldsUsed, extraFields);
+   }
+
+   final RelNode projectedScan =
+       tableAccessRel.project(fieldsUsed, extraFields, REL_BUILDER.get());
+
+   if (fieldsUsed.cardinality() != 0) {
+     return result(projectedScan, createMapping(fieldsUsed, fieldCount));
+   }
+
+   // Some parts of the system can't handle rows with zero fields, so
+   // pretend that one field is used. If the table answered with a
+   // zero-field project, strip it away and build our own one-field project
+   // on top of its input.
+   RelNode base = projectedScan;
+   if (projectedScan instanceof Project
+       && projectedScan.getRowType().getFieldCount() == 0) {
+     base = ((Project) projectedScan).getInput();
+   }
+   return dummyProject(fieldCount, base);
+ }
+
+ //~ Inner Classes ----------------------------------------------------------
+
+ /**
+  * Outcome of trimming the columns of a relational expression: the new
+  * expression paired with a mapping.
+  *
+  * <p>The mapping tells the parent of the trimmed expression where each
+  * column it asked for can now be found.
+  *
+  * <p>It is a
+  * {@link org.apache.calcite.util.mapping.Mappings.SourceMapping}: no column
+  * may be used more than once, and some columns may not be used at all.
+  * {@code columnsUsed.getSource(i)} yields the source of the i'th output
+  * field.
+  *
+  * <p>As an illustration, take a relational expression with 4 output
+  * columns of which only two are used. With the mapping
+  * {2 → 1, 3 → 0}:
+  *
+  * <ul>
+  * <li>columnsUsed.getSourceCount() returns 4
+  * <li>columnsUsed.getTargetCount() returns 2
+  * <li>columnsUsed.getSource(0) returns 3
+  * <li>columnsUsed.getSource(1) returns 2
+  * <li>columnsUsed.getSource(2) throws IndexOutOfBounds
+  * <li>columnsUsed.getTargetOpt(3) returns 0
+  * <li>columnsUsed.getTargetOpt(0) returns -1
+  * </ul>
+  */
+ protected static class TrimResult extends Pair<RelNode, Mapping> {
+   /**
+    * Creates a TrimResult.
+    *
+    * <p>When assertions are enabled, checks that the mapping has exactly as
+    * many targets as the new expression has fields.
+    *
+    * @param left  New relational expression
+    * @param right Mapping of fields onto original fields
+    */
+   public TrimResult(RelNode left, Mapping right) {
+     super(left, right);
+     assert left.getRowType().getFieldCount() == right.getTargetCount()
+         : "rowType: " + left.getRowType() + ", mapping: " + right;
+   }
+ }
+}
+
+// End RelFieldTrimmer.java
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 537355f..6589eeb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -38,6 +38,12 @@ import org.apache.calcite.adapter.druid.DruidTable;
import org.apache.calcite.adapter.java.JavaTypeFactory;
import org.apache.calcite.adapter.jdbc.JdbcConvention;
import org.apache.calcite.adapter.jdbc.JdbcImplementor;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcAggregate;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcFilter;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcJoin;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcProject;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcSort;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcUnion;
import org.apache.calcite.adapter.jdbc.JdbcSchema;
import org.apache.calcite.adapter.jdbc.JdbcTable;
import org.apache.calcite.config.CalciteConnectionConfig;
@@ -57,12 +63,17 @@ import org.apache.calcite.plan.hep.HepMatchOrder;
import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgram;
import org.apache.calcite.plan.hep.HepProgramBuilder;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.AbstractConverter;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.AbstractRelNode;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollationImpl;
import org.apache.calcite.rel.RelCollations;
import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelVisitor;
+import org.apache.calcite.rel.convert.ConverterImpl;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.Filter;
@@ -148,9 +159,11 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
@@ -338,6 +351,44 @@ public class CalcitePlanner extends SemanticAnalyzer {
private static final Pattern PATTERN_TIMESTAMP =
Pattern.compile("TIMESTAMP\\(9\\)");
+ /**
+ * This is the list of operators that are specifically used in Hive.
+ *
+ * <p>The list is handed to {@code HiveDefaultRelMetadataProvider} (both its
+ * constructor and {@code initializeMetadataProviderClass}) and to
+ * {@code HiveRelFieldTrimmer.initializeFieldTrimmerClass}; see
+ * {@link #warmup()}.
+ */
+ private static final List<Class<? extends RelNode>> HIVE_REL_NODE_CLASSES =
+ ImmutableList.of(
+ RelNode.class,
+ AbstractRelNode.class,
+ RelSubset.class,
+ HepRelVertex.class,
+ ConverterImpl.class,
+ AbstractConverter.class,
+
+ HiveTableScan.class,
+ HiveAggregate.class,
+ HiveExcept.class,
+ HiveFilter.class,
+ HiveIntersect.class,
+ HiveJoin.class,
+ HiveMultiJoin.class,
+ HiveProject.class,
+ HiveRelNode.class,
+ HiveSemiJoin.class,
+ HiveSortExchange.class,
+ HiveSortLimit.class,
+ HiveTableFunctionScan.class,
+ HiveUnion.class,
+
+ DruidQuery.class,
+
+ HiveJdbcConverter.class,
+ JdbcHiveTableScan.class,
+ JdbcAggregate.class,
+ JdbcFilter.class,
+ JdbcJoin.class,
+ JdbcProject.class,
+ JdbcSort.class,
+ JdbcUnion.class);
+
public CalcitePlanner(QueryState queryState) throws SemanticException {
super(queryState);
@@ -1794,14 +1845,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
calciteGenPlan.getCluster().getPlanner().setExecutor(executorProvider);
// We need to get the ColumnAccessInfo and viewToTableSchema for views.
- HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
- HiveRelFactories.HIVE_BUILDER.create(optCluster, null),
- this.columnAccessInfo, this.viewProjectToTableSchema);
-
- fieldTrimmer.trim(calciteGenPlan);
+ HiveRelFieldTrimmer.get()
+ .trim(HiveRelFactories.HIVE_BUILDER.create(optCluster, null),
+ calciteGenPlan, this.columnAccessInfo, this.viewProjectToTableSchema);
// Create and set MD provider
- HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf);
+ HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf, HIVE_REL_NODE_CLASSES);
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
//Remove subquery
@@ -5214,10 +5263,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
/**
* This method can be called at startup time to pre-register all the
* additional Hive classes (compared to Calcite core classes) that may
- * be visited during the planning phase.
+ * be visited during the planning phase in the metadata providers
+ * and the field trimmer.
*/
- public static void initializeMetadataProviderClass() {
- HiveDefaultRelMetadataProvider.initializeMetadataProviderClass();
+ public static void warmup() {
+ HiveDefaultRelMetadataProvider.initializeMetadataProviderClass(HIVE_REL_NODE_CLASSES);
+ HiveRelFieldTrimmer.initializeFieldTrimmerClass(HIVE_REL_NODE_CLASSES);
}
private enum TableType {
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
index e8dd572..081d5f8 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
@@ -70,7 +70,7 @@ public class TestCBORuleFiredOnlyOnce {
RelOptCluster cluster = RelOptCluster.create(planner, rexBuilder);
// Create MD provider
- HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf);
+ HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf, null);
List<RelMetadataProvider> list = Lists.newArrayList();
list.add(mdProvider.getMetadataProvider());
planner.registerMetadataProviders(list);
diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java
index 8f73c60..fece82e 100644
--- a/service/src/java/org/apache/hive/service/server/HiveServer2.java
+++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java
@@ -250,8 +250,8 @@ public class HiveServer2 extends CompositeService {
LlapRegistryService.getClient(hiveConf);
}
- // Initialize metadata provider class
- CalcitePlanner.initializeMetadataProviderClass();
+ // Initialize metadata provider class and trimmer
+ CalcitePlanner.warmup();
try {
sessionHive = Hive.get(hiveConf);