You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jp...@apache.org on 2014/12/10 00:02:24 UTC
svn commit: r1644224 [2/5] - in /hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/
ql/src/java/org...
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,204 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.RelFactories.ProjectFactory;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.util.Util;
+import org.apache.calcite.util.mapping.Mapping;
+import org.apache.calcite.util.mapping.MappingType;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveProject extends Project implements HiveRelNode {
+
+ public static final ProjectFactory DEFAULT_PROJECT_FACTORY = new HiveProjectFactoryImpl();
+
+ private final List<Integer> virtualCols;
+
+ /**
+ * Creates a HiveProject.
+ *
+ * @param cluster
+ * Cluster this relational expression belongs to
+ * @param child
+ * input relational expression
+ * @param exps
+ * List of expressions for the input columns
+ * @param rowType
+ * output row type
+ * @param flags
+ * values as in {@link Project.Flags}
+ */
+ public HiveProject(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
+ List<? extends RexNode> exps, RelDataType rowType, int flags) {
+ super(cluster, traitSet, child, exps, rowType, flags);
+ virtualCols = ImmutableList.copyOf(HiveCalciteUtil.getVirtualCols(exps));
+ }
+
+ /**
+ * Creates a HiveProject with no sort keys.
+ *
+ * @param child
+ * input relational expression
+ * @param exps
+ * set of expressions for the input columns
+ * @param fieldNames
+ * aliases of the expressions
+ */
+ public static HiveProject create(RelNode child, List<? extends RexNode> exps,
+ List<String> fieldNames) throws CalciteSemanticException{
+ RelOptCluster cluster = child.getCluster();
+
+ // 1 Ensure columnNames are unique - CALCITE-411
+ if (fieldNames != null && !Util.isDistinct(fieldNames)) {
+ String msg = "Select list contains multiple expressions with the same name." + fieldNames;
+ throw new CalciteSemanticException(msg);
+ }
+ RelDataType rowType = RexUtil.createStructType(cluster.getTypeFactory(), exps, fieldNames);
+ return create(cluster, child, exps, rowType, Collections.<RelCollation> emptyList());
+ }
+
+ /**
+ * Creates a HiveProject.
+ */
+ public static HiveProject create(RelOptCluster cluster, RelNode child, List<? extends RexNode> exps,
+ RelDataType rowType, final List<RelCollation> collationList) {
+ RelTraitSet traitSet = TraitsUtil.getDefaultTraitSet(cluster);
+ return new HiveProject(cluster, traitSet, child, exps, rowType, Flags.BOXED);
+ }
+
+ /**
+ * Creates a HiveProject.
+ */
+ public static HiveProject create(RelOptCluster cluster, RelNode child, List<? extends RexNode> exps,
+ RelDataType rowType, RelTraitSet traitSet, final List<RelCollation> collationList) {
+ return new HiveProject(cluster, traitSet, child, exps, rowType, Flags.BOXED);
+ }
+
+ /**
+ * Creates a relational expression which projects the output fields of a
+ * relational expression according to a partial mapping.
+ *
+ * <p>
+ * A partial mapping is weaker than a permutation: every target has one
+ * source, but a source may have 0, 1 or more than one targets. Usually the
+ * result will have fewer fields than the source, unless some source fields
+ * are projected multiple times.
+ *
+ * <p>
+ * This method could optimize the result as {@link #permute} does, but does
+ * not at present.
+ *
+ * @param rel
+ * Relational expression
+ * @param mapping
+ * Mapping from source fields to target fields. The mapping type must
+ * obey the constraints {@link MappingType#isMandatorySource()} and
+ * {@link MappingType#isSingleSource()}, as does
+ * {@link MappingType#INVERSE_FUNCTION}.
+ * @param fieldNames
+ * Field names; if null, or if a particular entry is null, the name
+ * of the permuted field is used
+ * @return relational expression which projects a subset of the input fields
+ * @throws CalciteSemanticException
+ */
+ public static RelNode projectMapping(RelNode rel, Mapping mapping, List<String> fieldNames) throws CalciteSemanticException {
+ assert mapping.getMappingType().isSingleSource();
+ assert mapping.getMappingType().isMandatorySource();
+
+ if (mapping.isIdentity()) {
+ return rel;
+ }
+
+ final List<String> outputNameList = new ArrayList<String>();
+ final List<RexNode> outputProjList = new ArrayList<RexNode>();
+ final List<RelDataTypeField> fields = rel.getRowType().getFieldList();
+ final RexBuilder rexBuilder = rel.getCluster().getRexBuilder();
+
+ for (int i = 0; i < mapping.getTargetCount(); i++) {
+ int source = mapping.getSource(i);
+ final RelDataTypeField sourceField = fields.get(source);
+ outputNameList
+ .add(((fieldNames == null) || (fieldNames.size() <= i) || (fieldNames.get(i) == null)) ? sourceField
+ .getName() : fieldNames.get(i));
+ outputProjList.add(rexBuilder.makeInputRef(rel, source));
+ }
+
+ return create(rel, outputProjList, outputNameList);
+ }
+
+ @Override
+ public Project copy(RelTraitSet traitSet, RelNode input, List<RexNode> exps,
+ RelDataType rowType) {
+ assert traitSet.containsIfApplicable(HiveRelNode.CONVENTION);
+ return new HiveProject(getCluster(), traitSet, input, exps, rowType, getFlags());
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ public List<Integer> getVirtualCols() {
+ return virtualCols;
+ }
+
+ /**
+ * Implementation of {@link ProjectFactory} that returns
+ * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject}
+ * .
+ */
+ private static class HiveProjectFactoryImpl implements ProjectFactory {
+
+ @Override
+ public RelNode createProject(RelNode child,
+ List<? extends RexNode> childExprs, List<String> fieldNames) {
+ RelOptCluster cluster = child.getCluster();
+ RelDataType rowType = RexUtil.createStructType(cluster.getTypeFactory(), childExprs, fieldNames);
+ RelNode project = HiveProject.create(cluster, child,
+ childExprs, rowType,
+ child.getTraitSet(), Collections.<RelCollation> emptyList());
+
+ return project;
+ }
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRelNode.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.plan.Convention;
+import org.apache.calcite.rel.RelNode;
+
+public interface HiveRelNode extends RelNode {
+ void implement(Implementor implementor);
+
+ /** Calling convention for relational operations that occur in Hive. */
+ final Convention CONVENTION = new Convention.Impl("HIVE", HiveRelNode.class);
+
+ class Implementor {
+
+ public void visitChild(int ordinal, RelNode input) {
+ assert ordinal == 0;
+ ((HiveRelNode) input).implement(this);
+ }
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.Map;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+
+import com.google.common.collect.ImmutableMap;
+
+public class HiveSort extends Sort implements HiveRelNode {
+
+ public static final HiveSortRelFactory HIVE_SORT_REL_FACTORY = new HiveSortRelFactory();
+
+ // NOTE: this is to work around Hive Calcite Limitations w.r.t OB.
+ // 1. Calcite can not accept expressions in OB; instead it needs to be expressed
+ // as VC in input Select.
+ // 2. Hive can not preserve ordering through select boundaries.
+ // 3. This map is used for outermost OB to migrate the VC corresponding OB
+ // expressions from input select.
+ // 4. This is used by ASTConverter after we are done with Calcite Planning
+ private ImmutableMap<Integer, RexNode> mapOfInputRefToRexCall;
+
+ public HiveSort(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
+ RelCollation collation, RexNode offset, RexNode fetch) {
+ super(cluster, TraitsUtil.getSortTraitSet(cluster, traitSet, collation), child, collation,
+ offset, fetch);
+ }
+
+ @Override
+ public HiveSort copy(RelTraitSet traitSet, RelNode newInput, RelCollation newCollation,
+ RexNode offset, RexNode fetch) {
+ // TODO: can we blindly copy sort trait? What if inputs changed and we
+ // are now sorting by different cols
+ RelCollation canonizedCollation = traitSet.canonize(newCollation);
+ return new HiveSort(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch);
+ }
+
+ public RexNode getFetchExpr() {
+ return fetch;
+ }
+
+ public void setInputRefToCallMap(ImmutableMap<Integer, RexNode> refToCall) {
+ this.mapOfInputRefToRexCall = refToCall;
+ }
+
+ public Map<Integer, RexNode> getInputRefToCallMap() {
+ return this.mapOfInputRefToRexCall;
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+ }
+
+ private static class HiveSortRelFactory implements RelFactories.SortFactory {
+
+ @Override
+ public RelNode createSort(RelTraitSet traits, RelNode child, RelCollation collation,
+ RexNode offset, RexNode fetch) {
+ return new HiveSort(child.getCluster(), traits, child, collation, offset, fetch);
+ }
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+
+/**
+ * Relational expression representing a scan of a HiveDB collection.
+ *
+ * <p>
+ * Additional operations might be applied, using the "find" or "aggregate"
+ * methods.
+ * </p>
+ */
+public class HiveTableScan extends TableScan implements HiveRelNode {
+
+ /**
+ * Creates a HiveTableScan.
+ *
+ * @param cluster
+ * Cluster
+ * @param traitSet
+ * Traits
+ * @param table
+ * Table
+ * @param table
+ * HiveDB table
+ */
+ public HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table,
+ RelDataType rowtype) {
+ super(cluster, TraitsUtil.getDefaultTraitSet(cluster), table);
+ assert getConvention() == HiveRelNode.CONVENTION;
+ }
+
+ @Override
+ public RelNode copy(RelTraitSet traitSet, List<RelNode> inputs) {
+ assert inputs.isEmpty();
+ return this;
+ }
+
+ @Override
+ public RelOptCost computeSelfCost(RelOptPlanner planner) {
+ return HiveCost.FACTORY.makeZeroCost();
+ }
+
+ @Override
+ public void register(RelOptPlanner planner) {
+
+ }
+
+ @Override
+ public void implement(Implementor implementor) {
+
+ }
+
+ @Override
+ public double getRows() {
+ return ((RelOptHiveTable) table).getRowCount();
+ }
+
+ public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
+ return ((RelOptHiveTable) table).getColStat(projIndxLst);
+ }
+}
\ No newline at end of file
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.core.SetOp;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode.Implementor;
+
+public class HiveUnion extends Union {
+
+ public static final HiveUnionRelFactory UNION_REL_FACTORY = new HiveUnionRelFactory();
+
+ public HiveUnion(RelOptCluster cluster, RelTraitSet traits, List<RelNode> inputs) {
+ super(cluster, traits, inputs, true);
+ }
+
+ @Override
+ public SetOp copy(RelTraitSet traitSet, List<RelNode> inputs, boolean all) {
+ return new HiveUnion(this.getCluster(), traitSet, inputs);
+ }
+
+ public void implement(Implementor implementor) {
+ }
+
+ private static class HiveUnionRelFactory implements RelFactories.SetOpFactory {
+
+ @Override
+ public RelNode createSetOp(SqlKind kind, List<RelNode> inputs, boolean all) {
+ if (kind != SqlKind.UNION) {
+ throw new IllegalStateException("Expected to get Set operator of type Union. Found : " + kind);
+ }
+ return new HiveUnion(inputs.get(0).getCluster(), inputs.get(0).getTraitSet(), inputs);
+ }
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.BitSet;
+import java.util.List;
+import java.util.ListIterator;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptRuleOperand;
+import org.apache.calcite.plan.RelOptUtil.InputFinder;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.rules.FilterJoinRule;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+public abstract class HiveFilterJoinRule extends FilterJoinRule {
+
+ public static final HiveFilterJoinRule FILTER_ON_JOIN = new HiveFilterJoinMergeRule();
+
+ public static final HiveFilterJoinRule JOIN = new HiveFilterJoinTransposeRule();
+
+ /**
+ * Creates a PushFilterPastJoinRule with an explicit root operand.
+ */
+ protected HiveFilterJoinRule(RelOptRuleOperand operand, String id, boolean smart,
+ RelFactories.FilterFactory filterFactory, RelFactories.ProjectFactory projectFactory) {
+ super(operand, id, smart, filterFactory, projectFactory);
+ }
+
+ /**
+ * Rule that tries to push filter expressions into a join condition and into
+ * the inputs of the join.
+ */
+ public static class HiveFilterJoinMergeRule extends HiveFilterJoinRule {
+ public HiveFilterJoinMergeRule() {
+ super(RelOptRule.operand(Filter.class,
+ RelOptRule.operand(Join.class, RelOptRule.any())),
+ "HiveFilterJoinRule:filter", true, HiveFilter.DEFAULT_FILTER_FACTORY,
+ HiveProject.DEFAULT_PROJECT_FACTORY);
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ Filter filter = call.rel(0);
+ Join join = call.rel(1);
+ super.perform(call, filter, join);
+ }
+ }
+
+ public static class HiveFilterJoinTransposeRule extends HiveFilterJoinRule {
+ public HiveFilterJoinTransposeRule() {
+ super(RelOptRule.operand(Join.class, RelOptRule.any()),
+ "HiveFilterJoinRule:no-filter", true, HiveFilter.DEFAULT_FILTER_FACTORY,
+ HiveProject.DEFAULT_PROJECT_FACTORY);
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ Join join = call.rel(0);
+ super.perform(call, null, join);
+ }
+ }
+
+ /*
+ * Any predicates pushed down to joinFilters that aren't equality conditions:
+ * put them back as aboveFilters because Hive doesn't support not equi join
+ * conditions.
+ */
+ @Override
+ protected void validateJoinFilters(List<RexNode> aboveFilters, List<RexNode> joinFilters,
+ Join join, JoinRelType joinType) {
+ if (joinType.equals(JoinRelType.INNER)) {
+ ListIterator<RexNode> filterIter = joinFilters.listIterator();
+ while (filterIter.hasNext()) {
+ RexNode exp = filterIter.next();
+
+ if (exp instanceof RexCall) {
+ RexCall c = (RexCall) exp;
+ boolean validHiveJoinFilter = false;
+
+ if ((c.getOperator().getKind() == SqlKind.EQUALS)) {
+ validHiveJoinFilter = true;
+ for (RexNode rn : c.getOperands()) {
+ // NOTE: Hive dis-allows projections from both left & right side
+ // of join condition. Example: Hive disallows
+ // (r1.x +r2.x)=(r1.y+r2.y) on join condition.
+ if (filterRefersToBothSidesOfJoin(rn, join)) {
+ validHiveJoinFilter = false;
+ break;
+ }
+ }
+ } else if ((c.getOperator().getKind() == SqlKind.LESS_THAN)
+ || (c.getOperator().getKind() == SqlKind.GREATER_THAN)
+ || (c.getOperator().getKind() == SqlKind.LESS_THAN_OR_EQUAL)
+ || (c.getOperator().getKind() == SqlKind.GREATER_THAN_OR_EQUAL)) {
+ validHiveJoinFilter = true;
+ // NOTE: Hive dis-allows projections from both left & right side of
+ // join in in equality condition. Example: Hive disallows (r1.x <
+ // r2.x) on join condition.
+ if (filterRefersToBothSidesOfJoin(c, join)) {
+ validHiveJoinFilter = false;
+ }
+ }
+
+ if (validHiveJoinFilter)
+ continue;
+ }
+
+ aboveFilters.add(exp);
+ filterIter.remove();
+ }
+ }
+ }
+
+ private boolean filterRefersToBothSidesOfJoin(RexNode filter, Join j) {
+ boolean refersToBothSides = false;
+
+ int joinNoOfProjects = j.getRowType().getFieldCount();
+ ImmutableBitSet filterProjs = ImmutableBitSet.FROM_BIT_SET.apply(
+ new BitSet(joinNoOfProjects));
+ ImmutableBitSet allLeftProjs = filterProjs.union(
+ ImmutableBitSet.range(0, j.getInput(0).getRowType().getFieldCount()));
+ ImmutableBitSet allRightProjs = filterProjs.union(
+ ImmutableBitSet.range(j.getInput(0).getRowType().getFieldCount(), joinNoOfProjects));
+
+ filterProjs = filterProjs.union(InputFinder.bits(filter));
+
+ if (allLeftProjs.intersects(filterProjs) && allRightProjs.intersects(filterProjs))
+ refersToBothSides = true;
+
+ return refersToBothSides;
+ }
+}
+
+// End PushFilterPastJoinRule.java
+
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+public class HivePartitionPruneRule extends RelOptRule {
+
+ HiveConf conf;
+
+ public HivePartitionPruneRule(HiveConf conf) {
+ super(operand(HiveFilter.class, operand(HiveTableScan.class, none())));
+ this.conf = conf;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ HiveFilter filter = call.rel(0);
+ HiveTableScan tScan = call.rel(1);
+ perform(call, filter, tScan);
+ }
+
+ protected void perform(RelOptRuleCall call, Filter filter,
+ HiveTableScan tScan) {
+
+ RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable();
+ RexNode predicate = filter.getCondition();
+
+ Pair<RexNode, RexNode> predicates = PartitionPrune
+ .extractPartitionPredicates(filter.getCluster(), hiveTable, predicate);
+ RexNode partColExpr = predicates.left;
+ hiveTable.computePartitionList(conf, partColExpr);
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.rel.rules.ProjectMergeRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+//Currently not used, turn this on later
+public class HiveProjectMergeRule extends ProjectMergeRule {
+ public static final HiveProjectMergeRule INSTANCE = new HiveProjectMergeRule();
+
+ public HiveProjectMergeRule() {
+ super(true, HiveProject.DEFAULT_PROJECT_FACTORY);
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/PartitionPrune.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+
+public class PartitionPrune {
+
+ /**
+ * Breaks the predicate into 2 pieces. The first piece is the expressions that
+ * only contain partition columns and can be used for Partition Pruning; the
+ * second piece is the predicates that are left.
+ *
+ * @param cluster
+ * @param hiveTable
+ * @param predicate
+ * @return a Pair of expressions, each of which maybe null. The 1st predicate
+ * is expressions that only contain partition columns; the 2nd
+ * predicate contains the remaining predicates.
+ */
+ public static Pair<RexNode, RexNode> extractPartitionPredicates(
+ RelOptCluster cluster, RelOptHiveTable hiveTable, RexNode predicate) {
+ RexNode partitionPruningPred = predicate
+ .accept(new ExtractPartPruningPredicate(cluster, hiveTable));
+ RexNode remainingPred = predicate.accept(new ExtractRemainingPredicate(
+ cluster, partitionPruningPred));
+ return new Pair<RexNode, RexNode>(partitionPruningPred, remainingPred);
+ }
+
+ public static class ExtractPartPruningPredicate extends
+ RexVisitorImpl<RexNode> {
+
+ final RelOptHiveTable hiveTable;
+ final RelDataType rType;
+ final Set<String> partCols;
+ final RelOptCluster cluster;
+
+ public ExtractPartPruningPredicate(RelOptCluster cluster,
+ RelOptHiveTable hiveTable) {
+ super(true);
+ this.hiveTable = hiveTable;
+ rType = hiveTable.getRowType();
+ List<FieldSchema> pfs = hiveTable.getHiveTableMD().getPartCols();
+ partCols = new HashSet<String>();
+ for (FieldSchema pf : pfs) {
+ partCols.add(pf.getName());
+ }
+ this.cluster = cluster;
+ }
+
+ @Override
+ public RexNode visitLiteral(RexLiteral literal) {
+ return literal;
+ }
+
+ @Override
+ public RexNode visitInputRef(RexInputRef inputRef) {
+ RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex());
+ if (partCols.contains(f.getName())) {
+ return inputRef;
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public RexNode visitCall(RexCall call) {
+ if (!deep) {
+ return null;
+ }
+
+ List<RexNode> args = new LinkedList<RexNode>();
+ boolean argsPruned = false;
+
+ GenericUDF hiveUDF = SqlFunctionConverter.getHiveUDF(call.getOperator(),
+ call.getType(), call.operands.size());
+ if (hiveUDF != null &&
+ !FunctionRegistry.isDeterministic(hiveUDF)) {
+ return null;
+ }
+
+ for (RexNode operand : call.operands) {
+ RexNode n = operand.accept(this);
+ if (n != null) {
+ args.add(n);
+ } else {
+ argsPruned = true;
+ }
+ }
+
+ if (call.getOperator() != SqlStdOperatorTable.AND) {
+ return argsPruned ? null : call;
+ } else {
+ if (args.size() == 0) {
+ return null;
+ } else if (args.size() == 1) {
+ return args.get(0);
+ } else {
+ return cluster.getRexBuilder().makeCall(call.getOperator(), args);
+ }
+ }
+ }
+
+ }
+
+ public static class ExtractRemainingPredicate extends RexVisitorImpl<RexNode> {
+
+ List<RexNode> pruningPredicates;
+ final RelOptCluster cluster;
+
+ public ExtractRemainingPredicate(RelOptCluster cluster,
+ RexNode partPruningExpr) {
+ super(true);
+ this.cluster = cluster;
+ pruningPredicates = new ArrayList<RexNode>();
+ flattenPredicates(partPruningExpr);
+ }
+
+ private void flattenPredicates(RexNode r) {
+ if (r instanceof RexCall
+ && ((RexCall) r).getOperator() == SqlStdOperatorTable.AND) {
+ for (RexNode c : ((RexCall) r).getOperands()) {
+ flattenPredicates(c);
+ }
+ } else {
+ pruningPredicates.add(r);
+ }
+ }
+
+ @Override
+ public RexNode visitLiteral(RexLiteral literal) {
+ return literal;
+ }
+
+ @Override
+ public RexNode visitInputRef(RexInputRef inputRef) {
+ return inputRef;
+ }
+
+ @Override
+ public RexNode visitCall(RexCall call) {
+ if (!deep) {
+ return null;
+ }
+
+ if (call.getOperator() != SqlStdOperatorTable.AND) {
+ if (pruningPredicates.contains(call)) {
+ return null;
+ } else {
+ return call;
+ }
+ }
+
+ List<RexNode> args = new LinkedList<RexNode>();
+
+ for (RexNode operand : call.operands) {
+ RexNode n = operand.accept(this);
+ if (n != null) {
+ args.add(n);
+ }
+ }
+
+ if (args.size() == 0) {
+ return null;
+ } else if (args.size() == 1) {
+ return args.get(0);
+ } else {
+ return cluster.getRexBuilder().makeCall(call.getOperator(), args);
+ }
+ }
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,255 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.type.SqlTypeUtil;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
+ private final RelNode childRel;
+ private final double childCardinality;
+
+ protected FilterSelectivityEstimator(RelNode childRel) {
+ super(true);
+ this.childRel = childRel;
+ this.childCardinality = RelMetadataQuery.getRowCount(childRel);
+ }
+
+ public Double estimateSelectivity(RexNode predicate) {
+ return predicate.accept(this);
+ }
+
+ public Double visitCall(RexCall call) {
+ if (!deep) {
+ return 1.0;
+ }
+
+ /*
+ * Ignore any predicates on partition columns because we have already
+ * accounted for these in the Table row count.
+ */
+ if (isPartitionPredicate(call, this.childRel)) {
+ return 1.0;
+ }
+
+ Double selectivity = null;
+ SqlKind op = getOp(call);
+
+ switch (op) {
+ case AND: {
+ selectivity = computeConjunctionSelectivity(call);
+ break;
+ }
+
+ case OR: {
+ selectivity = computeDisjunctionSelectivity(call);
+ break;
+ }
+
+ case NOT:
+ case NOT_EQUALS: {
+ selectivity = computeNotEqualitySelectivity(call);
+ break;
+ }
+
+ case LESS_THAN_OR_EQUAL:
+ case GREATER_THAN_OR_EQUAL:
+ case LESS_THAN:
+ case GREATER_THAN: {
+ selectivity = ((double) 1 / (double) 3);
+ break;
+ }
+
+ case IN: {
+ // TODO: 1) check for duplicates 2) We assume in clause values to be
+ // present in NDV which may not be correct (Range check can find it) 3) We
+ // assume values in NDV set is uniformly distributed over col values
+ // (account for skewness - histogram).
+ selectivity = computeFunctionSelectivity(call) * (call.operands.size() - 1);
+ if (selectivity <= 0.0) {
+ selectivity = 0.10;
+ } else if (selectivity >= 1.0) {
+ selectivity = 1.0;
+ }
+ break;
+ }
+
+ default:
+ selectivity = computeFunctionSelectivity(call);
+ }
+
+ return selectivity;
+ }
+
+ /**
+ * NDV of "f1(x, y, z) != f2(p, q, r)" ->
+ * "(maxNDV(x,y,z,p,q,r) - 1)/maxNDV(x,y,z,p,q,r)".
+ * <p>
+ *
+ * @param call
+ * @return
+ */
+ private Double computeNotEqualitySelectivity(RexCall call) {
+ double tmpNDV = getMaxNDV(call);
+
+ if (tmpNDV > 1)
+ return (tmpNDV - (double) 1) / tmpNDV;
+ else
+ return 1.0;
+ }
+
+ /**
+ * Selectivity of f(X,y,z) -> 1/maxNDV(x,y,z).
+ * <p>
+ * Note that >, >=, <, <=, = ... are considered generic functions and uses
+ * this method to find their selectivity.
+ *
+ * @param call
+ * @return
+ */
+ private Double computeFunctionSelectivity(RexCall call) {
+ return 1 / getMaxNDV(call);
+ }
+
+ /**
+ * Disjunction Selectivity -> (1 D(1-m1/n)(1-m2/n)) where n is the total
+ * number of tuples from child and m1 and m2 is the expected number of tuples
+ * from each part of the disjunction predicate.
+ * <p>
+ * Note we compute m1. m2.. by applying selectivity of the disjunctive element
+ * on the cardinality from child.
+ *
+ * @param call
+ * @return
+ */
+ private Double computeDisjunctionSelectivity(RexCall call) {
+ Double tmpCardinality;
+ Double tmpSelectivity;
+ double selectivity = 1;
+
+ for (RexNode dje : call.getOperands()) {
+ tmpSelectivity = dje.accept(this);
+ if (tmpSelectivity == null) {
+ tmpSelectivity = 0.99;
+ }
+ tmpCardinality = childCardinality * tmpSelectivity;
+
+ if (tmpCardinality > 1 && tmpCardinality < childCardinality) {
+ tmpSelectivity = (1 - tmpCardinality / childCardinality);
+ } else {
+ tmpSelectivity = 1.0;
+ }
+
+ selectivity *= tmpSelectivity;
+ }
+
+ if (selectivity < 0.0)
+ selectivity = 0.0;
+
+ return (1 - selectivity);
+ }
+
+ /**
+ * Selectivity of conjunctive predicate -> (selectivity of conjunctive
+ * element1) * (selectivity of conjunctive element2)...
+ *
+ * @param call
+ * @return
+ */
+ private Double computeConjunctionSelectivity(RexCall call) {
+ Double tmpSelectivity;
+ double selectivity = 1;
+
+ for (RexNode cje : call.getOperands()) {
+ tmpSelectivity = cje.accept(this);
+ if (tmpSelectivity != null) {
+ selectivity *= tmpSelectivity;
+ }
+ }
+
+ return selectivity;
+ }
+
+ private Double getMaxNDV(RexCall call) {
+ double tmpNDV;
+ double maxNDV = 1.0;
+ InputReferencedVisitor irv;
+
+ for (RexNode op : call.getOperands()) {
+ if (op instanceof RexInputRef) {
+ tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel,
+ ((RexInputRef) op).getIndex());
+ if (tmpNDV > maxNDV)
+ maxNDV = tmpNDV;
+ } else {
+ irv = new InputReferencedVisitor();
+ irv.apply(op);
+ for (Integer childProjIndx : irv.inputPosReferenced) {
+ tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, childProjIndx);
+ if (tmpNDV > maxNDV)
+ maxNDV = tmpNDV;
+ }
+ }
+ }
+
+ return maxNDV;
+ }
+
+ private boolean isPartitionPredicate(RexNode expr, RelNode r) {
+ if (r instanceof Project) {
+ expr = RelOptUtil.pushFilterPastProject(expr, (Project) r);
+ return isPartitionPredicate(expr, ((Project) r).getInput());
+ } else if (r instanceof Filter) {
+ return isPartitionPredicate(expr, ((Filter) r).getInput());
+ } else if (r instanceof HiveTableScan) {
+ RelOptHiveTable table = (RelOptHiveTable) ((HiveTableScan) r).getTable();
+ ImmutableBitSet cols = RelOptUtil.InputFinder.bits(expr);
+ return table.containsPartitionColumnsOnly(cols);
+ }
+ return false;
+ }
+
+ private SqlKind getOp(RexCall call) {
+ SqlKind op = call.getKind();
+
+ if (call.getKind().equals(SqlKind.OTHER_FUNCTION)
+ && SqlTypeUtil.inBooleanFamily(call.getType())) {
+ SqlOperator sqlOp = call.getOperator();
+ String opName = (sqlOp != null) ? sqlOp.getName() : "";
+ if (opName.equalsIgnoreCase("in")) {
+ op = SqlKind.IN;
+ }
+ }
+
+ return op;
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdDistinctRowCount;
+import org.apache.calcite.rel.metadata.RelMdUtil;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveRelMdDistinctRowCount extends RelMdDistinctRowCount {
+
+ private static final HiveRelMdDistinctRowCount INSTANCE =
+ new HiveRelMdDistinctRowCount();
+
+ public static final RelMetadataProvider SOURCE = ChainedRelMetadataProvider
+ .of(ImmutableList.of(
+
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.DISTINCT_ROW_COUNT.method, INSTANCE),
+
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.CUMULATIVE_COST.method, INSTANCE)));
+
+ private HiveRelMdDistinctRowCount() {
+ }
+
+ // Catch-all rule when none of the others apply.
+ @Override
+ public Double getDistinctRowCount(RelNode rel, ImmutableBitSet groupKey,
+ RexNode predicate) {
+ if (rel instanceof HiveTableScan) {
+ return getDistinctRowCount((HiveTableScan) rel, groupKey, predicate);
+ }
+ /*
+ * For now use Calcite' default formulas for propagating NDVs up the Query
+ * Tree.
+ */
+ return super.getDistinctRowCount(rel, groupKey, predicate);
+ }
+
+ private Double getDistinctRowCount(HiveTableScan htRel, ImmutableBitSet groupKey,
+ RexNode predicate) {
+ List<Integer> projIndxLst = HiveCalciteUtil
+ .translateBitSetToProjIndx(groupKey);
+ List<ColStatistics> colStats = htRel.getColStat(projIndxLst);
+ Double noDistinctRows = 1.0;
+ for (ColStatistics cStat : colStats) {
+ noDistinctRows *= cStat.getCountDistint();
+ }
+
+ return Math.min(noDistinctRows, htRel.getRows());
+ }
+
+ public static Double getDistinctRowCount(RelNode r, int indx) {
+ ImmutableBitSet bitSetOfRqdProj = ImmutableBitSet.of(indx);
+ return RelMetadataQuery.getDistinctRowCount(r, bitSetOfRqdProj, r
+ .getCluster().getRexBuilder().makeLiteral(true));
+ }
+
+ @Override
+ public Double getDistinctRowCount(Join rel, ImmutableBitSet groupKey,
+ RexNode predicate) {
+ if (rel instanceof HiveJoin) {
+ HiveJoin hjRel = (HiveJoin) rel;
+ //TODO: Improve this
+ if (hjRel.isLeftSemiJoin()) {
+ return RelMetadataQuery.getDistinctRowCount(hjRel.getLeft(), groupKey,
+ rel.getCluster().getRexBuilder().makeLiteral(true));
+ } else {
+ return RelMdUtil.getJoinDistinctRowCount(rel, rel.getJoinType(),
+ groupKey, predicate, true);
+ }
+ }
+
+ return RelMetadataQuery.getDistinctRowCount(rel, groupKey, predicate);
+ }
+
+ /*
+ * Favor Broad Plans over Deep Plans.
+ */
+ public RelOptCost getCumulativeCost(HiveJoin rel) {
+ RelOptCost cost = RelMetadataQuery.getNonCumulativeCost(rel);
+ List<RelNode> inputs = rel.getInputs();
+ RelOptCost maxICost = HiveCost.ZERO;
+ for (RelNode input : inputs) {
+ RelOptCost iCost = RelMetadataQuery.getCumulativeCost(input);
+ if (maxICost.isLt(iCost)) {
+ maxICost = iCost;
+ }
+ }
+ return cost.plus(maxICost);
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,439 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelVisitor;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.SemiJoin;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdRowCount;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+public class HiveRelMdRowCount extends RelMdRowCount {
+
+ protected static final Log LOG = LogFactory.getLog(HiveRelMdRowCount.class.getName());
+
+
+ public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider
+ .reflectiveSource(BuiltInMethod.ROW_COUNT.method, new HiveRelMdRowCount());
+
+ protected HiveRelMdRowCount() {
+ super();
+ }
+
+ public Double getRowCount(Join join) {
+ PKFKRelationInfo pkfk = analyzeJoinForPKFK(join);
+ if (pkfk != null) {
+ double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor);
+ selectivity = Math.min(1.0, selectivity);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Identified Primary - Foreign Key relation:");
+ LOG.debug(RelOptUtil.toString(join));
+ LOG.debug(pkfk);
+ }
+ return pkfk.fkInfo.rowCount * selectivity;
+ }
+ return join.getRows();
+ }
+
+ public Double getRowCount(SemiJoin rel) {
+ PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel);
+ if (pkfk != null) {
+ double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor);
+ selectivity = Math.min(1.0, selectivity);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Identified Primary - Foreign Key relation:");
+ LOG.debug(RelOptUtil.toString(rel));
+ LOG.debug(pkfk);
+ }
+ return pkfk.fkInfo.rowCount * selectivity;
+ }
+ return super.getRowCount(rel);
+ }
+
+ static class PKFKRelationInfo {
+ public final int fkSide;
+ public final double ndvScalingFactor;
+ public final FKSideInfo fkInfo;
+ public final PKSideInfo pkInfo;
+ public final boolean isPKSideSimple;
+
+ PKFKRelationInfo(int fkSide,
+ FKSideInfo fkInfo,
+ PKSideInfo pkInfo,
+ double ndvScalingFactor,
+ boolean isPKSideSimple) {
+ this.fkSide = fkSide;
+ this.fkInfo = fkInfo;
+ this.pkInfo = pkInfo;
+ this.ndvScalingFactor = ndvScalingFactor;
+ this.isPKSideSimple = isPKSideSimple;
+ }
+
+ public String toString() {
+ return String.format(
+ "Primary - Foreign Key join:\n\tfkSide = %d\n\tFKInfo:%s\n" +
+ "\tPKInfo:%s\n\tisPKSideSimple:%s\n\tNDV Scaling Factor:%.2f\n",
+ fkSide,
+ fkInfo,
+ pkInfo,
+ isPKSideSimple,
+ ndvScalingFactor);
+ }
+ }
+
+ static class FKSideInfo {
+ public final double rowCount;
+ public final double distinctCount;
+ public FKSideInfo(double rowCount, double distinctCount) {
+ this.rowCount = rowCount;
+ this.distinctCount = distinctCount;
+ }
+
+ public String toString() {
+ return String.format("FKInfo(rowCount=%.2f,ndv=%.2f)", rowCount, distinctCount);
+ }
+ }
+
+ static class PKSideInfo extends FKSideInfo {
+ public final double selectivity;
+ public PKSideInfo(double rowCount, double distinctCount, double selectivity) {
+ super(rowCount, distinctCount);
+ this.selectivity = selectivity;
+ }
+
+ public String toString() {
+ return String.format("PKInfo(rowCount=%.2f,ndv=%.2f,selectivity=%.2f)", rowCount, distinctCount,selectivity);
+ }
+ }
+
+ /*
+ * For T1 join T2 on T1.x = T2.y if we identify 'y' s a key of T2 then we can
+ * infer the join cardinality as: rowCount(T1) * selectivity(T2) i.e this is
+ * like a SemiJoin where the T1(Fact side/FK side) is filtered by a factor
+ * based on the Selectivity of the PK/Dim table side.
+ *
+ * 1. If both T1.x and T2.y are keys then use the larger one as the PK side.
+ * 2. In case of outer Joins: a) The FK side should be the Null Preserving
+ * side. It doesn't make sense to apply this heuristic in case of Dim loj Fact
+ * or Fact roj Dim b) The selectivity factor applied on the Fact Table should
+ * be 1.
+ */
+ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel) {
+
+ RelNode left = joinRel.getInputs().get(0);
+ RelNode right = joinRel.getInputs().get(1);
+
+ final List<RexNode> initJoinFilters = RelOptUtil.conjunctions(joinRel
+ .getCondition());
+
+ /*
+ * No joining condition.
+ */
+ if (initJoinFilters.isEmpty()) {
+ return null;
+ }
+
+ List<RexNode> leftFilters = new ArrayList<RexNode>();
+ List<RexNode> rightFilters = new ArrayList<RexNode>();
+ List<RexNode> joinFilters = new ArrayList<RexNode>(initJoinFilters);
+
+ // @todo: remove this. 8/28/14 hb
+ // for now adding because RelOptUtil.classifyFilters has an assertion about
+ // column counts that is not true for semiJoins.
+ if (joinRel instanceof SemiJoin) {
+ return null;
+ }
+
+ RelOptUtil.classifyFilters(joinRel, joinFilters, joinRel.getJoinType(),
+ false, !joinRel.getJoinType().generatesNullsOnRight(), !joinRel
+ .getJoinType().generatesNullsOnLeft(), joinFilters, leftFilters,
+ rightFilters);
+
+ Pair<Integer, Integer> joinCols = canHandleJoin(joinRel, leftFilters,
+ rightFilters, joinFilters);
+ if (joinCols == null) {
+ return null;
+ }
+ int leftColIdx = joinCols.left;
+ int rightColIdx = joinCols.right;
+
+ RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
+ RexNode leftPred = RexUtil
+ .composeConjunction(rexBuilder, leftFilters, true);
+ RexNode rightPred = RexUtil.composeConjunction(rexBuilder, rightFilters,
+ true);
+ ImmutableBitSet lBitSet = ImmutableBitSet.of(leftColIdx);
+ ImmutableBitSet rBitSet = ImmutableBitSet.of(rightColIdx);
+
+ /*
+ * If the form is Dim loj F or Fact roj Dim or Dim semij Fact then return
+ * null.
+ */
+ boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel
+ .getJoinType() == JoinRelType.RIGHT)
+ && !(joinRel instanceof SemiJoin) && isKey(lBitSet, left);
+ boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel
+ .getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right);
+
+ if (!leftIsKey && !rightIsKey) {
+ return null;
+ }
+
+ double leftRowCount = RelMetadataQuery.getRowCount(left);
+ double rightRowCount = RelMetadataQuery.getRowCount(right);
+
+ if (leftIsKey && rightIsKey) {
+ if (rightRowCount < leftRowCount) {
+ leftIsKey = false;
+ }
+ }
+
+ int pkSide = leftIsKey ? 0 : rightIsKey ? 1 : -1;
+
+ boolean isPKSideSimpleTree = pkSide != -1 ?
+ IsSimpleTreeOnJoinKey.check(
+ pkSide == 0 ? left : right,
+ pkSide == 0 ? leftColIdx : rightColIdx) : false;
+
+ double leftNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(left, lBitSet, leftPred) : -1;
+ double rightNDV = isPKSideSimpleTree ? RelMetadataQuery.getDistinctRowCount(right, rBitSet, rightPred) : -1;
+
+ /*
+ * If the ndv of the PK - FK side don't match, and the PK side is a filter
+ * on the Key column then scale the NDV on the FK side.
+ *
+ * As described by Peter Boncz: http://databasearchitects.blogspot.com/
+ * in such cases we can be off by a large margin in the Join cardinality
+ * estimate. The e.g. he provides is on the join of StoreSales and DateDim
+ * on the TPCDS dataset. Since the DateDim is populated for 20 years into
+ * the future, while the StoreSales only has 5 years worth of data, there
+ * are 40 times fewer distinct dates in StoreSales.
+ *
+ * In general it is hard to infer the range for the foreign key on an
+ * arbitrary expression. For e.g. the NDV for DayofWeek is the same
+ * irrespective of NDV on the number of unique days, whereas the
+ * NDV of Quarters has the same ratio as the NDV on the keys.
+ *
+ * But for expressions that apply only on columns that have the same NDV
+ * as the key (implying that they are alternate keys) we can apply the
+ * ratio. So in the case of StoreSales - DateDim joins for predicate on the
+ * d_date column we can apply the scaling factor.
+ */
+ double ndvScalingFactor = 1.0;
+ if ( isPKSideSimpleTree ) {
+ ndvScalingFactor = pkSide == 0 ? leftNDV/rightNDV : rightNDV / leftNDV;
+ }
+
+ if (pkSide == 0) {
+ FKSideInfo fkInfo = new FKSideInfo(rightRowCount,
+ rightNDV);
+ double pkSelectivity = pkSelectivity(joinRel, true, left, leftRowCount);
+ PKSideInfo pkInfo = new PKSideInfo(leftRowCount,
+ leftNDV,
+ joinRel.getJoinType().generatesNullsOnRight() ? 1.0 :
+ pkSelectivity);
+
+ return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree);
+ }
+
+ if (pkSide == 1) {
+ FKSideInfo fkInfo = new FKSideInfo(leftRowCount,
+ leftNDV);
+ double pkSelectivity = pkSelectivity(joinRel, false, right, rightRowCount);
+ PKSideInfo pkInfo = new PKSideInfo(rightRowCount,
+ rightNDV,
+ joinRel.getJoinType().generatesNullsOnLeft() ? 1.0 :
+ pkSelectivity);
+
+ return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree);
+ }
+
+ return null;
+ }
+
+ private static double pkSelectivity(Join joinRel, boolean leftChild,
+ RelNode child,
+ double childRowCount) {
+ if ((leftChild && joinRel.getJoinType().generatesNullsOnRight()) ||
+ (!leftChild && joinRel.getJoinType().generatesNullsOnLeft())) {
+ return 1.0;
+ } else {
+ HiveTableScan tScan = HiveRelMdUniqueKeys.getTableScan(child, true);
+ if (tScan != null) {
+ double tRowCount = RelMetadataQuery.getRowCount(tScan);
+ return childRowCount / tRowCount;
+ } else {
+ return 1.0;
+ }
+ }
+ }
+
+ private static boolean isKey(ImmutableBitSet c, RelNode rel) {
+ boolean isKey = false;
+ Set<ImmutableBitSet> keys = RelMetadataQuery.getUniqueKeys(rel);
+ if (keys != null) {
+ for (ImmutableBitSet key : keys) {
+ if (key.equals(c)) {
+ isKey = true;
+ break;
+ }
+ }
+ }
+ return isKey;
+ }
+
+ /*
+ * 1. Join condition must be an Equality Predicate.
+ * 2. both sides must reference 1 column.
+ * 3. If needed flip the columns.
+ */
+ private static Pair<Integer, Integer> canHandleJoin(Join joinRel,
+ List<RexNode> leftFilters, List<RexNode> rightFilters,
+ List<RexNode> joinFilters) {
+
+ /*
+ * If after classifying filters there is more than 1 joining predicate, we
+ * don't handle this. Return null.
+ */
+ if (joinFilters.size() != 1) {
+ return null;
+ }
+
+ RexNode joinCond = joinFilters.get(0);
+
+ int leftColIdx;
+ int rightColIdx;
+
+ if (!(joinCond instanceof RexCall)) {
+ return null;
+ }
+
+ if (((RexCall) joinCond).getOperator() != SqlStdOperatorTable.EQUALS) {
+ return null;
+ }
+
+ ImmutableBitSet leftCols = RelOptUtil.InputFinder.bits(((RexCall) joinCond).getOperands().get(0));
+ ImmutableBitSet rightCols = RelOptUtil.InputFinder.bits(((RexCall) joinCond).getOperands().get(1));
+
+ if (leftCols.cardinality() != 1 || rightCols.cardinality() != 1 ) {
+ return null;
+ }
+
+ int nFieldsLeft = joinRel.getLeft().getRowType().getFieldList().size();
+ int nFieldsRight = joinRel.getRight().getRowType().getFieldList().size();
+ int nSysFields = joinRel.getSystemFieldList().size();
+ ImmutableBitSet rightFieldsBitSet = ImmutableBitSet.range(nSysFields + nFieldsLeft,
+ nSysFields + nFieldsLeft + nFieldsRight);
+ /*
+ * flip column references if join condition specified in reverse order to
+ * join sources.
+ */
+ if (rightFieldsBitSet.contains(leftCols)) {
+ ImmutableBitSet t = leftCols;
+ leftCols = rightCols;
+ rightCols = t;
+ }
+
+ leftColIdx = leftCols.nextSetBit(0) - nSysFields;
+ rightColIdx = rightCols.nextSetBit(0) - (nSysFields + nFieldsLeft);
+
+ return new Pair<Integer, Integer>(leftColIdx, rightColIdx);
+ }
+
+ private static class IsSimpleTreeOnJoinKey extends RelVisitor {
+
+ int joinKey;
+ boolean simpleTree;
+
+ static boolean check(RelNode r, int joinKey) {
+ IsSimpleTreeOnJoinKey v = new IsSimpleTreeOnJoinKey(joinKey);
+ v.go(r);
+ return v.simpleTree;
+ }
+
+ IsSimpleTreeOnJoinKey(int joinKey) {
+ super();
+ this.joinKey = joinKey;
+ simpleTree = true;
+ }
+
+ @Override
+ public void visit(RelNode node, int ordinal, RelNode parent) {
+
+ if (node instanceof HepRelVertex) {
+ node = ((HepRelVertex) node).getCurrentRel();
+ }
+
+ if (node instanceof TableScan) {
+ simpleTree = true;
+ } else if (node instanceof Project) {
+ simpleTree = isSimple((Project) node);
+ } else if (node instanceof Filter) {
+ simpleTree = isSimple((Filter) node);
+ } else {
+ simpleTree = false;
+ }
+
+ if (simpleTree) {
+ super.visit(node, ordinal, parent);
+ }
+ }
+
+ private boolean isSimple(Project project) {
+ RexNode r = project.getProjects().get(joinKey);
+ if (r instanceof RexInputRef) {
+ joinKey = ((RexInputRef) r).getIndex();
+ return true;
+ }
+ return false;
+ }
+
+ private boolean isSimple(Filter filter) {
+ ImmutableBitSet condBits = RelOptUtil.InputFinder.bits(filter.getCondition());
+ return isKey(condBits, filter);
+ }
+
+ }
+
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,242 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdSelectivity;
+import org.apache.calcite.rel.metadata.RelMdUtil;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+import com.google.common.collect.ImmutableMap;
+
+public class HiveRelMdSelectivity extends RelMdSelectivity {
+ public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.SELECTIVITY.method,
+ new HiveRelMdSelectivity());
+
+ protected HiveRelMdSelectivity() {
+ super();
+ }
+
+ public Double getSelectivity(HiveTableScan t, RexNode predicate) {
+ if (predicate != null) {
+ FilterSelectivityEstimator filterSelEstmator = new FilterSelectivityEstimator(t);
+ return filterSelEstmator.estimateSelectivity(predicate);
+ }
+
+ return 1.0;
+ }
+
+ public Double getSelectivity(HiveJoin j, RexNode predicate) {
+ if (j.getJoinType().equals(JoinRelType.INNER)) {
+ return computeInnerJoinSelectivity(j, predicate);
+ }
+ return 1.0;
+ }
+
+ private Double computeInnerJoinSelectivity(HiveJoin j, RexNode predicate) {
+ double ndvCrossProduct = 1;
+ Pair<Boolean, RexNode> predInfo =
+ getCombinedPredicateForJoin(j, predicate);
+ if (!predInfo.getKey()) {
+ return
+ new FilterSelectivityEstimator(j).
+ estimateSelectivity(predInfo.getValue());
+ }
+
+ RexNode combinedPredicate = predInfo.getValue();
+ JoinPredicateInfo jpi = JoinPredicateInfo.constructJoinPredicateInfo(j,
+ combinedPredicate);
+ ImmutableMap.Builder<Integer, Double> colStatMapBuilder = ImmutableMap
+ .builder();
+ ImmutableMap<Integer, Double> colStatMap;
+ int rightOffSet = j.getLeft().getRowType().getFieldCount();
+
+ // 1. Update Col Stats Map with col stats for columns from left side of
+ // Join which are part of join keys
+ for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) {
+ colStatMapBuilder.put(ljk,
+ HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), ljk));
+ }
+
+ // 2. Update Col Stats Map with col stats for columns from right side of
+ // Join which are part of join keys
+ for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) {
+ colStatMapBuilder.put(rjk + rightOffSet,
+ HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), rjk));
+ }
+ colStatMap = colStatMapBuilder.build();
+
+ // 3. Walk through the Join Condition Building NDV for selectivity
+ // NDV of the join can not exceed the cardinality of cross join.
+ List<JoinLeafPredicateInfo> peLst = jpi.getEquiJoinPredicateElements();
+ int noOfPE = peLst.size();
+ if (noOfPE > 0) {
+ ndvCrossProduct = exponentialBackoff(peLst, colStatMap);
+
+ if (j.isLeftSemiJoin())
+ ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft()),
+ ndvCrossProduct);
+ else
+ ndvCrossProduct = Math.min(RelMetadataQuery.getRowCount(j.getLeft())
+ * RelMetadataQuery.getRowCount(j.getRight()), ndvCrossProduct);
+ }
+
+ // 4. Join Selectivity = 1/NDV
+ return (1 / ndvCrossProduct);
+ }
+
+ // 3.2 if conjunctive predicate elements are more than one, then walk
+ // through them one by one. Compute cross product of NDV. Cross product is
+ // computed by multiplying the largest NDV of all of the conjunctive
+ // predicate
+ // elements with degraded NDV of rest of the conjunctive predicate
+ // elements. NDV is
+ // degraded using log function.Finally the ndvCrossProduct is fenced at
+ // the join
+ // cross product to ensure that NDV can not exceed worst case join
+ // cardinality.<br>
+ // NDV of a conjunctive predicate element is the max NDV of all arguments
+ // to lhs, rhs expressions.
+ // NDV(JoinCondition) = min (left cardinality * right cardinality,
+ // ndvCrossProduct(JoinCondition))
+ // ndvCrossProduct(JoinCondition) = ndv(pex)*log(ndv(pe1))*log(ndv(pe2))
+ // where pex is the predicate element of join condition with max ndv.
+ // ndv(pe) = max(NDV(left.Expr), NDV(right.Expr))
+ // NDV(expr) = max(NDV( expr args))
+ protected double logSmoothing(List<JoinLeafPredicateInfo> peLst, ImmutableMap<Integer, Double> colStatMap) {
+ int noOfPE = peLst.size();
+ double ndvCrossProduct = getMaxNDVForJoinSelectivity(peLst.get(0), colStatMap);
+ if (noOfPE > 1) {
+ double maxNDVSoFar = ndvCrossProduct;
+ double ndvToBeSmoothed;
+ double tmpNDV;
+
+ for (int i = 1; i < noOfPE; i++) {
+ tmpNDV = getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap);
+ if (tmpNDV > maxNDVSoFar) {
+ ndvToBeSmoothed = maxNDVSoFar;
+ maxNDVSoFar = tmpNDV;
+ ndvCrossProduct = (ndvCrossProduct / ndvToBeSmoothed) * tmpNDV;
+ } else {
+ ndvToBeSmoothed = tmpNDV;
+ }
+ // TODO: revisit the fence
+ if (ndvToBeSmoothed > 3)
+ ndvCrossProduct *= Math.log(ndvToBeSmoothed);
+ else
+ ndvCrossProduct *= ndvToBeSmoothed;
+ }
+ }
+ return ndvCrossProduct;
+ }
+
+ /*
+ * a) Order predciates based on ndv in reverse order. b) ndvCrossProduct =
+ * ndv(pe0) * ndv(pe1) ^(1/2) * ndv(pe2) ^(1/4) * ndv(pe3) ^(1/8) ...
+ */
+ protected double exponentialBackoff(List<JoinLeafPredicateInfo> peLst,
+ ImmutableMap<Integer, Double> colStatMap) {
+ int noOfPE = peLst.size();
+ List<Double> ndvs = new ArrayList<Double>(noOfPE);
+ for (int i = 0; i < noOfPE; i++) {
+ ndvs.add(getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap));
+ }
+ Collections.sort(ndvs);
+ Collections.reverse(ndvs);
+ double ndvCrossProduct = 1.0;
+ for (int i = 0; i < ndvs.size(); i++) {
+ double n = Math.pow(ndvs.get(i), Math.pow(1 / 2.0, i));
+ ndvCrossProduct *= n;
+ }
+ return ndvCrossProduct;
+ }
+
+ /**
+ *
+ * @param j
+ * @param additionalPredicate
+ * @return if predicate is the join condition return (true, joinCond)
+ * else return (false, minusPred)
+ */
+ private Pair<Boolean,RexNode> getCombinedPredicateForJoin(HiveJoin j, RexNode additionalPredicate) {
+ RexNode minusPred = RelMdUtil.minusPreds(j.getCluster().getRexBuilder(), additionalPredicate,
+ j.getCondition());
+
+ if (minusPred != null) {
+ List<RexNode> minusList = new ArrayList<RexNode>();
+ minusList.add(j.getCondition());
+ minusList.add(minusPred);
+
+ return new Pair<Boolean,RexNode>(false, minusPred);
+ }
+
+ return new Pair<Boolean,RexNode>(true,j.getCondition());
+ }
+
+ /**
+ * Compute Max NDV to determine Join Selectivity.
+ *
+ * @param jlpi
+ * @param colStatMap
+ * Immutable Map of Projection Index (in Join Schema) to Column Stat
+ * @param rightProjOffSet
+ * @return
+ */
+ private static Double getMaxNDVForJoinSelectivity(JoinLeafPredicateInfo jlpi,
+ ImmutableMap<Integer, Double> colStatMap) {
+ Double maxNDVSoFar = 1.0;
+
+ maxNDVSoFar = getMaxNDVFromProjections(colStatMap,
+ jlpi.getProjsFromLeftPartOfJoinKeysInJoinSchema(), maxNDVSoFar);
+ maxNDVSoFar = getMaxNDVFromProjections(colStatMap,
+ jlpi.getProjsFromRightPartOfJoinKeysInJoinSchema(), maxNDVSoFar);
+
+ return maxNDVSoFar;
+ }
+
+ private static Double getMaxNDVFromProjections(Map<Integer, Double> colStatMap,
+ Set<Integer> projectionSet, Double defaultMaxNDV) {
+ Double colNDV = null;
+ Double maxNDVSoFar = defaultMaxNDV;
+
+ for (Integer projIndx : projectionSet) {
+ colNDV = colStatMap.get(projIndx);
+ if (colNDV > maxNDVSoFar)
+ maxNDVSoFar = colNDV;
+ }
+
+ return maxNDVSoFar;
+ }
+
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java?rev=1644224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java Tue Dec 9 23:02:22 2014
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.metadata.BuiltInMetadata;
+import org.apache.calcite.rel.metadata.Metadata;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdUniqueKeys;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.BitSets;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+import com.google.common.base.Function;
+
+public class HiveRelMdUniqueKeys {
+
+ public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider
+ .reflectiveSource(BuiltInMethod.UNIQUE_KEYS.method,
+ new HiveRelMdUniqueKeys());
+
+ /*
+ * Infer Uniquenes if: - rowCount(col) = ndv(col) - TBD for numerics: max(col)
+ * - min(col) = rowCount(col)
+ *
+ * Why are we intercepting Project and not TableScan? Because if we
+ * have a method for TableScan, it will not know which columns to check for.
+ * Inferring Uniqueness for all columns is very expensive right now. The flip
+ * side of doing this is, it only works post Field Trimming.
+ */
+ public Set<ImmutableBitSet> getUniqueKeys(Project rel, boolean ignoreNulls) {
+
+ HiveTableScan tScan = getTableScan(rel.getInput(), false);
+
+ if ( tScan == null ) {
+ Function<RelNode, Metadata> fn = RelMdUniqueKeys.SOURCE.apply(
+ rel.getClass(), BuiltInMetadata.UniqueKeys.class);
+ return ((BuiltInMetadata.UniqueKeys) fn.apply(rel))
+ .getUniqueKeys(ignoreNulls);
+ }
+
+ Map<Integer, Integer> posMap = new HashMap<Integer, Integer>();
+ int projectPos = 0;
+ int colStatsPos = 0;
+
+ BitSet projectedCols = new BitSet();
+ for (RexNode r : rel.getProjects()) {
+ if (r instanceof RexInputRef) {
+ projectedCols.set(((RexInputRef) r).getIndex());
+ posMap.put(colStatsPos, projectPos);
+ colStatsPos++;
+ }
+ projectPos++;
+ }
+
+ double numRows = tScan.getRows();
+ List<ColStatistics> colStats = tScan.getColStat(BitSets
+ .toList(projectedCols));
+ Set<ImmutableBitSet> keys = new HashSet<ImmutableBitSet>();
+
+ colStatsPos = 0;
+ for (ColStatistics cStat : colStats) {
+ boolean isKey = false;
+ if (cStat.getCountDistint() >= numRows) {
+ isKey = true;
+ }
+ if ( !isKey && cStat.getRange() != null &&
+ cStat.getRange().maxValue != null &&
+ cStat.getRange().minValue != null) {
+ double r = cStat.getRange().maxValue.doubleValue() -
+ cStat.getRange().minValue.doubleValue() + 1;
+ isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON);
+ }
+ if ( isKey ) {
+ ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos));
+ keys.add(key);
+ }
+ colStatsPos++;
+ }
+
+ return keys;
+ }
+
+ /*
+ * traverse a path of Filter, Projects to get to the TableScan.
+ * In case of Unique keys, stop if you reach a Project, it will be handled
+ * by the invocation on the Project.
+ * In case of getting the base rowCount of a Path, keep going past a Project.
+ */
+ static HiveTableScan getTableScan(RelNode r, boolean traverseProject) {
+
+ while (r != null && !(r instanceof HiveTableScan)) {
+ if (r instanceof HepRelVertex) {
+ r = ((HepRelVertex) r).getCurrentRel();
+ } else if (r instanceof Filter) {
+ r = ((Filter) r).getInput();
+ } else if (traverseProject && r instanceof Project) {
+ r = ((Project) r).getInput();
+ } else {
+ r = null;
+ }
+ }
+ return r == null ? null : (HiveTableScan) r;
+ }
+
+}