You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2017/01/31 22:18:08 UTC
[6/9] incubator-quickstep git commit: Push down low cost disjunctive
predicates to filter the stored relations early
Push down low cost disjunctive predicates to filter the stored relations early
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/259cd5e7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/259cd5e7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/259cd5e7
Branch: refs/heads/exact-filter
Commit: 259cd5e731ead6e38f546c66211aceb3c20f6f4d
Parents: 6d83b46
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Mon Jan 30 01:02:19 2017 -0600
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Tue Jan 31 10:59:08 2017 -0600
----------------------------------------------------------------------
query_optimizer/CMakeLists.txt | 1 +
query_optimizer/PhysicalGenerator.cpp | 15 ++
query_optimizer/rules/CMakeLists.txt | 24 ++
.../PushDownLowCostDisjunctivePredicate.cpp | 225 +++++++++++++++++++
.../PushDownLowCostDisjunctivePredicate.hpp | 116 ++++++++++
5 files changed, 381 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index e8bc21c..0ca971d 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -207,6 +207,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_rules_AttachLIPFilters
quickstep_queryoptimizer_rules_PruneColumns
+ quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
quickstep_queryoptimizer_rules_ReorderColumns
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
quickstep_queryoptimizer_rules_SwapProbeBuild
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index e12f8be..bd05267 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -28,6 +28,7 @@
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/rules/AttachLIPFilters.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
+#include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp"
#include "query_optimizer/rules/ReorderColumns.hpp"
#include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
#include "query_optimizer/rules/SwapProbeBuild.hpp"
@@ -108,12 +109,22 @@ P::PhysicalPtr PhysicalGenerator::generateInitialPlan(
P::PhysicalPtr PhysicalGenerator::optimizePlan() {
std::vector<std::unique_ptr<Rule<P::Physical>>> rules;
rules.emplace_back(new PruneColumns());
+
+ // TODO(jianqiao): It is possible for PushDownLowCostDisjunctivePredicate to
+ // generate two chaining Selection nodes that can actually be fused into one.
+ // Note that currently it is okay to have the two Selections because they are
+ // applied to a small cardinality stored relation, which is very light-weight.
+ // However it is better to have a FuseSelection optimization (or even a more
+ // general FusePhysical optimization) in the future.
+ rules.emplace_back(new PushDownLowCostDisjunctivePredicate());
+
if (FLAGS_reorder_hash_joins) {
rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
rules.emplace_back(new PruneColumns());
} else {
rules.emplace_back(new SwapProbeBuild());
}
+
if (FLAGS_reorder_columns) {
// NOTE(jianqiao): This optimization relies on the fact that the intermediate
// relations all have SPLIT_ROW_STORE layouts. If this fact gets changed, the
@@ -121,6 +132,10 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
// should be re-evaluated.
rules.emplace_back(new ReorderColumns());
}
+
+ // NOTE(jianqiao): Adding rules after AttachLIPFilters requires extra handling
+ // of LIPFilterConfiguration for transformed nodes. So currently it is suggested
+ // that all the new rules be placed before this point.
if (FLAGS_use_lip_filters) {
rules.emplace_back(new AttachLIPFilters());
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index fe2fd17..86d1ef7 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -24,6 +24,9 @@ add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp C
add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
add_library(quickstep_queryoptimizer_rules_PruneColumns PruneColumns.cpp PruneColumns.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownFilter PushDownFilter.cpp PushDownFilter.hpp)
+add_library(quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
+ PushDownLowCostDisjunctivePredicate.cpp
+ PushDownLowCostDisjunctivePredicate.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin PushDownSemiAntiJoin.cpp PushDownSemiAntiJoin.hpp)
add_library(quickstep_queryoptimizer_rules_ReorderColumns ReorderColumns.cpp ReorderColumns.hpp)
add_library(quickstep_queryoptimizer_rules_Rule ../../empty_src.cpp Rule.hpp)
@@ -111,6 +114,26 @@ target_link_libraries(quickstep_queryoptimizer_rules_PushDownFilter
quickstep_queryoptimizer_rules_RuleHelper
quickstep_queryoptimizer_rules_TopDownRule
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
+ ${GFLAGS_LIB_NAME}
+ quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExpressionUtil
+ quickstep_queryoptimizer_expressions_LogicalAnd
+ quickstep_queryoptimizer_expressions_LogicalOr
+ quickstep_queryoptimizer_expressions_PatternMatcher
+ quickstep_queryoptimizer_expressions_Predicate
+ quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_NestedLoopsJoin
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_queryoptimizer_physical_Selection
+ quickstep_queryoptimizer_physical_TableReference
+ quickstep_queryoptimizer_physical_TopLevelPlan
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
quickstep_queryoptimizer_expressions_AttributeReference
quickstep_queryoptimizer_expressions_ExpressionUtil
@@ -225,6 +248,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
quickstep_queryoptimizer_rules_GenerateJoins
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_PushDownFilter
+ quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
quickstep_queryoptimizer_rules_ReorderColumns
quickstep_queryoptimizer_rules_Rule
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp
new file mode 100644
index 0000000..e39f155
--- /dev/null
+++ b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp
@@ -0,0 +1,225 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp"
+
+#include <cstddef>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/LogicalAnd.hpp"
+#include "query_optimizer/expressions/LogicalOr.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/NestedLoopsJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/TableReference.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+
+#include "gflags/gflags.h"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+DEFINE_uint64(push_down_disjunctive_predicate_cardinality_threshold, 100u,
+ "The cardinality threshold for a stored relation for the "
+ "PushDownLowCostDisjunctivePredicate optimization rule to push "
+ "down a disjunctive predicate to pre-filter that relation.");
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr PushDownLowCostDisjunctivePredicate::apply(const P::PhysicalPtr &input) {
+ DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+
+ const P::TopLevelPlanPtr top_level_plan =
+ std::static_pointer_cast<const P::TopLevelPlan>(input);
+ cost_model_.reset(
+ new cost::StarSchemaSimpleCostModel(
+ top_level_plan->shared_subplans()));
+
+ collectApplicablePredicates(input);
+
+ if (!applicable_predicates_.empty()) {
+ // Apply the selected predicates to stored relations.
+ return attachPredicates(input);
+ } else {
+ return input;
+ }
+}
+
+void PushDownLowCostDisjunctivePredicate::collectApplicablePredicates(
+ const physical::PhysicalPtr &input) {
+ P::TableReferencePtr table_reference;
+ if (P::SomeTableReference::MatchesWithConditionalCast(input, &table_reference)) {
+ // Consider only stored relations with small cardinality as targets.
+ if (cost_model_->estimateCardinality(input) <=
+ FLAGS_push_down_disjunctive_predicate_cardinality_threshold) {
+ applicable_nodes_.emplace_back(input, &table_reference->attribute_list());
+ }
+ return;
+ }
+
+ for (const auto &child : input->children()) {
+ collectApplicablePredicates(child);
+ }
+
+ E::PredicatePtr filter_predicate = nullptr;
+ switch (input->getPhysicalType()) {
+ case P::PhysicalType::kAggregate: {
+ filter_predicate =
+ std::static_pointer_cast<const P::Aggregate>(input)->filter_predicate();
+ break;
+ }
+ case P::PhysicalType::kHashJoin: {
+ const P::HashJoinPtr hash_join =
+ std::static_pointer_cast<const P::HashJoin>(input);
+ if (hash_join->join_type() == P::HashJoin::JoinType::kInnerJoin) {
+ filter_predicate = hash_join->residual_predicate();
+ }
+ break;
+ }
+ case P::PhysicalType::kNestedLoopsJoin: {
+ filter_predicate =
+ std::static_pointer_cast<const P::NestedLoopsJoin>(input)->join_predicate();
+ break;
+ }
+ case P::PhysicalType::kSelection: {
+ filter_predicate =
+ std::static_pointer_cast<const P::Selection>(input)->filter_predicate();
+ break;
+ }
+ default:
+ break;
+ }
+
+ E::LogicalOrPtr disjunctive_predicate;
+ if (filter_predicate == nullptr ||
+ !E::SomeLogicalOr::MatchesWithConditionalCast(filter_predicate, &disjunctive_predicate)) {
+ return;
+ }
+
+ // Consider only disjunctive normal form, i.e. disjunction of conjunctions.
+ // Divide the disjunctive components into groups.
+ std::vector<std::vector<E::PredicatePtr>> candidate_predicates;
+ std::vector<std::vector<std::vector<E::AttributeReferencePtr>>> candidate_attributes;
+ for (const auto &conjunctive_predicate : disjunctive_predicate->operands()) {
+ candidate_predicates.emplace_back();
+ candidate_attributes.emplace_back();
+ E::LogicalAndPtr logical_and;
+ if (E::SomeLogicalAnd::MatchesWithConditionalCast(conjunctive_predicate, &logical_and)) {
+ for (const auto &predicate : logical_and->operands()) {
+ candidate_predicates.back().emplace_back(predicate);
+ candidate_attributes.back().emplace_back(
+ predicate->getReferencedAttributes());
+ }
+ } else {
+ candidate_predicates.back().emplace_back(conjunctive_predicate);
+ candidate_attributes.back().emplace_back(
+ conjunctive_predicate->getReferencedAttributes());
+ }
+ }
+
+ // Check whether the conditions are met for pushing down part of the predicates
+ // to each small-cardinality stored relation.
+ for (const auto &node_pair : applicable_nodes_) {
+ const std::vector<E::AttributeReferencePtr> &target_attributes = *node_pair.second;
+ std::vector<E::PredicatePtr> selected_disj_preds;
+ for (std::size_t i = 0; i < candidate_predicates.size(); ++i) {
+ const auto &cand_preds = candidate_predicates[i];
+ const auto &cand_attrs = candidate_attributes[i];
+
+ std::vector<E::PredicatePtr> selected_conj_preds;
+ for (std::size_t j = 0; j < cand_preds.size(); ++j) {
+ if (E::SubsetOfExpressions(cand_attrs[j], target_attributes)) {
+ selected_conj_preds.emplace_back(cand_preds[j]);
+ }
+ }
+ if (selected_conj_preds.empty()) {
+ // Not every disjunctive component contains a predicate that can be applied
+ // to the table reference node -- condition failed, exit.
+ selected_disj_preds.clear();
+ break;
+ } else {
+ selected_disj_preds.emplace_back(
+ CreateConjunctive(selected_conj_preds));
+ }
+ }
+ if (!selected_disj_preds.empty()) {
+ applicable_predicates_[node_pair.first].add(
+ CreateDisjunctive(selected_disj_preds));
+ }
+ }
+}
+
+P::PhysicalPtr PushDownLowCostDisjunctivePredicate::attachPredicates(
+ const P::PhysicalPtr &input) const {
+ std::vector<P::PhysicalPtr> new_children;
+ for (const P::PhysicalPtr &child : input->children()) {
+ const P::PhysicalPtr new_child = attachPredicates(child);
+ new_children.push_back(new_child);
+ }
+
+ const P::PhysicalPtr output =
+ new_children == input->children() ? input
+ : input->copyWithNewChildren(new_children);
+
+ const auto &node_it = applicable_predicates_.find(input);
+ if (node_it != applicable_predicates_.end()) {
+ const E::PredicatePtr filter_predicate =
+ CreateConjunctive(node_it->second.predicates);
+ return P::Selection::Create(output,
+ E::ToNamedExpressions(output->getOutputAttributes()),
+ filter_predicate);
+ }
+
+ return output;
+}
+
+E::PredicatePtr PushDownLowCostDisjunctivePredicate::CreateConjunctive(
+ const std::vector<E::PredicatePtr> predicates) {
+ DCHECK_GE(predicates.size(), 1u);
+ if (predicates.size() == 1) {
+ return predicates.front();
+ } else {
+ return E::LogicalAnd::Create(predicates);
+ }
+}
+
+E::PredicatePtr PushDownLowCostDisjunctivePredicate::CreateDisjunctive(
+ const std::vector<E::PredicatePtr> predicates) {
+ DCHECK_GE(predicates.size(), 1u);
+ if (predicates.size() == 1) {
+ return predicates.front();
+ } else {
+ return E::LogicalOr::Create(predicates);
+ }
+}
+
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp
new file mode 100644
index 0000000..3e4b602
--- /dev/null
+++ b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_PUSH_DOWN_LOW_COST_DISJUNCTIVE_PREDICATE_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_PUSH_DOWN_LOW_COST_DISJUNCTIVE_PREDICATE_HPP_
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ * @{
+ */
+
+/**
+ * @brief Rule that applies to a physical plan to push down low-cost disjunctive
+ * predicate when proper conditions are met.
+ *
+ * Here we elaborate the conditions.
+ *
+ * Let
+ * P = p_{1,1} AND ... AND p_{1, m_1} OR ... OR p_{n,1} AND ... AND p_{n, m_n}
+ * be a predicate in disjunctive normal form.
+ *
+ * Now consider each small-cardinality relation R, if for each i in 1..n, there
+ * exists at least one predicate p_{i, k_i} that is applicable to R. Then we can
+ * construct a new predicate
+ * P' = p_{1, k_1} OR ... OR p_{n, k_n}
+ * and push down P' to be applied to R.
+ *
+ * Also, if any conjunctive component in P contains more than one predicate that
+ * is applicable to R, then we can combine all these applicable predicates as a
+ * conjunctive component in P'.
+ *
+ * Finally, note that if there exists a conjunctive component that contains no
+ * predicate applicable to R. Then the condition fails and we cannot do a push
+ * down for R.
+ */
+class PushDownLowCostDisjunctivePredicate : public Rule<physical::Physical> {
+ public:
+ /**
+ * @brief Constructor.
+ */
+ PushDownLowCostDisjunctivePredicate() {}
+
+ ~PushDownLowCostDisjunctivePredicate() override {}
+
+ std::string getName() const override {
+ return "PushDownLowCostDisjunctivePredicate";
+ }
+
+ physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+ struct PredicateInfo {
+ PredicateInfo() {}
+ inline void add(expressions::PredicatePtr predicate) {
+ predicates.emplace_back(predicate);
+ }
+ std::vector<expressions::PredicatePtr> predicates;
+ };
+
+ void collectApplicablePredicates(const physical::PhysicalPtr &input);
+
+ physical::PhysicalPtr attachPredicates(const physical::PhysicalPtr &input) const;
+
+ static expressions::PredicatePtr CreateConjunctive(
+ const std::vector<expressions::PredicatePtr> predicates);
+
+ static expressions::PredicatePtr CreateDisjunctive(
+ const std::vector<expressions::PredicatePtr> predicates);
+
+ std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
+
+ std::vector<std::pair<physical::PhysicalPtr,
+ const std::vector<expressions::AttributeReferencePtr> *>> applicable_nodes_;
+ std::map<physical::PhysicalPtr, PredicateInfo> applicable_predicates_;
+
+ DISALLOW_COPY_AND_ASSIGN(PushDownLowCostDisjunctivePredicate);
+};
+
+/** @} */
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_PUSH_DOWN_LOW_COST_DISJUNCTIVE_PREDICATE_HPP_