You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by zu...@apache.org on 2017/01/31 17:44:25 UTC

[1/2] incubator-quickstep git commit: Push down low cost disjunctive predicates to filter the stored relations early [Forced Update!]

Repository: incubator-quickstep
Updated Branches:
  refs/heads/partitioned-select 81d63a9d8 -> ea1356b40 (forced update)


Push down low cost disjunctive predicates to filter the stored relations early


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/259cd5e7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/259cd5e7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/259cd5e7

Branch: refs/heads/partitioned-select
Commit: 259cd5e731ead6e38f546c66211aceb3c20f6f4d
Parents: 6d83b46
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Mon Jan 30 01:02:19 2017 -0600
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Tue Jan 31 10:59:08 2017 -0600

----------------------------------------------------------------------
 query_optimizer/CMakeLists.txt                  |   1 +
 query_optimizer/PhysicalGenerator.cpp           |  15 ++
 query_optimizer/rules/CMakeLists.txt            |  24 ++
 .../PushDownLowCostDisjunctivePredicate.cpp     | 225 +++++++++++++++++++
 .../PushDownLowCostDisjunctivePredicate.hpp     | 116 ++++++++++
 5 files changed, 381 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index e8bc21c..0ca971d 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -207,6 +207,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
                       quickstep_queryoptimizer_physical_Physical
                       quickstep_queryoptimizer_rules_AttachLIPFilters
                       quickstep_queryoptimizer_rules_PruneColumns
+                      quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
                       quickstep_queryoptimizer_rules_ReorderColumns
                       quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
                       quickstep_queryoptimizer_rules_SwapProbeBuild

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index e12f8be..bd05267 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -28,6 +28,7 @@
 #include "query_optimizer/physical/Physical.hpp"
 #include "query_optimizer/rules/AttachLIPFilters.hpp"
 #include "query_optimizer/rules/PruneColumns.hpp"
+#include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp"
 #include "query_optimizer/rules/ReorderColumns.hpp"
 #include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
 #include "query_optimizer/rules/SwapProbeBuild.hpp"
@@ -108,12 +109,22 @@ P::PhysicalPtr PhysicalGenerator::generateInitialPlan(
 P::PhysicalPtr PhysicalGenerator::optimizePlan() {
   std::vector<std::unique_ptr<Rule<P::Physical>>> rules;
   rules.emplace_back(new PruneColumns());
+
+  // TODO(jianqiao): It is possible for PushDownLowCostDisjunctivePredicate to
+  // generate two chaining Selection nodes that can actually be fused into one.
+  // Note that currently it is okay to have the two Selections because they are
+  // applied to a small cardinality stored relation, which is very light-weight.
+  // However it is better to have a FuseSelection optimization (or even a more
+  // general FusePhysical optimization) in the future.
+  rules.emplace_back(new PushDownLowCostDisjunctivePredicate());
+
   if (FLAGS_reorder_hash_joins) {
     rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
     rules.emplace_back(new PruneColumns());
   } else {
     rules.emplace_back(new SwapProbeBuild());
   }
+
   if (FLAGS_reorder_columns) {
     // NOTE(jianqiao): This optimization relies on the fact that the intermediate
     // relations all have SPLIT_ROW_STORE layouts. If this fact gets changed, the
@@ -121,6 +132,10 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
     // should be re-evaluated.
     rules.emplace_back(new ReorderColumns());
   }
+
+  // NOTE(jianqiao): Adding rules after AttachLIPFilters requires extra handling
+  // of LIPFilterConfiguration for transformed nodes. So currently it is suggested
+  // that all the new rules be placed before this point.
   if (FLAGS_use_lip_filters) {
     rules.emplace_back(new AttachLIPFilters());
   }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index fe2fd17..86d1ef7 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -24,6 +24,9 @@ add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp C
 add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
 add_library(quickstep_queryoptimizer_rules_PruneColumns PruneColumns.cpp PruneColumns.hpp)
 add_library(quickstep_queryoptimizer_rules_PushDownFilter PushDownFilter.cpp PushDownFilter.hpp)
+add_library(quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
+            PushDownLowCostDisjunctivePredicate.cpp
+            PushDownLowCostDisjunctivePredicate.hpp)
 add_library(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin PushDownSemiAntiJoin.cpp PushDownSemiAntiJoin.hpp)
 add_library(quickstep_queryoptimizer_rules_ReorderColumns ReorderColumns.cpp ReorderColumns.hpp)
 add_library(quickstep_queryoptimizer_rules_Rule ../../empty_src.cpp Rule.hpp)
@@ -111,6 +114,26 @@ target_link_libraries(quickstep_queryoptimizer_rules_PushDownFilter
                       quickstep_queryoptimizer_rules_RuleHelper
                       quickstep_queryoptimizer_rules_TopDownRule
                       quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
+                      ${GFLAGS_LIB_NAME}
+                      quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+                      quickstep_queryoptimizer_expressions_AttributeReference
+                      quickstep_queryoptimizer_expressions_ExpressionUtil
+                      quickstep_queryoptimizer_expressions_LogicalAnd
+                      quickstep_queryoptimizer_expressions_LogicalOr
+                      quickstep_queryoptimizer_expressions_PatternMatcher
+                      quickstep_queryoptimizer_expressions_Predicate
+                      quickstep_queryoptimizer_physical_Aggregate
+                      quickstep_queryoptimizer_physical_HashJoin
+                      quickstep_queryoptimizer_physical_NestedLoopsJoin
+                      quickstep_queryoptimizer_physical_PatternMatcher
+                      quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_physical_PhysicalType
+                      quickstep_queryoptimizer_physical_Selection
+                      quickstep_queryoptimizer_physical_TableReference
+                      quickstep_queryoptimizer_physical_TopLevelPlan
+                      quickstep_queryoptimizer_rules_Rule
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
                       quickstep_queryoptimizer_expressions_AttributeReference
                       quickstep_queryoptimizer_expressions_ExpressionUtil
@@ -225,6 +248,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
                       quickstep_queryoptimizer_rules_GenerateJoins
                       quickstep_queryoptimizer_rules_PruneColumns
                       quickstep_queryoptimizer_rules_PushDownFilter
+                      quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
                       quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
                       quickstep_queryoptimizer_rules_ReorderColumns
                       quickstep_queryoptimizer_rules_Rule

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp
new file mode 100644
index 0000000..e39f155
--- /dev/null
+++ b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp
@@ -0,0 +1,225 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp"
+
+#include <cstddef>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/LogicalAnd.hpp"
+#include "query_optimizer/expressions/LogicalOr.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/NestedLoopsJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/TableReference.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+
+#include "gflags/gflags.h"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+DEFINE_uint64(push_down_disjunctive_predicate_cardinality_threshold, 100u,
+              "The cardinality threshold for a stored relation for the "
+              "PushDownLowCostDisjunctivePredicate optimization rule to push "
+              "down a disjunctive predicate to pre-filter that relation.");
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr PushDownLowCostDisjunctivePredicate::apply(const P::PhysicalPtr &input) {
+  DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+
+  const P::TopLevelPlanPtr top_level_plan =
+     std::static_pointer_cast<const P::TopLevelPlan>(input);
+  cost_model_.reset(
+      new cost::StarSchemaSimpleCostModel(
+          top_level_plan->shared_subplans()));
+
+  collectApplicablePredicates(input);
+
+  if (!applicable_predicates_.empty()) {
+    // Apply the selected predicates to stored relations.
+    return attachPredicates(input);
+  } else {
+    return input;
+  }
+}
+
+void PushDownLowCostDisjunctivePredicate::collectApplicablePredicates(
+    const physical::PhysicalPtr &input) {
+  P::TableReferencePtr table_reference;
+  if (P::SomeTableReference::MatchesWithConditionalCast(input, &table_reference)) {
+    // Consider only stored relations with small cardinality as targets.
+    if (cost_model_->estimateCardinality(input) <=
+            FLAGS_push_down_disjunctive_predicate_cardinality_threshold) {
+      applicable_nodes_.emplace_back(input, &table_reference->attribute_list());
+    }
+    return;
+  }
+
+  for (const auto &child : input->children()) {
+    collectApplicablePredicates(child);
+  }
+
+  E::PredicatePtr filter_predicate = nullptr;
+  switch (input->getPhysicalType()) {
+    case P::PhysicalType::kAggregate: {
+      filter_predicate =
+          std::static_pointer_cast<const P::Aggregate>(input)->filter_predicate();
+      break;
+    }
+    case P::PhysicalType::kHashJoin: {
+      const P::HashJoinPtr hash_join =
+          std::static_pointer_cast<const P::HashJoin>(input);
+      if (hash_join->join_type() == P::HashJoin::JoinType::kInnerJoin) {
+        filter_predicate = hash_join->residual_predicate();
+      }
+      break;
+    }
+    case P::PhysicalType::kNestedLoopsJoin: {
+      filter_predicate =
+          std::static_pointer_cast<const P::NestedLoopsJoin>(input)->join_predicate();
+      break;
+    }
+    case P::PhysicalType::kSelection: {
+      filter_predicate =
+          std::static_pointer_cast<const P::Selection>(input)->filter_predicate();
+      break;
+    }
+    default:
+      break;
+  }
+
+  E::LogicalOrPtr disjunctive_predicate;
+  if (filter_predicate == nullptr ||
+      !E::SomeLogicalOr::MatchesWithConditionalCast(filter_predicate, &disjunctive_predicate)) {
+    return;
+  }
+
+  // Consider only disjunctive normal form, i.e. disjunction of conjunctions.
+  // Divide the disjunctive components into groups.
+  std::vector<std::vector<E::PredicatePtr>> candidate_predicates;
+  std::vector<std::vector<std::vector<E::AttributeReferencePtr>>> candidate_attributes;
+  for (const auto &conjunctive_predicate : disjunctive_predicate->operands()) {
+    candidate_predicates.emplace_back();
+    candidate_attributes.emplace_back();
+    E::LogicalAndPtr logical_and;
+    if (E::SomeLogicalAnd::MatchesWithConditionalCast(conjunctive_predicate, &logical_and)) {
+      for (const auto &predicate : logical_and->operands()) {
+        candidate_predicates.back().emplace_back(predicate);
+        candidate_attributes.back().emplace_back(
+            predicate->getReferencedAttributes());
+      }
+    } else {
+      candidate_predicates.back().emplace_back(conjunctive_predicate);
+      candidate_attributes.back().emplace_back(
+          conjunctive_predicate->getReferencedAttributes());
+    }
+  }
+
+  // Check whether the conditions are met for pushing down part of the predicates
+  // to each small-cardinality stored relation.
+  for (const auto &node_pair : applicable_nodes_) {
+    const std::vector<E::AttributeReferencePtr> &target_attributes = *node_pair.second;
+    std::vector<E::PredicatePtr> selected_disj_preds;
+    for (std::size_t i = 0; i < candidate_predicates.size(); ++i) {
+      const auto &cand_preds = candidate_predicates[i];
+      const auto &cand_attrs = candidate_attributes[i];
+
+      std::vector<E::PredicatePtr> selected_conj_preds;
+      for (std::size_t j = 0; j < cand_preds.size(); ++j) {
+        if (E::SubsetOfExpressions(cand_attrs[j], target_attributes)) {
+          selected_conj_preds.emplace_back(cand_preds[j]);
+        }
+      }
+      if (selected_conj_preds.empty()) {
+        // Not every disjunctive component contains a predicate that can be applied
+        // to the table reference node -- condition failed, exit.
+        selected_disj_preds.clear();
+        break;
+      } else {
+        selected_disj_preds.emplace_back(
+            CreateConjunctive(selected_conj_preds));
+      }
+    }
+    if (!selected_disj_preds.empty()) {
+      applicable_predicates_[node_pair.first].add(
+          CreateDisjunctive(selected_disj_preds));
+    }
+  }
+}
+
+P::PhysicalPtr PushDownLowCostDisjunctivePredicate::attachPredicates(
+    const P::PhysicalPtr &input) const {
+  std::vector<P::PhysicalPtr> new_children;
+  for (const P::PhysicalPtr &child : input->children()) {
+    const P::PhysicalPtr new_child = attachPredicates(child);
+    new_children.push_back(new_child);
+  }
+
+  const P::PhysicalPtr output =
+      new_children == input->children() ? input
+                                        : input->copyWithNewChildren(new_children);
+
+  const auto &node_it = applicable_predicates_.find(input);
+  if (node_it != applicable_predicates_.end()) {
+    const E::PredicatePtr filter_predicate =
+        CreateConjunctive(node_it->second.predicates);
+    return P::Selection::Create(output,
+                                E::ToNamedExpressions(output->getOutputAttributes()),
+                                filter_predicate);
+  }
+
+  return output;
+}
+
+E::PredicatePtr PushDownLowCostDisjunctivePredicate::CreateConjunctive(
+    const std::vector<E::PredicatePtr> predicates) {
+  DCHECK_GE(predicates.size(), 1u);
+  if (predicates.size() == 1) {
+    return predicates.front();
+  } else {
+    return E::LogicalAnd::Create(predicates);
+  }
+}
+
+E::PredicatePtr PushDownLowCostDisjunctivePredicate::CreateDisjunctive(
+    const std::vector<E::PredicatePtr> predicates) {
+  DCHECK_GE(predicates.size(), 1u);
+  if (predicates.size() == 1) {
+    return predicates.front();
+  } else {
+    return E::LogicalOr::Create(predicates);
+  }
+}
+
+}  // namespace optimizer
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp
new file mode 100644
index 0000000..3e4b602
--- /dev/null
+++ b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_PUSH_DOWN_LOW_COST_DISJUNCTIVE_PREDICATE_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_PUSH_DOWN_LOW_COST_DISJUNCTIVE_PREDICATE_HPP_
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ *  @{
+ */
+
+/**
+ * @brief Rule that applies to a physical plan to push down low-cost disjunctive
+ *        predicate when proper conditions are met.
+ *
+ * Here we elaborate the conditions.
+ *
+ * Let
+ *   P = p_{1,1} AND ... AND p_{1, m_1} OR ... OR p_{n,1} AND ... AND p_{n, m_n}
+ * be a predicate in disjunctive normal form.
+ *
+ * Now consider each small-cardinality relation R, if for each i in 1..n, there
+ * exists at least one predicate p_{i, k_i} that is applicable to R. Then we can
+ * construct a new predicate
+ *   P' = p_{1, k_1} OR ... OR p_{n, k_n}
+ * and push down P' to be applied to R.
+ *
+ * Also, if any conjunctive component in P contains more than one predicate that
+ * is applicable to R, then we can combine all these applicable predicates as a
+ * conjunctive component in P'.
+ *
+ * Finally, note that if there exists a conjunctive component that contains no
+ * predicate applicable to R. Then the condition fails and we cannot do a push
+ * down for R.
+ */
+class PushDownLowCostDisjunctivePredicate : public Rule<physical::Physical> {
+ public:
+  /**
+   * @brief Constructor.
+   */
+  PushDownLowCostDisjunctivePredicate() {}
+
+  ~PushDownLowCostDisjunctivePredicate() override {}
+
+  std::string getName() const override {
+    return "PushDownLowCostDisjunctivePredicate";
+  }
+
+  physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+  struct PredicateInfo {
+    PredicateInfo() {}
+    inline void add(expressions::PredicatePtr predicate) {
+      predicates.emplace_back(predicate);
+    }
+    std::vector<expressions::PredicatePtr> predicates;
+  };
+
+  void collectApplicablePredicates(const physical::PhysicalPtr &input);
+
+  physical::PhysicalPtr attachPredicates(const physical::PhysicalPtr &input) const;
+
+  static expressions::PredicatePtr CreateConjunctive(
+      const std::vector<expressions::PredicatePtr> predicates);
+
+  static expressions::PredicatePtr CreateDisjunctive(
+      const std::vector<expressions::PredicatePtr> predicates);
+
+  std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
+
+  std::vector<std::pair<physical::PhysicalPtr,
+                        const std::vector<expressions::AttributeReferencePtr> *>> applicable_nodes_;
+  std::map<physical::PhysicalPtr, PredicateInfo> applicable_predicates_;
+
+  DISALLOW_COPY_AND_ASSIGN(PushDownLowCostDisjunctivePredicate);
+};
+
+/** @} */
+
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_QUERY_OPTIMIZER_RULES_PUSH_DOWN_LOW_COST_DISJUNCTIVE_PREDICATE_HPP_


[2/2] incubator-quickstep git commit: Simplified the SelectOperator w/ partitions.

Posted by zu...@apache.org.
Simplified the SelectOperator w/ partitions.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/ea1356b4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/ea1356b4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/ea1356b4

Branch: refs/heads/partitioned-select
Commit: ea1356b40856e411ba1e8e9478b797d651b5aaaa
Parents: 259cd5e
Author: Zuyu Zhang <zu...@apache.org>
Authored: Mon Jan 30 00:35:12 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Tue Jan 31 09:44:18 2017 -0800

----------------------------------------------------------------------
 query_optimizer/ExecutionGenerator.cpp  | 30 +++++++---
 relational_operators/SelectOperator.cpp | 84 +++++++++++-----------------
 relational_operators/SelectOperator.hpp | 67 ++++++++++------------
 3 files changed, 84 insertions(+), 97 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ea1356b4/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index e25b8ad..eb80cd4 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -546,6 +546,13 @@ void ExecutionGenerator::convertSelection(
   const CatalogRelationInfo *input_relation_info =
       findRelationInfoOutputByPhysical(physical_selection->input());
   DCHECK(input_relation_info != nullptr);
+  const CatalogRelation &input_relation = *input_relation_info->relation;
+  const PartitionScheme *input_partition_scheme = input_relation.getPartitionScheme();
+
+  const std::size_t num_partitions =
+      input_partition_scheme
+          ? input_partition_scheme->getPartitionSchemeHeader().getNumPartitions()
+          : 1u;
 
   // Use the "simple" form of the selection operator (a pure projection that
   // doesn't require any expression evaluation or intermediate copies) if
@@ -554,19 +561,21 @@ void ExecutionGenerator::convertSelection(
   SelectOperator *op =
       convertSimpleProjection(project_expressions_group_index, &attributes)
           ? new SelectOperator(query_handle_->query_id(),
-                               *input_relation_info->relation,
+                               input_relation,
                                *output_relation,
                                insert_destination_index,
                                execution_predicate_index,
                                move(attributes),
-                               input_relation_info->isStoredRelation())
+                               input_relation_info->isStoredRelation(),
+                               num_partitions)
           : new SelectOperator(query_handle_->query_id(),
-                               *input_relation_info->relation,
+                               input_relation,
                                *output_relation,
                                insert_destination_index,
                                execution_predicate_index,
                                project_expressions_group_index,
-                               input_relation_info->isStoredRelation());
+                               input_relation_info->isStoredRelation(),
+                               num_partitions);
 
   const QueryPlan::DAGNodeIndex select_index =
       execution_plan_->addRelationalOperator(op);
@@ -1248,7 +1257,13 @@ void ExecutionGenerator::convertInsertSelection(
 
   const CatalogRelationInfo *selection_relation_info =
       findRelationInfoOutputByPhysical(physical_plan->selection());
-  const CatalogRelation *selection_relation = selection_relation_info->relation;
+  const CatalogRelation &selection_relation = *selection_relation_info->relation;
+  const PartitionScheme *selection_partition_scheme = selection_relation.getPartitionScheme();
+
+  const std::size_t num_partitions =
+      selection_partition_scheme
+          ? selection_partition_scheme->getPartitionSchemeHeader().getNumPartitions()
+          : 1u;
 
   // Prepare the attributes, which are output columns of the selection relation.
   std::vector<attribute_id> attributes;
@@ -1269,12 +1284,13 @@ void ExecutionGenerator::convertInsertSelection(
   // physical plan by modifying class Physical.
   SelectOperator *insert_selection_op =
       new SelectOperator(query_handle_->query_id(),
-                         *selection_relation,
+                         selection_relation,
                          destination_relation,
                          insert_destination_index,
                          QueryContext::kInvalidPredicateId,
                          move(attributes),
-                         selection_relation_info->isStoredRelation());
+                         selection_relation_info->isStoredRelation(),
+                         num_partitions);
 
   const QueryPlan::DAGNodeIndex insert_selection_index =
       execution_plan_->addRelationalOperator(insert_selection_op);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ea1356b4/relational_operators/SelectOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/SelectOperator.cpp b/relational_operators/SelectOperator.cpp
index 5419cf8..b63f0be 100644
--- a/relational_operators/SelectOperator.cpp
+++ b/relational_operators/SelectOperator.cpp
@@ -66,64 +66,40 @@ bool SelectOperator::getAllWorkOrders(
       return true;
     }
 
-    if (input_relation_.hasPartitionScheme()) {
-      for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
-        for (const block_id input_block_id : input_relation_block_ids_in_partition_[part_id]) {
-          numa_node_id numa_node = 0;
+    for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
+      for (const block_id input_block_id : input_relation_block_ids_[part_id]) {
+        numa_node_id numa_node = 0;
 #ifdef QUICKSTEP_HAVE_LIBNUMA
-          if (input_relation_.hasNUMAPlacementScheme()) {
-            numa_node = placement_scheme_->getNUMANodeForBlock(input_block_id);
-          }
-#endif  // QUICKSTEP_HAVE_LIBNUMA
-          container->addNormalWorkOrder(
-              new SelectWorkOrder(query_id_, input_relation_, input_block_id, predicate, simple_projection_,
-                                  simple_selection_, selection, output_destination, storage_manager,
-                                  CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context), numa_node),
-              op_index_);
+        if (input_relation_.hasNUMAPlacementScheme()) {
+          numa_node = placement_scheme_->getNUMANodeForBlock(input_block_id);
         }
-      }
-    } else {
-      for (const block_id input_block_id : input_relation_block_ids_) {
+#endif  // QUICKSTEP_HAVE_LIBNUMA
         container->addNormalWorkOrder(
             new SelectWorkOrder(query_id_, input_relation_, input_block_id, predicate, simple_projection_,
                                 simple_selection_, selection, output_destination, storage_manager,
-                                CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context)),
+                                CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context), numa_node),
             op_index_);
       }
     }
     started_ = true;
     return true;
   } else {
-    if (input_relation_.hasPartitionScheme()) {
-      for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
-        while (num_workorders_generated_in_partition_[part_id] <
-               input_relation_block_ids_in_partition_[part_id].size()) {
-          const block_id block_in_partition
-              = input_relation_block_ids_in_partition_[part_id][num_workorders_generated_in_partition_[part_id]];
-
-          numa_node_id numa_node = 0;
+    for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
+      while (num_workorders_generated_[part_id] < input_relation_block_ids_[part_id].size()) {
+        const block_id block = input_relation_block_ids_[part_id][num_workorders_generated_[part_id]];
+
+        numa_node_id numa_node = 0;
 #ifdef QUICKSTEP_HAVE_LIBNUMA
-          if (input_relation_.hasNUMAPlacementScheme()) {
-            numa_node = placement_scheme_->getNUMANodeForBlock(block_in_partition);
-          }
-#endif  // QUICKSTEP_HAVE_LIBNUMA
-          container->addNormalWorkOrder(
-              new SelectWorkOrder(query_id_, input_relation_, block_in_partition, predicate, simple_projection_,
-                                  simple_selection_, selection, output_destination, storage_manager,
-                                  CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context), numa_node),
-              op_index_);
-          ++num_workorders_generated_in_partition_[part_id];
+        if (input_relation_.hasNUMAPlacementScheme()) {
+          numa_node = placement_scheme_->getNUMANodeForBlock(block);
         }
-      }
-    } else {
-      while (num_workorders_generated_ < input_relation_block_ids_.size()) {
+#endif  // QUICKSTEP_HAVE_LIBNUMA
         container->addNormalWorkOrder(
-            new SelectWorkOrder(query_id_, input_relation_, input_relation_block_ids_[num_workorders_generated_],
-                                predicate, simple_projection_, simple_selection_, selection, output_destination,
-                                storage_manager,
-                                CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context)),
+            new SelectWorkOrder(query_id_, input_relation_, block, predicate, simple_projection_,
+                                simple_selection_, selection, output_destination, storage_manager,
+                                CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context), numa_node),
             op_index_);
-        ++num_workorders_generated_;
+        ++num_workorders_generated_[part_id];
       }
     }
     return done_feeding_input_relation_;
@@ -132,19 +108,25 @@ bool SelectOperator::getAllWorkOrders(
 
 bool SelectOperator::getAllWorkOrderProtos(WorkOrderProtosContainer *container) {
   if (input_relation_is_stored_) {
-    if (!started_) {
-      for (const block_id input_block_id : input_relation_block_ids_) {
+    if (started_) {
+      return true;
+    }
+
+    for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
+      for (const block_id input_block_id : input_relation_block_ids_[part_id]) {
         container->addWorkOrderProto(createWorkOrderProto(input_block_id), op_index_);
       }
-      started_ = true;
     }
+    started_ = true;
     return true;
   } else {
-    while (num_workorders_generated_ < input_relation_block_ids_.size()) {
-      container->addWorkOrderProto(
-          createWorkOrderProto(input_relation_block_ids_[num_workorders_generated_]),
-          op_index_);
-      ++num_workorders_generated_;
+    for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
+      while (num_workorders_generated_[part_id] < input_relation_block_ids_[part_id].size()) {
+        container->addWorkOrderProto(
+            createWorkOrderProto(input_relation_block_ids_[part_id][num_workorders_generated_[part_id]]),
+            op_index_);
+        ++num_workorders_generated_[part_id];
+      }
     }
     return done_feeding_input_relation_;
   }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ea1356b4/relational_operators/SelectOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/SelectOperator.hpp b/relational_operators/SelectOperator.hpp
index 79ab37f..b9a4d49 100644
--- a/relational_operators/SelectOperator.hpp
+++ b/relational_operators/SelectOperator.hpp
@@ -85,6 +85,8 @@ class SelectOperator : public RelationalOperator {
    * @param input_relation_is_stored If input_relation is a stored relation and
    *        is fully available to the operator before it can start generating
    *        workorders.
+   * @param num_partitions The number of partitions in 'input_relation'.
+   *        If no partitions, it is one.
    **/
   SelectOperator(
       const std::size_t query_id,
@@ -93,36 +95,33 @@ class SelectOperator : public RelationalOperator {
       const QueryContext::insert_destination_id output_destination_index,
       const QueryContext::predicate_id predicate_index,
       const QueryContext::scalar_group_id selection_index,
-      const bool input_relation_is_stored)
+      const bool input_relation_is_stored,
+      const std::size_t num_partitions)
       : RelationalOperator(query_id),
         input_relation_(input_relation),
         output_relation_(output_relation),
         output_destination_index_(output_destination_index),
         predicate_index_(predicate_index),
         selection_index_(selection_index),
-        num_workorders_generated_(0),
+        num_partitions_(num_partitions),
+        input_relation_block_ids_(num_partitions),
+        num_workorders_generated_(num_partitions),
         simple_projection_(false),
         input_relation_is_stored_(input_relation_is_stored),
         started_(false) {
 #ifdef QUICKSTEP_HAVE_LIBNUMA
     placement_scheme_ = input_relation.getNUMAPlacementSchemePtr();
 #endif
-    if (input_relation.hasPartitionScheme()) {
-      const PartitionScheme &part_scheme = *input_relation.getPartitionScheme();
-      num_partitions_ = part_scheme.getPartitionSchemeHeader().getNumPartitions();
+    if (input_relation_is_stored) {
+      if (input_relation.hasPartitionScheme()) {
+        const PartitionScheme &part_scheme = *input_relation.getPartitionScheme();
 
-      num_workorders_generated_in_partition_.resize(num_partitions_);
-
-      if (input_relation_is_stored) {
         for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
-          input_relation_block_ids_in_partition_.push_back(
-              part_scheme.getBlocksInPartition(part_id));
+          input_relation_block_ids_[part_id] = part_scheme.getBlocksInPartition(part_id);
         }
       } else {
-        input_relation_block_ids_in_partition_.resize(num_partitions_);
+        input_relation_block_ids_[0] = input_relation.getBlocksSnapshot();
       }
-    } else if (input_relation_is_stored) {
-      input_relation_block_ids_ = input_relation.getBlocksSnapshot();
     }
   }
 
@@ -144,6 +143,8 @@ class SelectOperator : public RelationalOperator {
    * @param input_relation_is_stored If input_relation is a stored relation and
    *        is fully available to the operator before it can start generating
    *        workorders.
+   * @param num_partitions The number of partitions in 'input_relation'.
+   *        If no partitions, it is one.
    **/
   SelectOperator(
       const std::size_t query_id,
@@ -152,7 +153,8 @@ class SelectOperator : public RelationalOperator {
       const QueryContext::insert_destination_id output_destination_index,
       const QueryContext::predicate_id predicate_index,
       std::vector<attribute_id> &&selection,
-      const bool input_relation_is_stored)
+      const bool input_relation_is_stored,
+      const std::size_t num_partitions)
       : RelationalOperator(query_id),
         input_relation_(input_relation),
         output_relation_(output_relation),
@@ -160,29 +162,25 @@ class SelectOperator : public RelationalOperator {
         predicate_index_(predicate_index),
         selection_index_(QueryContext::kInvalidScalarGroupId),
         simple_selection_(std::move(selection)),
-        num_workorders_generated_(0),
+        num_partitions_(num_partitions),
+        input_relation_block_ids_(num_partitions),
+        num_workorders_generated_(num_partitions),
         simple_projection_(true),
         input_relation_is_stored_(input_relation_is_stored),
         started_(false) {
 #ifdef QUICKSTEP_HAVE_LIBNUMA
     placement_scheme_ = input_relation.getNUMAPlacementSchemePtr();
 #endif
-    if (input_relation.hasPartitionScheme()) {
-      const PartitionScheme &part_scheme = *input_relation.getPartitionScheme();
-      num_partitions_ = part_scheme.getPartitionSchemeHeader().getNumPartitions();
-
-      num_workorders_generated_in_partition_.resize(num_partitions_);
+    if (input_relation_is_stored) {
+      if (input_relation.hasPartitionScheme()) {
+        const PartitionScheme &part_scheme = *input_relation.getPartitionScheme();
 
-      if (input_relation_is_stored) {
         for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
-          input_relation_block_ids_in_partition_.push_back(
-              part_scheme.getBlocksInPartition(part_id));
+          input_relation_block_ids_[part_id] = part_scheme.getBlocksInPartition(part_id);
         }
       } else {
-        input_relation_block_ids_in_partition_.resize(num_partitions_);
+        input_relation_block_ids_[0] = input_relation.getBlocksSnapshot();
       }
-    } else if (input_relation_is_stored) {
-      input_relation_block_ids_ = input_relation.getBlocksSnapshot();
     }
   }
 
@@ -206,11 +204,7 @@ class SelectOperator : public RelationalOperator {
 
   void feedInputBlock(const block_id input_block_id, const relation_id input_relation_id,
                       const partition_id part_id) override {
-    if (input_relation_.hasPartitionScheme()) {
-      input_relation_block_ids_in_partition_[part_id].push_back(input_block_id);
-    } else {
-      input_relation_block_ids_.push_back(input_block_id);
-    }
+    input_relation_block_ids_[part_id].push_back(input_block_id);
   }
 
   QueryContext::insert_destination_id getInsertDestinationID() const override {
@@ -237,17 +231,12 @@ class SelectOperator : public RelationalOperator {
   const QueryContext::scalar_group_id selection_index_;
   const std::vector<attribute_id> simple_selection_;
 
-  std::vector<block_id> input_relation_block_ids_;
-  // A single workorder is generated for each block of input relation.
-  std::vector<block_id>::size_type num_workorders_generated_;
-
-  // Used for the partition case only.
+  const std::size_t num_partitions_;
   // A vector of vectors V where V[i] indicates the list of block IDs of the
   // input relation that belong to the partition i.
-  std::vector<std::vector<block_id>> input_relation_block_ids_in_partition_;
+  std::vector<std::vector<block_id>> input_relation_block_ids_;
   // A single workorder is generated for each block in each partition of input relation.
-  std::vector<std::size_t> num_workorders_generated_in_partition_;
-  std::size_t num_partitions_;
+  std::vector<std::size_t> num_workorders_generated_;
 
   const bool simple_projection_;
   const bool input_relation_is_stored_;