You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ha...@apache.org on 2016/08/01 22:25:22 UTC

[1/2] incubator-quickstep git commit: Implemented hashjoin optimization class and removed the logic from ExecutionGenerator.

Repository: incubator-quickstep
Updated Branches:
  refs/heads/refactor-hashjoin-probe-build [created] a0905647b


Implemented hashjoin optimization class and removed the logic from ExecutionGenerator.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/372902dd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/372902dd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/372902dd

Branch: refs/heads/refactor-hashjoin-probe-build
Commit: 372902dd706f9e328dde3709532b5fbf111fdf24
Parents: 260b862
Author: Hakan Memisoglu <ha...@gmail.com>
Authored: Mon Aug 1 16:39:07 2016 -0500
Committer: Hakan Memisoglu <ha...@gmail.com>
Committed: Mon Aug 1 16:39:07 2016 -0500

----------------------------------------------------------------------
 query_optimizer/CMakeLists.txt           |  1 +
 query_optimizer/ExecutionGenerator.cpp   | 17 --------
 query_optimizer/PhysicalGenerator.cpp    |  2 +
 query_optimizer/rules/CMakeLists.txt     | 12 ++++++
 query_optimizer/rules/SwapProbeBuild.cpp | 57 +++++++++++++++++++++++++++
 query_optimizer/rules/SwapProbeBuild.hpp | 46 +++++++++++++++++++++
 6 files changed, 118 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/372902dd/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index a56b714..c55881f 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -199,6 +199,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
                       quickstep_queryoptimizer_physical_Physical
                       quickstep_queryoptimizer_rules_PruneColumns
                       quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
+                      quickstep_queryoptimizer_rules_SwapProbeBuild
                       quickstep_queryoptimizer_strategy_Aggregate
                       quickstep_queryoptimizer_strategy_Join
                       quickstep_queryoptimizer_strategy_OneToOne

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/372902dd/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 88103df..cd6a7c6 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -671,23 +671,6 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
     key_types.push_back(&left_attribute_type);
   }
 
-  std::size_t probe_cardinality = cost_model_->estimateCardinality(probe_physical);
-  std::size_t build_cardinality = cost_model_->estimateCardinality(build_physical);
-  // For inner join, we may swap the probe table and the build table.
-  if (physical_plan->join_type() == P::HashJoin::JoinType::kInnerJoin)  {
-    // Choose the smaller table as the inner build table,
-    // and the other one as the outer probe table.
-    if (probe_cardinality < build_cardinality) {
-      // Switch the probe and build physical nodes.
-      std::swap(probe_physical, build_physical);
-      std::swap(probe_cardinality, build_cardinality);
-      std::swap(probe_attribute_ids, build_attribute_ids);
-      std::swap(any_probe_attributes_nullable, any_build_attributes_nullable);
-      std::swap(probe_original_attribute_ids, build_original_attribute_ids);
-      std::swap(referenced_stored_probe_relation, referenced_stored_build_relation);
-    }
-  }
-
   // Convert the residual predicate proto.
   QueryContext::predicate_id residual_predicate_index = QueryContext::kInvalidPredicateId;
   if (physical_plan->residual_predicate()) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/372902dd/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 75a7bc9..897b212 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -28,6 +28,7 @@
 #include "query_optimizer/physical/Physical.hpp"
 #include "query_optimizer/rules/PruneColumns.hpp"
 #include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
+#include "query_optimizer/rules/SwapProbeBuild.hpp"
 #include "query_optimizer/strategy/Aggregate.hpp"
 #include "query_optimizer/strategy/Join.hpp"
 #include "query_optimizer/strategy/OneToOne.hpp"
@@ -98,6 +99,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
     rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
   }
   rules.emplace_back(new PruneColumns());
+  rules.emplace_back(new SwapProbeBuild());
 
   for (std::unique_ptr<Rule<P::Physical>> &rule : rules) {
     physical_plan_ = rule->apply(physical_plan_);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/372902dd/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 1990174..73d7e38 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -29,6 +29,7 @@ add_library(quickstep_queryoptimizer_rules_RuleHelper RuleHelper.cpp RuleHelper.
 add_library(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
             StarSchemaHashJoinOrderOptimization.cpp
             StarSchemaHashJoinOrderOptimization.hpp)
+add_library(quickstep_queryoptimizer_rules_SwapProbeBuild SwapProbeBuild.cpp SwapProbeBuild.hpp)
 add_library(quickstep_queryoptimizer_rules_TopDownRule ../../empty_src.cpp TopDownRule.hpp)
 add_library(quickstep_queryoptimizer_rules_UpdateExpression UpdateExpression.cpp UpdateExpression.hpp)
 add_library(quickstep_queryoptimizer_rules_UnnestSubqueries UnnestSubqueries.cpp UnnestSubqueries.hpp)
@@ -127,6 +128,16 @@ target_link_libraries(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOpti
                       quickstep_queryoptimizer_physical_TopLevelPlan
                       quickstep_queryoptimizer_rules_Rule
                       quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_SwapProbeBuild
+                      quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+                      quickstep_queryoptimizer_expressions_AttributeReference
+                      quickstep_queryoptimizer_physical_HashJoin
+                      quickstep_queryoptimizer_physical_PatternMatcher
+                      quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_physical_TopLevelPlan
+                      quickstep_queryoptimizer_rules_BottomUpRule
+                      quickstep_queryoptimizer_rules_Rule
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_rules_TopDownRule
                       quickstep_queryoptimizer_rules_Rule
                       quickstep_utility_Macros)
@@ -185,6 +196,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
                       quickstep_queryoptimizer_rules_Rule
                       quickstep_queryoptimizer_rules_RuleHelper
                       quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
+                      quickstep_queryoptimizer_rules_SwapProbeBuild
                       quickstep_queryoptimizer_rules_TopDownRule
                       quickstep_queryoptimizer_rules_UpdateExpression
                       quickstep_queryoptimizer_rules_UnnestSubqueries)

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/372902dd/query_optimizer/rules/SwapProbeBuild.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/SwapProbeBuild.cpp b/query_optimizer/rules/SwapProbeBuild.cpp
new file mode 100644
index 0000000..63347e9
--- /dev/null
+++ b/query_optimizer/rules/SwapProbeBuild.cpp
@@ -0,0 +1,57 @@
+#include "query_optimizer/rules/SwapProbeBuild.hpp"
+
+#include <cstddef>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+
+
+namespace quickstep {
+namespace optimizer {
+
+P::PhysicalPtr SwapProbeBuild::applyToNode(const P::PhysicalPtr &input) {
+  P::HashJoinPtr hash_join;
+
+  if (P::SomeHashJoin::MatchesWithConditionalCast(input, &hash_join)) {
+    P::PhysicalPtr left = hash_join->left();
+    P::PhysicalPtr right = hash_join->right();
+
+    P::TopLevelPlanPtr top_level;
+    if (P::SomeTopLevelPlan::MatchesWithConditionalCast(input, &top_level)) {
+      cost_model_.reset(new C::StarSchemaSimpleCostModel(top_level->shared_subplans()));
+    } else {
+      std::vector<P::PhysicalPtr> plans = {input};
+      cost_model_.reset(new C::StarSchemaSimpleCostModel(plans));
+    }
+
+    std::size_t left_cardinality = cost_model_->estimateCardinality(left);
+    std::size_t right_cardinality = cost_model_->estimateCardinality(right);
+
+    if (right_cardinality > left_cardinality) {
+      std::vector<E::AttributeReferencePtr> left_join_attributes = hash_join->left_join_attributes();
+      std::vector<E::AttributeReferencePtr> right_join_attributes = hash_join->right_join_attributes();
+
+      P::PhysicalPtr output = P::HashJoin::Create(right,
+                                                  left,
+                                                  right_join_attributes,
+                                                  left_join_attributes,
+                                                  hash_join->residual_predicate(),
+                                                  hash_join->project_expressions(),
+                                                  hash_join->join_type());
+      LOG_APPLYING_RULE(input, output);
+      return output;
+    }
+  }
+
+  LOG_IGNORING_RULE(input);
+  return input;
+}
+
+}  // namespace optimizer
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/372902dd/query_optimizer/rules/SwapProbeBuild.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/SwapProbeBuild.hpp b/query_optimizer/rules/SwapProbeBuild.hpp
new file mode 100644
index 0000000..5cc8cf9
--- /dev/null
+++ b/query_optimizer/rules/SwapProbeBuild.hpp
@@ -0,0 +1,46 @@
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_SWAP_PROBE_BUILD_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_SWAP_PROBE_BUILD_HPP_
+
+#include <string>
+
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "query_optimizer/rules/BottomUpRule.hpp"
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ *  @{
+ */
+
+namespace P = ::quickstep::optimizer::physical;
+namespace E = ::quickstep::optimizer::expressions;
+namespace C = ::quickstep::optimizer::cost;
+
+/**
+ * @brief Rule that applies to a physical plan to arrange probe and
+ *        build side based on the cardinalities.
+ */
+class SwapProbeBuild : public BottomUpRule<P::Physical> {
+ public:
+  SwapProbeBuild() {
+  }
+
+  std::string getName() const override { return "SwapProbeBuild"; }
+
+ protected:
+  P::PhysicalPtr applyToNode(const P::PhysicalPtr &input) override;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SwapProbeBuild);
+
+  std::unique_ptr<C::StarSchemaSimpleCostModel> cost_model_;
+};
+
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif


[2/2] incubator-quickstep git commit: Added a field in Physical HashJoin to save right(build) side estimated cardinality.

Posted by ha...@apache.org.
Added a field in Physical HashJoin to save right(build) side estimated cardinality.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/a0905647
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/a0905647
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/a0905647

Branch: refs/heads/refactor-hashjoin-probe-build
Commit: a0905647b783873d8c5ff7b5f17c6fa66f357e36
Parents: 372902d
Author: Hakan Memisoglu <ha...@gmail.com>
Authored: Mon Aug 1 17:24:47 2016 -0500
Committer: Hakan Memisoglu <ha...@gmail.com>
Committed: Mon Aug 1 17:24:47 2016 -0500

----------------------------------------------------------------------
 query_optimizer/ExecutionGenerator.cpp   |  2 +-
 query_optimizer/physical/HashJoin.hpp    | 33 +++++++++++++++++++++++----
 query_optimizer/rules/SwapProbeBuild.cpp | 17 +++++++++++++-
 3 files changed, 45 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0905647/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index cd6a7c6..83c90bd 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -727,7 +727,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
         build_relation->getAttributeById(build_attribute)->getType().getProto());
   }
 
-  hash_table_proto->set_estimated_num_entries(build_cardinality);
+  hash_table_proto->set_estimated_num_entries(physical_plan->estimated_right_cardinality());
 
   // Create three operators.
   const QueryPlan::DAGNodeIndex build_operator_index =

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0905647/query_optimizer/physical/HashJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.hpp b/query_optimizer/physical/HashJoin.hpp
index b904b5f..9c2e9de 100644
--- a/query_optimizer/physical/HashJoin.hpp
+++ b/query_optimizer/physical/HashJoin.hpp
@@ -20,6 +20,7 @@
 #ifndef QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_HASHJOIN_HPP_
 #define QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_HASHJOIN_HPP_
 
+#include <cstddef>
 #include <memory>
 #include <string>
 #include <type_traits>
@@ -106,6 +107,22 @@ class HashJoin : public BinaryJoin {
     return join_type_;
   }
 
+  /**
+   * @return Estimated number of tuples from the right (build)'s side.
+   */
+  std::size_t estimated_right_cardinality() const {
+    return estimated_right_cardinality_;
+  }
+
+  /**
+   * @brief Sets the build side cardinality for using as a hint in a later stage.
+   *
+   * @param estimated_right_cardinality New build side estimated cardinality to be set.
+   */
+  void set_estimated_right_cardinality(const std::size_t estimated_right_cardinality) {
+    estimated_right_cardinality_ = estimated_right_cardinality;
+  }
+
   PhysicalPtr copyWithNewChildren(
       const std::vector<PhysicalPtr> &new_children) const override {
     DCHECK_EQ(children().size(), new_children.size());
@@ -115,7 +132,8 @@ class HashJoin : public BinaryJoin {
                   right_join_attributes_,
                   residual_predicate_,
                   project_expressions(),
-                  join_type_);
+                  join_type_,
+                  estimated_right_cardinality_);
   }
 
   std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() const override;
@@ -144,7 +162,8 @@ class HashJoin : public BinaryJoin {
       const std::vector<expressions::AttributeReferencePtr> &right_join_attributes,
       const expressions::PredicatePtr &residual_predicate,
       const std::vector<expressions::NamedExpressionPtr> &project_expressions,
-      const JoinType join_type) {
+      const JoinType join_type,
+      const std::size_t estimated_right_cardinality = 0u) {
     return HashJoinPtr(
         new HashJoin(left,
                      right,
@@ -152,7 +171,8 @@ class HashJoin : public BinaryJoin {
                      right_join_attributes,
                      residual_predicate,
                      project_expressions,
-                     join_type));
+                     join_type,
+                     estimated_right_cardinality));
   }
 
  protected:
@@ -172,18 +192,21 @@ class HashJoin : public BinaryJoin {
       const std::vector<expressions::AttributeReferencePtr> &right_join_attributes,
       const expressions::PredicatePtr &residual_predicate,
       const std::vector<expressions::NamedExpressionPtr> &project_expressions,
-      const JoinType join_type)
+      const JoinType join_type,
+      const std::size_t estimated_right_cardinality)
       : BinaryJoin(left, right, project_expressions),
         left_join_attributes_(left_join_attributes),
         right_join_attributes_(right_join_attributes),
         residual_predicate_(residual_predicate),
-        join_type_(join_type) {
+        join_type_(join_type),
+        estimated_right_cardinality_(estimated_right_cardinality) {
   }
 
   std::vector<expressions::AttributeReferencePtr> left_join_attributes_;
   std::vector<expressions::AttributeReferencePtr> right_join_attributes_;
   expressions::PredicatePtr residual_predicate_;
   JoinType join_type_;
+  std::size_t estimated_right_cardinality_;
 
   DISALLOW_COPY_AND_ASSIGN(HashJoin);
 };

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0905647/query_optimizer/rules/SwapProbeBuild.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/SwapProbeBuild.cpp b/query_optimizer/rules/SwapProbeBuild.cpp
index 63347e9..a24979f 100644
--- a/query_optimizer/rules/SwapProbeBuild.cpp
+++ b/query_optimizer/rules/SwapProbeBuild.cpp
@@ -43,10 +43,25 @@ P::PhysicalPtr SwapProbeBuild::applyToNode(const P::PhysicalPtr &input) {
                                                   left_join_attributes,
                                                   hash_join->residual_predicate(),
                                                   hash_join->project_expressions(),
-                                                  hash_join->join_type());
+                                                  hash_join->join_type(),
+                                                  left_cardinality);
       LOG_APPLYING_RULE(input, output);
       return output;
     }
+    else {
+      P::PhysicalPtr output = P::HashJoin::Create(left,
+                                                  right,
+                                                  hash_join->left_join_attributes(),
+                                                  hash_join->right_join_attributes(),
+                                                  hash_join->residual_predicate(),
+                                                  hash_join->project_expressions(),
+                                                  hash_join->join_type(),
+                                                  right_cardinality);
+      // Since we did not apply the swap logic, we will not report it to the log.
+      // However we also did not ignored the rule completely, therefore we will not
+      // log that we ignored the rule.
+      return output;
+    }
   }
 
   LOG_IGNORING_RULE(input);