You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ha...@apache.org on 2016/08/01 22:25:22 UTC
[1/2] incubator-quickstep git commit: Implemented hashjoin
optimization class and removed the logic from ExecutionGenerator.
Repository: incubator-quickstep
Updated Branches:
refs/heads/refactor-hashjoin-probe-build [created] a0905647b
Implemented hashjoin optimization class and removed the logic from ExecutionGenerator.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/372902dd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/372902dd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/372902dd
Branch: refs/heads/refactor-hashjoin-probe-build
Commit: 372902dd706f9e328dde3709532b5fbf111fdf24
Parents: 260b862
Author: Hakan Memisoglu <ha...@gmail.com>
Authored: Mon Aug 1 16:39:07 2016 -0500
Committer: Hakan Memisoglu <ha...@gmail.com>
Committed: Mon Aug 1 16:39:07 2016 -0500
----------------------------------------------------------------------
query_optimizer/CMakeLists.txt | 1 +
query_optimizer/ExecutionGenerator.cpp | 17 --------
query_optimizer/PhysicalGenerator.cpp | 2 +
query_optimizer/rules/CMakeLists.txt | 12 ++++++
query_optimizer/rules/SwapProbeBuild.cpp | 57 +++++++++++++++++++++++++++
query_optimizer/rules/SwapProbeBuild.hpp | 46 +++++++++++++++++++++
6 files changed, 118 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/372902dd/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index a56b714..c55881f 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -199,6 +199,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
+ quickstep_queryoptimizer_rules_SwapProbeBuild
quickstep_queryoptimizer_strategy_Aggregate
quickstep_queryoptimizer_strategy_Join
quickstep_queryoptimizer_strategy_OneToOne
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/372902dd/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 88103df..cd6a7c6 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -671,23 +671,6 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
key_types.push_back(&left_attribute_type);
}
- std::size_t probe_cardinality = cost_model_->estimateCardinality(probe_physical);
- std::size_t build_cardinality = cost_model_->estimateCardinality(build_physical);
- // For inner join, we may swap the probe table and the build table.
- if (physical_plan->join_type() == P::HashJoin::JoinType::kInnerJoin) {
- // Choose the smaller table as the inner build table,
- // and the other one as the outer probe table.
- if (probe_cardinality < build_cardinality) {
- // Switch the probe and build physical nodes.
- std::swap(probe_physical, build_physical);
- std::swap(probe_cardinality, build_cardinality);
- std::swap(probe_attribute_ids, build_attribute_ids);
- std::swap(any_probe_attributes_nullable, any_build_attributes_nullable);
- std::swap(probe_original_attribute_ids, build_original_attribute_ids);
- std::swap(referenced_stored_probe_relation, referenced_stored_build_relation);
- }
- }
-
// Convert the residual predicate proto.
QueryContext::predicate_id residual_predicate_index = QueryContext::kInvalidPredicateId;
if (physical_plan->residual_predicate()) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/372902dd/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 75a7bc9..897b212 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -28,6 +28,7 @@
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
#include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
+#include "query_optimizer/rules/SwapProbeBuild.hpp"
#include "query_optimizer/strategy/Aggregate.hpp"
#include "query_optimizer/strategy/Join.hpp"
#include "query_optimizer/strategy/OneToOne.hpp"
@@ -98,6 +99,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
}
rules.emplace_back(new PruneColumns());
+ rules.emplace_back(new SwapProbeBuild());
for (std::unique_ptr<Rule<P::Physical>> &rule : rules) {
physical_plan_ = rule->apply(physical_plan_);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/372902dd/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 1990174..73d7e38 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -29,6 +29,7 @@ add_library(quickstep_queryoptimizer_rules_RuleHelper RuleHelper.cpp RuleHelper.
add_library(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
StarSchemaHashJoinOrderOptimization.cpp
StarSchemaHashJoinOrderOptimization.hpp)
+add_library(quickstep_queryoptimizer_rules_SwapProbeBuild SwapProbeBuild.cpp SwapProbeBuild.hpp)
add_library(quickstep_queryoptimizer_rules_TopDownRule ../../empty_src.cpp TopDownRule.hpp)
add_library(quickstep_queryoptimizer_rules_UpdateExpression UpdateExpression.cpp UpdateExpression.hpp)
add_library(quickstep_queryoptimizer_rules_UnnestSubqueries UnnestSubqueries.cpp UnnestSubqueries.hpp)
@@ -127,6 +128,16 @@ target_link_libraries(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOpti
quickstep_queryoptimizer_physical_TopLevelPlan
quickstep_queryoptimizer_rules_Rule
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_SwapProbeBuild
+ quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_TopLevelPlan
+ quickstep_queryoptimizer_rules_BottomUpRule
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_rules_TopDownRule
quickstep_queryoptimizer_rules_Rule
quickstep_utility_Macros)
@@ -185,6 +196,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
quickstep_queryoptimizer_rules_Rule
quickstep_queryoptimizer_rules_RuleHelper
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
+ quickstep_queryoptimizer_rules_SwapProbeBuild
quickstep_queryoptimizer_rules_TopDownRule
quickstep_queryoptimizer_rules_UpdateExpression
quickstep_queryoptimizer_rules_UnnestSubqueries)
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/372902dd/query_optimizer/rules/SwapProbeBuild.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/SwapProbeBuild.cpp b/query_optimizer/rules/SwapProbeBuild.cpp
new file mode 100644
index 0000000..63347e9
--- /dev/null
+++ b/query_optimizer/rules/SwapProbeBuild.cpp
@@ -0,0 +1,57 @@
+#include "query_optimizer/rules/SwapProbeBuild.hpp"
+
+#include <cstddef>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+
+
+namespace quickstep {
+namespace optimizer {
+
+/**
+ * Swaps the two children of an inner hash join when the right (build) side is
+ * estimated to be larger than the left (probe) side, so that the hash table is
+ * built on the smaller input. Any other node is returned unchanged.
+ *
+ * @param input The physical node currently visited by the bottom-up traversal.
+ * @return A new HashJoin with its children and join attributes exchanged, or
+ *         \p input itself when no swap is performed.
+ */
+P::PhysicalPtr SwapProbeBuild::applyToNode(const P::PhysicalPtr &input) {
+  P::HashJoinPtr hash_join;
+
+  // Only an inner join is symmetric in its inputs. The swap logic removed from
+  // ExecutionGenerator::convertHashJoin() was guarded by the same join-type
+  // check; exchanging the children of any other join type would change the
+  // meaning of the plan.
+  if (P::SomeHashJoin::MatchesWithConditionalCast(input, &hash_join) &&
+      hash_join->join_type() == P::HashJoin::JoinType::kInnerJoin) {
+    const P::PhysicalPtr left = hash_join->left();
+    const P::PhysicalPtr right = hash_join->right();
+
+    // (Re)create the cost model used for the two estimates below.
+    P::TopLevelPlanPtr top_level;
+    if (P::SomeTopLevelPlan::MatchesWithConditionalCast(input, &top_level)) {
+      cost_model_.reset(new C::StarSchemaSimpleCostModel(top_level->shared_subplans()));
+    } else {
+      std::vector<P::PhysicalPtr> plans = {input};
+      cost_model_.reset(new C::StarSchemaSimpleCostModel(plans));
+    }
+
+    const std::size_t left_cardinality = cost_model_->estimateCardinality(left);
+    const std::size_t right_cardinality = cost_model_->estimateCardinality(right);
+
+    if (right_cardinality > left_cardinality) {
+      // The old left child becomes the new right child, and each join
+      // attribute list moves together with the child it refers to.
+      P::PhysicalPtr output = P::HashJoin::Create(right,
+                                                  left,
+                                                  hash_join->right_join_attributes(),
+                                                  hash_join->left_join_attributes(),
+                                                  hash_join->residual_predicate(),
+                                                  hash_join->project_expressions(),
+                                                  hash_join->join_type());
+      LOG_APPLYING_RULE(input, output);
+      return output;
+    }
+  }
+
+  LOG_IGNORING_RULE(input);
+  return input;
+}
+
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/372902dd/query_optimizer/rules/SwapProbeBuild.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/SwapProbeBuild.hpp b/query_optimizer/rules/SwapProbeBuild.hpp
new file mode 100644
index 0000000..5cc8cf9
--- /dev/null
+++ b/query_optimizer/rules/SwapProbeBuild.hpp
@@ -0,0 +1,46 @@
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_SWAP_PROBE_BUILD_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_SWAP_PROBE_BUILD_HPP_
+
+#include <string>
+
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "query_optimizer/rules/BottomUpRule.hpp"
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ * @{
+ */
+
+namespace P = ::quickstep::optimizer::physical;
+namespace E = ::quickstep::optimizer::expressions;
+namespace C = ::quickstep::optimizer::cost;
+
+/**
+ * @brief Physical-plan rule that decides which input of a hash join should be
+ *        the probe side and which the build side by comparing the estimated
+ *        cardinalities of the two inputs.
+ */
+class SwapProbeBuild : public BottomUpRule<P::Physical> {
+ public:
+  SwapProbeBuild() {}
+
+  std::string getName() const override {
+    return "SwapProbeBuild";
+  }
+
+ protected:
+  /**
+   * @brief Inspects a single node of the physical plan.
+   *
+   * @param input The node to inspect.
+   * @return The node that takes the place of \p input in the plan.
+   */
+  P::PhysicalPtr applyToNode(const P::PhysicalPtr &input) override;
+
+ private:
+  // Cost model that applyToNode() uses to estimate input cardinalities.
+  std::unique_ptr<C::StarSchemaSimpleCostModel> cost_model_;
+
+  DISALLOW_COPY_AND_ASSIGN(SwapProbeBuild);
+};
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif
[2/2] incubator-quickstep git commit: Added a field in Physical
HashJoin to save right(build) side estimated cardinality.
Posted by ha...@apache.org.
Added a field in Physical HashJoin to save right(build) side estimated cardinality.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/a0905647
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/a0905647
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/a0905647
Branch: refs/heads/refactor-hashjoin-probe-build
Commit: a0905647b783873d8c5ff7b5f17c6fa66f357e36
Parents: 372902d
Author: Hakan Memisoglu <ha...@gmail.com>
Authored: Mon Aug 1 17:24:47 2016 -0500
Committer: Hakan Memisoglu <ha...@gmail.com>
Committed: Mon Aug 1 17:24:47 2016 -0500
----------------------------------------------------------------------
query_optimizer/ExecutionGenerator.cpp | 2 +-
query_optimizer/physical/HashJoin.hpp | 33 +++++++++++++++++++++++----
query_optimizer/rules/SwapProbeBuild.cpp | 17 +++++++++++++-
3 files changed, 45 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0905647/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index cd6a7c6..83c90bd 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -727,7 +727,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
build_relation->getAttributeById(build_attribute)->getType().getProto());
}
- hash_table_proto->set_estimated_num_entries(build_cardinality);
+ hash_table_proto->set_estimated_num_entries(physical_plan->estimated_right_cardinality());
// Create three operators.
const QueryPlan::DAGNodeIndex build_operator_index =
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0905647/query_optimizer/physical/HashJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.hpp b/query_optimizer/physical/HashJoin.hpp
index b904b5f..9c2e9de 100644
--- a/query_optimizer/physical/HashJoin.hpp
+++ b/query_optimizer/physical/HashJoin.hpp
@@ -20,6 +20,7 @@
#ifndef QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_HASHJOIN_HPP_
#define QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_HASHJOIN_HPP_
+#include <cstddef>
#include <memory>
#include <string>
#include <type_traits>
@@ -106,6 +107,22 @@ class HashJoin : public BinaryJoin {
return join_type_;
}
+ /**
+ * @brief Gets the cardinality estimate stored on this node for the right
+ *        (build) side of the join.
+ *
+ * @return The estimated number of tuples on the right (build) side, as
+ *         passed to Create() or set_estimated_right_cardinality().
+ */
+ std::size_t estimated_right_cardinality() const {
+ return estimated_right_cardinality_;
+ }
+
+ /**
+ * @brief Stores the estimated cardinality of the right (build) side so that
+ *        a later stage can use it as a hint.
+ *
+ * @param estimated_right_cardinality The new estimate of the number of
+ *        tuples on the right (build) side.
+ */
+ void set_estimated_right_cardinality(const std::size_t estimated_right_cardinality) {
+ estimated_right_cardinality_ = estimated_right_cardinality;
+ }
+
PhysicalPtr copyWithNewChildren(
const std::vector<PhysicalPtr> &new_children) const override {
DCHECK_EQ(children().size(), new_children.size());
@@ -115,7 +132,8 @@ class HashJoin : public BinaryJoin {
right_join_attributes_,
residual_predicate_,
project_expressions(),
- join_type_);
+ join_type_,
+ estimated_right_cardinality_);
}
std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() const override;
@@ -144,7 +162,8 @@ class HashJoin : public BinaryJoin {
const std::vector<expressions::AttributeReferencePtr> &right_join_attributes,
const expressions::PredicatePtr &residual_predicate,
const std::vector<expressions::NamedExpressionPtr> &project_expressions,
- const JoinType join_type) {
+ const JoinType join_type,
+ const std::size_t estimated_right_cardinality = 0u) {
return HashJoinPtr(
new HashJoin(left,
right,
@@ -152,7 +171,8 @@ class HashJoin : public BinaryJoin {
right_join_attributes,
residual_predicate,
project_expressions,
- join_type));
+ join_type,
+ estimated_right_cardinality));
}
protected:
@@ -172,18 +192,21 @@ class HashJoin : public BinaryJoin {
const std::vector<expressions::AttributeReferencePtr> &right_join_attributes,
const expressions::PredicatePtr &residual_predicate,
const std::vector<expressions::NamedExpressionPtr> &project_expressions,
- const JoinType join_type)
+ const JoinType join_type,
+ const std::size_t estimated_right_cardinality)
: BinaryJoin(left, right, project_expressions),
left_join_attributes_(left_join_attributes),
right_join_attributes_(right_join_attributes),
residual_predicate_(residual_predicate),
- join_type_(join_type) {
+ join_type_(join_type),
+ estimated_right_cardinality_(estimated_right_cardinality) {
}
std::vector<expressions::AttributeReferencePtr> left_join_attributes_;
std::vector<expressions::AttributeReferencePtr> right_join_attributes_;
expressions::PredicatePtr residual_predicate_;
JoinType join_type_;
+ std::size_t estimated_right_cardinality_;
DISALLOW_COPY_AND_ASSIGN(HashJoin);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0905647/query_optimizer/rules/SwapProbeBuild.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/SwapProbeBuild.cpp b/query_optimizer/rules/SwapProbeBuild.cpp
index 63347e9..a24979f 100644
--- a/query_optimizer/rules/SwapProbeBuild.cpp
+++ b/query_optimizer/rules/SwapProbeBuild.cpp
@@ -43,10 +43,25 @@ P::PhysicalPtr SwapProbeBuild::applyToNode(const P::PhysicalPtr &input) {
left_join_attributes,
hash_join->residual_predicate(),
hash_join->project_expressions(),
- hash_join->join_type());
+ hash_join->join_type(),
+ left_cardinality);
LOG_APPLYING_RULE(input, output);
return output;
}
+ else {
+ P::PhysicalPtr output = P::HashJoin::Create(left,
+ right,
+ hash_join->left_join_attributes(),
+ hash_join->right_join_attributes(),
+ hash_join->residual_predicate(),
+ hash_join->project_expressions(),
+ hash_join->join_type(),
+ right_cardinality);
+ // Since we did not apply the swap logic, we do not report it to the log.
+ // However, we also did not ignore the rule completely, so we do not
+ // log that we ignored the rule either.
+ return output;
+ }
}
LOG_IGNORING_RULE(input);