You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/07/07 18:57:57 UTC
[7/8] incubator-quickstep git commit: updates
updates
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/a3ba59f9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/a3ba59f9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/a3ba59f9
Branch: refs/heads/adaptive-bloom-filters
Commit: a3ba59f9a5bd02620ed3e9101899c2c885343d57
Parents: 0e685e0
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Tue Jul 5 15:50:45 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Thu Jul 7 12:54:04 2016 -0500
----------------------------------------------------------------------
query_optimizer/PhysicalGenerator.cpp | 1 +
.../StarSchemaHashJoinOrderOptimization.cpp | 29 ++++++++++++-
.../StarSchemaHashJoinOrderOptimization.hpp | 43 +++++++++++++++-----
3 files changed, 61 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a3ba59f9/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index ee133b5..5f97d6f 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -95,6 +95,7 @@ P::PhysicalPtr PhysicalGenerator::generateInitialPlan(
P::PhysicalPtr PhysicalGenerator::optimizePlan() {
std::vector<std::unique_ptr<Rule<P::Physical>>> rules;
if (FLAGS_reorder_hash_joins) {
+ rules.emplace_back(new PruneColumns());
rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
}
rules.emplace_back(new PruneColumns());
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a3ba59f9/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
index c9bd7d2..2e0b6c6 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
@@ -73,6 +73,9 @@ P::PhysicalPtr StarSchemaHashJoinOrderOptimization::applyInternal(const P::Physi
JoinGroupInfo *join_group = nullptr;
if (parent_join_group == nullptr || !is_valid_cascading_hash_join) {
new_join_group.reset(new JoinGroupInfo());
+ for (const auto &attr : input->getReferencedAttributes()) {
+ new_join_group->referenced_attributes.emplace(attr->id());
+ }
join_group = new_join_group.get();
} else {
join_group = parent_join_group;
@@ -145,7 +148,9 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
i,
tables[i],
cost_model_->estimateCardinality(tables[i]),
- cost_model_->estimateSelectivity(tables[i]));
+ cost_model_->estimateSelectivity(tables[i]),
+ CountSharedAttributes(join_group.referenced_attributes,
+ tables[i]->getOutputAttributes()));
}
// Auxiliary mapping info.
@@ -237,6 +242,11 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
CHECK(selected_probe_table_info != nullptr);
CHECK(selected_build_table_info != nullptr);
+ std::cerr << selected_probe_table_info->estimated_num_output_attributes
+ << " -- "
+ << selected_build_table_info->estimated_num_output_attributes
+ << "\n";
+
remaining_tables.erase(selected_probe_table_info);
remaining_tables.erase(selected_build_table_info);
@@ -283,6 +293,10 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
selected_probe_table_info->estimated_cardinality = cost_model_->estimateCardinality(output);
selected_probe_table_info->estimated_selectivity = cost_model_->estimateSelectivity(output);
+ selected_probe_table_info->estimated_num_output_attributes =
+ CountSharedAttributes(join_group.referenced_attributes,
+ output->getOutputAttributes());
+
remaining_tables.emplace(selected_probe_table_info);
// Update join attribute groups.
@@ -307,5 +321,18 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
}
}
+std::size_t StarSchemaHashJoinOrderOptimization::CountSharedAttributes(
+ const std::unordered_set<expressions::ExprId> &attr_set1,
+ const std::vector<expressions::AttributeReferencePtr> &attr_set2) {
+ std::size_t cnt = 0;
+ for (const auto &attr : attr_set2) {
+ if (attr_set1.find(attr->id()) != attr_set1.end()) {
+ ++cnt;
+ }
+ }
+ return cnt;
+}
+
+
} // namespace optimizer
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a3ba59f9/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
index 9ff89fd..6ad300c 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
@@ -62,6 +62,7 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
* @brief A group of tables to form a hash join tree.
*/
struct JoinGroupInfo {
+ std::unordered_set<expressions::ExprId> referenced_attributes;
std::vector<physical::PhysicalPtr> tables;
std::vector<std::pair<expressions::ExprId, expressions::ExprId>> join_attribute_pairs;
};
@@ -70,20 +71,23 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
* @brief Auxiliary information of a table for the optimizer.
*/
struct TableInfo {
- TableInfo(const std::size_t in_table_info_id,
- const physical::PhysicalPtr &in_table,
- const std::size_t in_estimated_cardinality,
- const double in_estimated_selectivity)
- : table_info_id(in_table_info_id),
- table(in_table),
- estimated_cardinality(in_estimated_cardinality),
- estimated_selectivity(in_estimated_selectivity) {
+ TableInfo(const std::size_t table_info_id_in,
+ const physical::PhysicalPtr &table_in,
+ const std::size_t estimated_cardinality_in,
+ const double estimated_selectivity_in,
+ const std::size_t estimated_num_output_attributes_in)
+ : table_info_id(table_info_id_in),
+ table(table_in),
+ estimated_cardinality(estimated_cardinality_in),
+ estimated_selectivity(estimated_selectivity_in),
+ estimated_num_output_attributes(estimated_num_output_attributes_in) {
}
const std::size_t table_info_id;
physical::PhysicalPtr table;
std::size_t estimated_cardinality;
double estimated_selectivity;
+ std::size_t estimated_num_output_attributes;
};
struct JoinPair {
@@ -91,13 +95,26 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
: probe(probe_in), build(build_in) {
}
- inline bool isBetterThan (const JoinPair &rhs) const {
+ inline bool isBetterThan(const JoinPair &rhs) const {
const auto &lhs = *this;
- const bool lhs_has_small_build = lhs.build->estimated_cardinality < 0x1000;
- const bool rhs_has_small_build = rhs.build->estimated_cardinality < 0x1000;
+ const bool lhs_has_large_output =
+ lhs.build->estimated_num_output_attributes
+ + lhs.probe->estimated_num_output_attributes > 5;
+ const bool rhs_has_large_output =
+ rhs.build->estimated_num_output_attributes
+ + rhs.probe->estimated_num_output_attributes > 5;
+ if (lhs_has_large_output != rhs_has_large_output) {
+ return rhs_has_large_output;
+ }
+
+ const bool lhs_has_small_build =
+ !lhs_has_large_output && lhs.build->estimated_cardinality < 0x1000;
+ const bool rhs_has_small_build =
+ !rhs_has_large_output && rhs.build->estimated_cardinality < 0x1000;
if (lhs_has_small_build != rhs_has_small_build) {
return lhs_has_small_build;
}
+
if (lhs.probe->estimated_cardinality != rhs.probe->estimated_cardinality) {
return lhs.probe->estimated_cardinality < rhs.probe->estimated_cardinality;
}
@@ -126,6 +143,10 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
const expressions::PredicatePtr &residual_predicate,
const std::vector<expressions::NamedExpressionPtr> &project_expressions);
+ static std::size_t CountSharedAttributes(
+ const std::unordered_set<expressions::ExprId> &attr_set1,
+ const std::vector<expressions::AttributeReferencePtr> &attr_set2);
+
std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
DISALLOW_COPY_AND_ASSIGN(StarSchemaHashJoinOrderOptimization);