You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/07/07 18:57:57 UTC

[7/8] incubator-quickstep git commit: updates

updates


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/a3ba59f9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/a3ba59f9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/a3ba59f9

Branch: refs/heads/adaptive-bloom-filters
Commit: a3ba59f9a5bd02620ed3e9101899c2c885343d57
Parents: 0e685e0
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Tue Jul 5 15:50:45 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Thu Jul 7 12:54:04 2016 -0500

----------------------------------------------------------------------
 query_optimizer/PhysicalGenerator.cpp           |  1 +
 .../StarSchemaHashJoinOrderOptimization.cpp     | 29 ++++++++++++-
 .../StarSchemaHashJoinOrderOptimization.hpp     | 43 +++++++++++++++-----
 3 files changed, 61 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a3ba59f9/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index ee133b5..5f97d6f 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -95,6 +95,7 @@ P::PhysicalPtr PhysicalGenerator::generateInitialPlan(
 P::PhysicalPtr PhysicalGenerator::optimizePlan() {
   std::vector<std::unique_ptr<Rule<P::Physical>>> rules;
   if (FLAGS_reorder_hash_joins) {
+    rules.emplace_back(new PruneColumns());
     rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
   }
   rules.emplace_back(new PruneColumns());

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a3ba59f9/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
index c9bd7d2..2e0b6c6 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
@@ -73,6 +73,9 @@ P::PhysicalPtr StarSchemaHashJoinOrderOptimization::applyInternal(const P::Physi
     JoinGroupInfo *join_group = nullptr;
     if (parent_join_group == nullptr || !is_valid_cascading_hash_join) {
       new_join_group.reset(new JoinGroupInfo());
+      for (const auto &attr : input->getReferencedAttributes()) {
+        new_join_group->referenced_attributes.emplace(attr->id());
+      }
       join_group = new_join_group.get();
     } else {
       join_group = parent_join_group;
@@ -145,7 +148,9 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
         i,
         tables[i],
         cost_model_->estimateCardinality(tables[i]),
-        cost_model_->estimateSelectivity(tables[i]));
+        cost_model_->estimateSelectivity(tables[i]),
+        CountSharedAttributes(join_group.referenced_attributes,
+                              tables[i]->getOutputAttributes()));
   }
 
   // Auxiliary mapping info.
@@ -237,6 +242,11 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
     CHECK(selected_probe_table_info != nullptr);
     CHECK(selected_build_table_info != nullptr);
 
+    std::cerr << selected_probe_table_info->estimated_num_output_attributes
+              << " -- "
+              << selected_build_table_info->estimated_num_output_attributes
+              << "\n";
+
     remaining_tables.erase(selected_probe_table_info);
     remaining_tables.erase(selected_build_table_info);
 
@@ -283,6 +293,10 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
       selected_probe_table_info->estimated_cardinality = cost_model_->estimateCardinality(output);
       selected_probe_table_info->estimated_selectivity = cost_model_->estimateSelectivity(output);
 
+      selected_probe_table_info->estimated_num_output_attributes =
+          CountSharedAttributes(join_group.referenced_attributes,
+                                output->getOutputAttributes());
+
       remaining_tables.emplace(selected_probe_table_info);
 
       // Update join attribute groups.
@@ -307,5 +321,18 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
   }
 }
 
+std::size_t StarSchemaHashJoinOrderOptimization::CountSharedAttributes(
+    const std::unordered_set<expressions::ExprId> &attr_set1,
+    const std::vector<expressions::AttributeReferencePtr> &attr_set2) {
+  std::size_t cnt = 0;
+  for (const auto &attr : attr_set2) {
+    if (attr_set1.find(attr->id()) != attr_set1.end()) {
+      ++cnt;
+    }
+  }
+  return cnt;
+}
+
+
 }  // namespace optimizer
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a3ba59f9/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
index 9ff89fd..6ad300c 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
@@ -62,6 +62,7 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
    * @brief A group of tables to form a hash join tree.
    */
   struct JoinGroupInfo {
+    std::unordered_set<expressions::ExprId> referenced_attributes;
     std::vector<physical::PhysicalPtr> tables;
     std::vector<std::pair<expressions::ExprId, expressions::ExprId>> join_attribute_pairs;
   };
@@ -70,20 +71,23 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
    * @brief Auxiliary information of a table for the optimizer.
    */
   struct TableInfo {
-    TableInfo(const std::size_t in_table_info_id,
-              const physical::PhysicalPtr &in_table,
-              const std::size_t in_estimated_cardinality,
-              const double in_estimated_selectivity)
-        : table_info_id(in_table_info_id),
-          table(in_table),
-          estimated_cardinality(in_estimated_cardinality),
-          estimated_selectivity(in_estimated_selectivity) {
+    TableInfo(const std::size_t table_info_id_in,
+              const physical::PhysicalPtr &table_in,
+              const std::size_t estimated_cardinality_in,
+              const double estimated_selectivity_in,
+              const std::size_t estimated_num_output_attributes_in)
+        : table_info_id(table_info_id_in),
+          table(table_in),
+          estimated_cardinality(estimated_cardinality_in),
+          estimated_selectivity(estimated_selectivity_in),
+          estimated_num_output_attributes(estimated_num_output_attributes_in) {
     }
 
     const std::size_t table_info_id;
     physical::PhysicalPtr table;
     std::size_t estimated_cardinality;
     double estimated_selectivity;
+    std::size_t estimated_num_output_attributes;
   };
 
   struct JoinPair {
@@ -91,13 +95,26 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
         : probe(probe_in), build(build_in) {
     }
 
-    inline bool isBetterThan (const JoinPair &rhs) const {
+    inline bool isBetterThan(const JoinPair &rhs) const {
       const auto &lhs = *this;
-      const bool lhs_has_small_build = lhs.build->estimated_cardinality < 0x1000;
-      const bool rhs_has_small_build = rhs.build->estimated_cardinality < 0x1000;
+      const bool lhs_has_large_output =
+          lhs.build->estimated_num_output_attributes
+              + lhs.probe->estimated_num_output_attributes > 5;
+      const bool rhs_has_large_output =
+          rhs.build->estimated_num_output_attributes
+              + rhs.probe->estimated_num_output_attributes > 5;
+      if (lhs_has_large_output != rhs_has_large_output) {
+        return rhs_has_large_output;
+      }
+
+      const bool lhs_has_small_build =
+          !lhs_has_large_output && lhs.build->estimated_cardinality < 0x1000;
+      const bool rhs_has_small_build =
+          !rhs_has_large_output && rhs.build->estimated_cardinality < 0x1000;
       if (lhs_has_small_build != rhs_has_small_build) {
         return lhs_has_small_build;
       }
+
       if (lhs.probe->estimated_cardinality != rhs.probe->estimated_cardinality) {
         return lhs.probe->estimated_cardinality < rhs.probe->estimated_cardinality;
       }
@@ -126,6 +143,10 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
       const expressions::PredicatePtr &residual_predicate,
       const std::vector<expressions::NamedExpressionPtr> &project_expressions);
 
+  static std::size_t CountSharedAttributes(
+      const std::unordered_set<expressions::ExprId> &attr_set1,
+      const std::vector<expressions::AttributeReferencePtr> &attr_set2);
+
   std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
 
   DISALLOW_COPY_AND_ASSIGN(StarSchemaHashJoinOrderOptimization);