You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/08/11 20:27:56 UTC

[16/16] incubator-quickstep git commit: Initial work on better estimation

Initial work on better estimation


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/39f57499
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/39f57499
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/39f57499

Branch: refs/heads/LIP-for-tpch
Commit: 39f574999a280e0fbcb1792db84f584deca20675
Parents: 9b90665
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Thu Aug 11 15:20:53 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Thu Aug 11 15:23:00 2016 -0500

----------------------------------------------------------------------
 cli/QuickstepCli.cpp                            |  8 ++---
 .../cost_model/StarSchemaSimpleCostModel.cpp    | 36 ++++++++++++++++++++
 .../cost_model/StarSchemaSimpleCostModel.hpp    | 10 ++++++
 utility/PlanVisualizer.cpp                      | 25 ++++++++++++--
 4 files changed, 73 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/39f57499/cli/QuickstepCli.cpp
----------------------------------------------------------------------
diff --git a/cli/QuickstepCli.cpp b/cli/QuickstepCli.cpp
index bf0e2c7..e7ffc0c 100644
--- a/cli/QuickstepCli.cpp
+++ b/cli/QuickstepCli.cpp
@@ -347,10 +347,10 @@ int main(int argc, char* argv[]) {
 
 //  addPrimaryKeyInfoForTPCHTables(query_processor->getDefaultDatabase());
 //  addPrimaryKeyInfoForSSBTables(query_processor->getDefaultDatabase());
-  std::string proto_str;
-  google::protobuf::TextFormat::PrintToString(
-      query_processor->getDefaultDatabase()->getProto(), &proto_str);
-  std::cerr << proto_str << "\n";
+//  std::string proto_str;
+//  google::protobuf::TextFormat::PrintToString(
+//      query_processor->getDefaultDatabase()->getProto(), &proto_str);
+//  std::cerr << proto_str << "\n";
 //  query_processor->markCatalogAltered();
 //  query_processor->saveCatalog();
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/39f57499/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index 9eea27c..ba7a3c6 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -25,6 +25,8 @@
 #include <vector>
 
 #include "catalog/CatalogRelation.hpp"
+#include "catalog/CatalogRelationStatistics.hpp"
+#include "catalog/CatalogTypedefs.hpp"
 #include "query_optimizer/expressions/AttributeReference.hpp"
 #include "query_optimizer/expressions/ComparisonExpression.hpp"
 #include "query_optimizer/expressions/ExprId.hpp"
@@ -283,6 +285,40 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
   return 1.0;
 }
 
+
+void StarSchemaSimpleCostModel::getStatistics(
+    const physical::PhysicalPtr &physical_plan,
+    const expressions::AttributeReferencePtr &attribute,
+    const CatalogRelationStatistics** stat,
+    attribute_id* attr_id) {
+  // Searches 'physical_plan' for the table reference whose attribute list
+  // contains 'attribute'. On a match, '*stat' points at that relation's
+  // statistics and '*attr_id' is the attribute's index in that list; if
+  // nothing matches, both outputs are left untouched.
+  if (physical_plan->getPhysicalType() == P::PhysicalType::kTableReference) {
+    const P::TableReferencePtr table_reference =
+        std::static_pointer_cast<const P::TableReference>(physical_plan);
+    const std::vector<E::AttributeReferencePtr> &attributes =
+        table_reference->attribute_list();
+    for (std::size_t i = 0; i < attributes.size(); ++i) {
+      if (attributes[i]->id() == attribute->id()) {
+        *stat = &table_reference->relation()->getStatistics();
+        *attr_id = static_cast<attribute_id>(i);
+      }
+    }
+  }
+
+  // Descend only into children whose output still carries the attribute.
+  for (const auto &child : physical_plan->children()) {
+    for (const auto &attr : child->getOutputAttributes()) {
+      if (attr->id() == attribute->id()) {
+        getStatistics(child, attribute, stat, attr_id);
+      }
+    }
+  }
+}
+
+
 }  // namespace cost
 }  // namespace optimizer
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/39f57499/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
index 4314b92..fdd2b36 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
@@ -24,7 +24,9 @@
 #include <unordered_map>
 #include <vector>
 
+#include "catalog/CatalogTypedefs.hpp"
 #include "query_optimizer/cost_model/CostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
 #include "query_optimizer/expressions/ExprId.hpp"
 #include "query_optimizer/expressions/Predicate.hpp"
 #include "query_optimizer/physical/Aggregate.hpp"
@@ -39,6 +41,9 @@
 #include "utility/Macros.hpp"
 
 namespace quickstep {
+
+class CatalogRelationStatistics;
+
 namespace optimizer {
 namespace cost {
 
@@ -75,6 +80,11 @@ class StarSchemaSimpleCostModel : public CostModel {
    */
   double estimateSelectivity(const physical::PhysicalPtr &physical_plan);
 
+  void getStatistics(const physical::PhysicalPtr &physical_plan,  // Plan subtree searched for 'attribute'.
+                     const expressions::AttributeReferencePtr &attribute,  // Attribute to locate.
+                     const CatalogRelationStatistics** stat,  // Out: matching relation's statistics.
+                     attribute_id* attr_id);  // Out: index in the table's attribute list.
+
  private:
   std::size_t estimateCardinalityForTopLevelPlan(
       const physical::TopLevelPlanPtr &physical_plan);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/39f57499/utility/PlanVisualizer.cpp
----------------------------------------------------------------------
diff --git a/utility/PlanVisualizer.cpp b/utility/PlanVisualizer.cpp
index 9af00b4..2039a69 100644
--- a/utility/PlanVisualizer.cpp
+++ b/utility/PlanVisualizer.cpp
@@ -28,7 +28,8 @@
 #include <vector>
 
 #include "catalog/CatalogRelation.hpp"
-
+#include "catalog/CatalogRelationStatistics.hpp"
+#include "catalog/CatalogTypedefs.hpp"
 #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
 #include "query_optimizer/expressions/AttributeReference.hpp"
 #include "query_optimizer/physical/Aggregate.hpp"
@@ -131,7 +132,27 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
 
     for (const auto &attr : child->getOutputAttributes()) {
       if (referenced_ids.find(attr->id()) != referenced_ids.end()) {
-        edge_info.labels.emplace_back(attr->attribute_alias());
+        std::string attr_info = attr->attribute_alias();
+        const CatalogRelationStatistics *stat = nullptr;
+        attribute_id attr_id = 0;
+        cost_model_->getStatistics(child, attr, &stat, &attr_id);
+        if (stat != nullptr) {
+          if (stat->hasNumDistinctValues(attr_id)) {
+            attr_info.append(", # distinct = ");
+            attr_info.append(std::to_string(static_cast<std::size_t>(
+                stat->getNumDistinctValues(attr_id) * cost_model_->estimateSelectivity(child))));
+          }
+          const Type& attr_type = attr->getValueType();
+          if (stat->hasMinValue(attr_id)) {
+            attr_info.append(", min = ");
+            attr_info.append(attr_type.printValueToString(stat->getMinValue(attr_id)));
+          }
+          if (stat->hasMaxValue(attr_id)) {
+            attr_info.append(", max = ");
+            attr_info.append(attr_type.printValueToString(stat->getMaxValue(attr_id)));
+          }
+        }
+        edge_info.labels.emplace_back(attr_info);
       }
     }
   }