You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/08/11 20:27:56 UTC
[16/16] incubator-quickstep git commit: Initial work on better
estimation
Initial work on better estimation
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/39f57499
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/39f57499
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/39f57499
Branch: refs/heads/LIP-for-tpch
Commit: 39f574999a280e0fbcb1792db84f584deca20675
Parents: 9b90665
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Thu Aug 11 15:20:53 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Thu Aug 11 15:23:00 2016 -0500
----------------------------------------------------------------------
cli/QuickstepCli.cpp | 8 ++---
.../cost_model/StarSchemaSimpleCostModel.cpp | 36 ++++++++++++++++++++
.../cost_model/StarSchemaSimpleCostModel.hpp | 10 ++++++
utility/PlanVisualizer.cpp | 25 ++++++++++++--
4 files changed, 73 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/39f57499/cli/QuickstepCli.cpp
----------------------------------------------------------------------
diff --git a/cli/QuickstepCli.cpp b/cli/QuickstepCli.cpp
index bf0e2c7..e7ffc0c 100644
--- a/cli/QuickstepCli.cpp
+++ b/cli/QuickstepCli.cpp
@@ -347,10 +347,10 @@ int main(int argc, char* argv[]) {
// addPrimaryKeyInfoForTPCHTables(query_processor->getDefaultDatabase());
// addPrimaryKeyInfoForSSBTables(query_processor->getDefaultDatabase());
- std::string proto_str;
- google::protobuf::TextFormat::PrintToString(
- query_processor->getDefaultDatabase()->getProto(), &proto_str);
- std::cerr << proto_str << "\n";
+// std::string proto_str;
+// google::protobuf::TextFormat::PrintToString(
+// query_processor->getDefaultDatabase()->getProto(), &proto_str);
+// std::cerr << proto_str << "\n";
// query_processor->markCatalogAltered();
// query_processor->saveCatalog();
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/39f57499/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index 9eea27c..ba7a3c6 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -25,6 +25,8 @@
#include <vector>
#include "catalog/CatalogRelation.hpp"
+#include "catalog/CatalogRelationStatistics.hpp"
+#include "catalog/CatalogTypedefs.hpp"
#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ComparisonExpression.hpp"
#include "query_optimizer/expressions/ExprId.hpp"
@@ -283,6 +285,40 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
return 1.0;
}
+
+// Finds the catalog statistics that back 'attribute' somewhere underneath
+// 'physical_plan'.
+//
+// If 'physical_plan' is a TableReference whose attribute list contains an
+// attribute with the same id, '*stat' is set to the statistics of the
+// referenced relation and '*attr_id' to the position of the attribute in that
+// list. The search then continues into every child whose output attributes
+// contain the attribute id.
+//
+// When no matching TableReference is reached, neither output parameter is
+// written, so callers must initialize '*stat' (e.g. to nullptr) before the
+// call in order to detect the "not found" case.
+void StarSchemaSimpleCostModel::getStatistics(
+    const physical::PhysicalPtr &physical_plan,
+    const expressions::AttributeReferencePtr &attribute,
+    const CatalogRelationStatistics** stat,
+    attribute_id* attr_id) {
+  const auto target_id = attribute->id();
+
+  if (physical_plan->getPhysicalType() == P::PhysicalType::kTableReference) {
+    const P::TableReferencePtr table_reference =
+        std::static_pointer_cast<const P::TableReference>(physical_plan);
+    const CatalogRelation *catalog_relation = table_reference->relation();
+    const std::vector<E::AttributeReferencePtr> &attributes =
+        table_reference->attribute_list();
+    for (std::size_t i = 0; i < attributes.size(); ++i) {
+      if (attributes[i]->id() == target_id) {
+        *stat = &catalog_relation->getStatistics();
+        // The index is converted explicitly; the original relied on an
+        // implicit narrowing from std::size_t.
+        *attr_id = static_cast<attribute_id>(i);
+        // Expression ids are expected to be unique within one table
+        // reference, so there is nothing left to find.
+        break;
+      }
+    }
+  }
+
+  // Later children deliberately overwrite results from earlier ones, exactly
+  // as before; only the redundant repeat visits of the same child are removed.
+  for (const auto &child : physical_plan->children()) {
+    for (const auto &attr : child->getOutputAttributes()) {
+      if (attr->id() == target_id) {
+        getStatistics(child, attribute, stat, attr_id);
+        break;
+      }
+    }
+  }
+}
+
+
} // namespace cost
} // namespace optimizer
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/39f57499/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
index 4314b92..fdd2b36 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
@@ -24,7 +24,9 @@
#include <unordered_map>
#include <vector>
+#include "catalog/CatalogTypedefs.hpp"
#include "query_optimizer/cost_model/CostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ExprId.hpp"
#include "query_optimizer/expressions/Predicate.hpp"
#include "query_optimizer/physical/Aggregate.hpp"
@@ -39,6 +41,9 @@
#include "utility/Macros.hpp"
namespace quickstep {
+
+class CatalogRelationStatistics;
+
namespace optimizer {
namespace cost {
@@ -75,6 +80,11 @@ class StarSchemaSimpleCostModel : public CostModel {
*/
double estimateSelectivity(const physical::PhysicalPtr &physical_plan);
+ /**
+  * @brief Look up the catalog statistics and the attribute position that
+  *        back an attribute produced by a physical plan.
+  *
+  * @note Both output parameters are written only when the search reaches a
+  *       TableReference whose attribute list contains the attribute's id;
+  *       otherwise they are left unchanged, so callers should initialize
+  *       *stat to nullptr before the call.
+  *
+  * @param physical_plan The physical plan subtree to search.
+  * @param attribute The attribute whose source statistics are wanted.
+  * @param stat Output: set to point at the statistics of the relation
+  *        referenced by the matching TableReference.
+  * @param attr_id Output: set to the index of the attribute within the
+  *        matching TableReference's attribute list.
+  */ void getStatistics(const physical::PhysicalPtr &physical_plan,
+ const expressions::AttributeReferencePtr &attribute,
+ const CatalogRelationStatistics** stat,
+ attribute_id* attr_id);
+
private:
std::size_t estimateCardinalityForTopLevelPlan(
const physical::TopLevelPlanPtr &physical_plan);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/39f57499/utility/PlanVisualizer.cpp
----------------------------------------------------------------------
diff --git a/utility/PlanVisualizer.cpp b/utility/PlanVisualizer.cpp
index 9af00b4..2039a69 100644
--- a/utility/PlanVisualizer.cpp
+++ b/utility/PlanVisualizer.cpp
@@ -28,7 +28,8 @@
#include <vector>
#include "catalog/CatalogRelation.hpp"
-
+#include "catalog/CatalogRelationStatistics.hpp"
+#include "catalog/CatalogTypedefs.hpp"
#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/physical/Aggregate.hpp"
@@ -131,7 +132,27 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
for (const auto &attr : child->getOutputAttributes()) {
if (referenced_ids.find(attr->id()) != referenced_ids.end()) {
- edge_info.labels.emplace_back(attr->attribute_alias());
+ std::string attr_info = attr->attribute_alias();
+ const CatalogRelationStatistics *stat = nullptr;
+ attribute_id attr_id = 0;
+ cost_model_->getStatistics(child, attr, &stat, &attr_id);
+ if (stat != nullptr) {
+ if (stat->hasNumDistinctValues(attr_id)) {
+ attr_info.append(", # distinct = ");
+ attr_info.append(std::to_string(static_cast<std::size_t>(
+ stat->getNumDistinctValues(attr_id) * cost_model_->estimateSelectivity(child))));
+ }
+ const Type& attr_type = attr->getValueType();
+ if (stat->hasMinValue(attr_id)) {
+ attr_info.append(", min = ");
+ attr_info.append(attr_type.printValueToString(stat->getMinValue(attr_id)));
+ }
+ if (stat->hasMaxValue(attr_id)) {
+ attr_info.append(", max = ");
+ attr_info.append(attr_type.printValueToString(stat->getMaxValue(attr_id)));
+ }
+ }
+ edge_info.labels.emplace_back(attr_info);
}
}
}