You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/10/18 16:08:03 UTC
incubator-quickstep git commit: Use SimpleCostModel for estimating
JoinHashTable size
Repository: incubator-quickstep
Updated Branches:
refs/heads/build-cardinality-fix [created] 56b74ba5e
Use SimpleCostModel for estimating JoinHashTable size
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/56b74ba5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/56b74ba5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/56b74ba5
Branch: refs/heads/build-cardinality-fix
Commit: 56b74ba5e346474260621b909d3fde844fec4658
Parents: 17ffbb0
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Tue Oct 18 11:07:51 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Tue Oct 18 11:07:51 2016 -0500
----------------------------------------------------------------------
query_optimizer/CMakeLists.txt | 1 +
query_optimizer/ExecutionGenerator.cpp | 10 +++++++---
query_optimizer/ExecutionGenerator.hpp | 9 +++++++--
query_optimizer/cost_model/CMakeLists.txt | 1 +
query_optimizer/cost_model/SimpleCostModel.cpp | 11 +++++++++++
query_optimizer/cost_model/SimpleCostModel.hpp | 6 ++++++
6 files changed, 33 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/56b74ba5/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 988ffd8..022531f 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -78,6 +78,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
quickstep_queryoptimizer_QueryHandle
quickstep_queryoptimizer_QueryPlan
quickstep_queryoptimizer_costmodel_CostModel
+ quickstep_queryoptimizer_costmodel_SimpleCostModel
quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
quickstep_queryoptimizer_expressions_AggregateFunction
quickstep_queryoptimizer_expressions_Alias
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/56b74ba5/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 9347c9c..83ad488 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -58,6 +58,7 @@
#include "query_optimizer/OptimizerContext.hpp"
#include "query_optimizer/QueryHandle.hpp"
#include "query_optimizer/QueryPlan.hpp"
+#include "query_optimizer/cost_model/SimpleCostModel.hpp"
#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
#include "query_optimizer/expressions/AggregateFunction.hpp"
#include "query_optimizer/expressions/Alias.hpp"
@@ -166,8 +167,10 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) {
CHECK(P::SomeTopLevelPlan::MatchesWithConditionalCast(physical_plan, &top_level_physical_plan_))
<< "The physical plan must be rooted by a TopLevelPlan";
- cost_model_.reset(
+ cost_model_for_aggregation_.reset(
new cost::StarSchemaSimpleCostModel(top_level_physical_plan_->shared_subplans()));
+ cost_model_for_hash_join_.reset(
+ new cost::SimpleCostModel(top_level_physical_plan_->shared_subplans()));
const CatalogRelation *result_relation = nullptr;
@@ -606,7 +609,8 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
const CatalogRelation *referenced_stored_probe_relation = nullptr;
const CatalogRelation *referenced_stored_build_relation = nullptr;
- std::size_t build_cardinality = cost_model_->estimateCardinality(build_physical);
+ std::size_t build_cardinality =
+ cost_model_for_hash_join_->estimateCardinality(build_physical);
bool any_probe_attributes_nullable = false;
bool any_build_attributes_nullable = false;
@@ -1412,7 +1416,7 @@ void ExecutionGenerator::convertAggregate(
}
const std::size_t estimated_num_groups =
- cost_model_->estimateNumGroupsForAggregate(physical_plan);
+ cost_model_for_aggregation_->estimateNumGroupsForAggregate(physical_plan);
aggr_state_proto->set_estimated_num_entries(std::max(16uL, estimated_num_groups));
const QueryPlan::DAGNodeIndex aggregation_operator_index =
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/56b74ba5/query_optimizer/ExecutionGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp
index 2aaf5ab..d53ae2e 100644
--- a/query_optimizer/ExecutionGenerator.hpp
+++ b/query_optimizer/ExecutionGenerator.hpp
@@ -419,9 +419,14 @@ class ExecutionGenerator {
std::vector<CatalogRelationInfo> temporary_relation_info_vec_;
/**
- * @brief The cost model to use for creating the execution plan.
+ * @brief The cost model to use for estimating aggregation hash table size.
*/
- std::unique_ptr<cost::CostModel> cost_model_;
+ std::unique_ptr<cost::CostModel> cost_model_for_aggregation_;
+
+ /**
+ * @brief The cost model to use for estimating join hash table size.
+ */
+ std::unique_ptr<cost::CostModel> cost_model_for_hash_join_;
physical::TopLevelPlanPtr top_level_physical_plan_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/56b74ba5/query_optimizer/cost_model/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/CMakeLists.txt b/query_optimizer/cost_model/CMakeLists.txt
index ba99de3..d616696 100644
--- a/query_optimizer/cost_model/CMakeLists.txt
+++ b/query_optimizer/cost_model/CMakeLists.txt
@@ -38,6 +38,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_SimpleCostModel
quickstep_queryoptimizer_physical_PhysicalType
quickstep_queryoptimizer_physical_Selection
quickstep_queryoptimizer_physical_SharedSubplanReference
+ quickstep_queryoptimizer_physical_Sort
quickstep_queryoptimizer_physical_TableGenerator
quickstep_queryoptimizer_physical_TableReference
quickstep_queryoptimizer_physical_TopLevelPlan
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/56b74ba5/query_optimizer/cost_model/SimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/SimpleCostModel.cpp b/query_optimizer/cost_model/SimpleCostModel.cpp
index f313c90..74b62d6 100644
--- a/query_optimizer/cost_model/SimpleCostModel.cpp
+++ b/query_optimizer/cost_model/SimpleCostModel.cpp
@@ -30,6 +30,7 @@
#include "query_optimizer/physical/PhysicalType.hpp"
#include "query_optimizer/physical/Selection.hpp"
#include "query_optimizer/physical/SharedSubplanReference.hpp"
+#include "query_optimizer/physical/Sort.hpp"
#include "query_optimizer/physical/TableGenerator.hpp"
#include "query_optimizer/physical/TableReference.hpp"
#include "query_optimizer/physical/TopLevelPlan.hpp"
@@ -73,6 +74,9 @@ std::size_t SimpleCostModel::estimateCardinality(
return estimateCardinality(
shared_subplans_[shared_subplan_reference->subplan_id()]);
}
+ case P::PhysicalType::kSort:
+ return estimateCardinalityForSort(
+ std::static_pointer_cast<const P::Sort>(physical_plan));
case P::PhysicalType::kWindowAggregate:
return estimateCardinalityForWindowAggregate(
std::static_pointer_cast<const P::WindowAggregate>(physical_plan));
@@ -96,6 +100,13 @@ std::size_t SimpleCostModel::estimateCardinalityForSelection(
return estimateCardinality(physical_plan->input());
}
+std::size_t SimpleCostModel::estimateCardinalityForSort(
+ const physical::SortPtr &physical_plan) {
+ std::size_t cardinality = estimateCardinality(
+ std::static_pointer_cast<const P::Sort>(physical_plan)->input());
+ return std::min(cardinality, static_cast<std::size_t>(physical_plan->limit()));
+}
+
std::size_t SimpleCostModel::estimateCardinalityForTableGenerator(
const P::TableGeneratorPtr &physical_plan) {
return physical_plan->generator_function_handle()->getEstimatedCardinality();
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/56b74ba5/query_optimizer/cost_model/SimpleCostModel.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/SimpleCostModel.hpp b/query_optimizer/cost_model/SimpleCostModel.hpp
index 25ff8fe..16366cd 100644
--- a/query_optimizer/cost_model/SimpleCostModel.hpp
+++ b/query_optimizer/cost_model/SimpleCostModel.hpp
@@ -29,6 +29,7 @@
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/Sort.hpp"
#include "query_optimizer/physical/TableGenerator.hpp"
#include "query_optimizer/physical/TableReference.hpp"
#include "query_optimizer/physical/TopLevelPlan.hpp"
@@ -74,6 +75,11 @@ class SimpleCostModel : public CostModel {
std::size_t estimateCardinalityForTableGenerator(
const physical::TableGeneratorPtr &physical_plan);
+ // Returns the estimated cardinality as K if there is a LIMIT K clause,
+ // otherwise returns the estimated cardinality of the input plan.
+ std::size_t estimateCardinalityForSort(
+ const physical::SortPtr &physical_plan);
+
// Returns the larger value of the estimated cardinalities of two
// input plans.
std::size_t estimateCardinalityForHashJoin(