You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/10/04 21:35:37 UTC
incubator-quickstep git commit: Improve StarSchemaSimpleCostModel to
provide closer estimation of number of groups for hash aggregation. [Forced
Update!]
Repository: incubator-quickstep
Updated Branches:
refs/heads/estimate-num-distinct-values 2b33f6264 -> a12ee48a9 (forced update)
Improve StarSchemaSimpleCostModel to provide closer estimation of number of groups for hash aggregation.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/a12ee48a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/a12ee48a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/a12ee48a
Branch: refs/heads/estimate-num-distinct-values
Commit: a12ee48a9599b614465faef6189168707fde46ec
Parents: 4126c4f
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Mon Oct 3 23:58:30 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Tue Oct 4 16:34:08 2016 -0500
----------------------------------------------------------------------
query_optimizer/cost_model/CMakeLists.txt | 2 +
.../cost_model/StarSchemaSimpleCostModel.cpp | 227 ++++++++++++++-----
.../cost_model/StarSchemaSimpleCostModel.hpp | 61 +++--
query_optimizer/expressions/ExpressionUtil.hpp | 19 ++
.../StarSchemaHashJoinOrderOptimization.hpp | 2 +-
5 files changed, 240 insertions(+), 71 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a12ee48a/query_optimizer/cost_model/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/CMakeLists.txt b/query_optimizer/cost_model/CMakeLists.txt
index abbc3da..d1646fc 100644
--- a/query_optimizer/cost_model/CMakeLists.txt
+++ b/query_optimizer/cost_model/CMakeLists.txt
@@ -50,6 +50,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostMod
quickstep_queryoptimizer_expressions_ComparisonExpression
quickstep_queryoptimizer_expressions_ExprId
quickstep_queryoptimizer_expressions_ExpressionType
+ quickstep_queryoptimizer_expressions_ExpressionUtil
quickstep_queryoptimizer_expressions_LogicalAnd
quickstep_queryoptimizer_expressions_LogicalOr
quickstep_queryoptimizer_expressions_PatternMatcher
@@ -57,6 +58,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostMod
quickstep_queryoptimizer_physical_Aggregate
quickstep_queryoptimizer_physical_HashJoin
quickstep_queryoptimizer_physical_NestedLoopsJoin
+ quickstep_queryoptimizer_physical_PatternMatcher
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_physical_PhysicalType
quickstep_queryoptimizer_physical_Selection
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a12ee48a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index 911a765..019c5ed 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -29,6 +29,7 @@
#include "query_optimizer/expressions/ComparisonExpression.hpp"
#include "query_optimizer/expressions/ExprId.hpp"
#include "query_optimizer/expressions/ExpressionType.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
#include "query_optimizer/expressions/LogicalAnd.hpp"
#include "query_optimizer/expressions/LogicalOr.hpp"
#include "query_optimizer/expressions/Predicate.hpp"
@@ -36,6 +37,7 @@
#include "query_optimizer/physical/Aggregate.hpp"
#include "query_optimizer/physical/NestedLoopsJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/PhysicalType.hpp"
#include "query_optimizer/physical/Selection.hpp"
@@ -85,8 +87,8 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinality(
shared_subplans_[shared_subplan_reference->subplan_id()]);
}
case P::PhysicalType::kSort:
- return estimateCardinality(
- std::static_pointer_cast<const P::Sort>(physical_plan)->input());
+ return estimateCardinalityForSort(
+ std::static_pointer_cast<const P::Sort>(physical_plan));
case P::PhysicalType::kWindowAggregate:
return estimateCardinalityForWindowAggregate(
std::static_pointer_cast<const P::WindowAggregate>(physical_plan));
@@ -111,9 +113,17 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForTableReference(
std::size_t StarSchemaSimpleCostModel::estimateCardinalityForSelection(
const P::SelectionPtr &physical_plan) {
- double selectivity = estimateSelectivityForSelection(physical_plan);
- return std::max(static_cast<std::size_t>(estimateCardinality(physical_plan->input()) * selectivity),
- static_cast<std::size_t>(1));
+ double selectivity =
+ estimateSelectivityForPredicate(physical_plan->filter_predicate(), physical_plan);
+ return static_cast<std::size_t>(
+ estimateCardinality(physical_plan->input()) * selectivity);
+}
+
+std::size_t StarSchemaSimpleCostModel::estimateCardinalityForSort(
+ const physical::SortPtr &physical_plan) {
+ std::size_t cardinality = estimateCardinality(
+ std::static_pointer_cast<const P::Sort>(physical_plan)->input());
+ return std::min(cardinality, static_cast<std::size_t>(physical_plan->limit()));
}
std::size_t StarSchemaSimpleCostModel::estimateCardinalityForTableGenerator(
@@ -127,8 +137,8 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForHashJoin(
std::size_t right_cardinality = estimateCardinality(physical_plan->right());
double left_selectivity = estimateSelectivity(physical_plan->left());
double right_selectivity = estimateSelectivity(physical_plan->right());
- return std::max(static_cast<std::size_t>(left_cardinality * right_selectivity) + 1,
- static_cast<std::size_t>(right_cardinality * left_selectivity) + 1);
+ return std::max(static_cast<std::size_t>(left_cardinality * right_selectivity + 0.5),
+ static_cast<std::size_t>(right_cardinality * left_selectivity + 0.5));
}
std::size_t StarSchemaSimpleCostModel::estimateCardinalityForNestedLoopsJoin(
@@ -142,8 +152,27 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForAggregate(
if (physical_plan->grouping_expressions().empty()) {
return 1;
}
- return std::max(static_cast<std::size_t>(1),
- estimateCardinality(physical_plan->input()) / 10);
+
+ std::size_t estimated_child_cardinality = estimateCardinality(physical_plan->input());
+ std::size_t estimated_num_groups = 1;
+ std::size_t max_attr_num_distinct_values = 0;
+ for (const auto &expr : physical_plan->grouping_expressions()) {
+ E::AttributeReferencePtr attr;
+ if (E::SomeAttributeReference::MatchesWithConditionalCast(expr, &attr)) {
+ std::size_t attr_num_distinct_values =
+ estimateNumDistinctValues(attr->id(), physical_plan->input());
+ estimated_num_groups *= std::max(1uL, attr_num_distinct_values);
+ max_attr_num_distinct_values =
+ std::max(max_attr_num_distinct_values, attr_num_distinct_values);
+ } else {
+ // TODO(jianqiao): implement estimateNumDistinctValues() for expressions.
+ estimated_num_groups *= 64uL;
+ }
+ }
+ estimated_num_groups = std::max(
+ std::min(estimated_num_groups, estimated_child_cardinality / 10),
+ max_attr_num_distinct_values);
+ return estimated_num_groups;
}
std::size_t StarSchemaSimpleCostModel::estimateCardinalityForWindowAggregate(
@@ -151,24 +180,85 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForWindowAggregate(
return estimateCardinality(physical_plan->input());
}
+std::size_t StarSchemaSimpleCostModel::estimateNumDistinctValues(
+ const expressions::ExprId attribute_id,
+ const physical::PhysicalPtr &physical_plan) {
+ DCHECK(E::ContainsExprId(physical_plan->getOutputAttributes(), attribute_id));
+
+ P::TableReferencePtr table_reference;
+ if (P::SomeTableReference::MatchesWithConditionalCast(physical_plan, &table_reference)) {
+ return getNumDistinctValues(attribute_id, table_reference);
+ }
+
+ double filter_selectivity = estimateSelectivityForFilterPredicate(physical_plan);
+ switch (physical_plan->getPhysicalType()) {
+ case P::PhysicalType::kSelection: // Fall through
+ case P::PhysicalType::kAggregate: {
+ const P::PhysicalPtr &child = physical_plan->children()[0];
+ if (E::ContainsExprId(child->getOutputAttributes(), attribute_id)) {
+ std::size_t child_num_distinct_values =
+ estimateNumDistinctValues(attribute_id, child);
+ return static_cast<std::size_t>(
+ child_num_distinct_values * filter_selectivity + 0.5);
+ }
+ break;
+ }
+ case P::PhysicalType::kHashJoin: {
+ const P::HashJoinPtr &hash_join =
+ std::static_pointer_cast<const P::HashJoin>(physical_plan);
+ if (E::ContainsExprId(hash_join->left()->getOutputAttributes(), attribute_id)) {
+ std::size_t left_child_num_distinct_values =
+ estimateNumDistinctValues(attribute_id, hash_join->left());
+ double right_child_selectivity =
+ estimateSelectivity(hash_join->right());
+ return static_cast<std::size_t>(
+ left_child_num_distinct_values * right_child_selectivity * filter_selectivity + 0.5);
+ }
+ if (E::ContainsExprId(hash_join->right()->getOutputAttributes(), attribute_id)) {
+ std::size_t right_child_num_distinct_values =
+ estimateNumDistinctValues(attribute_id, hash_join->right());
+ double left_child_selectivity =
+ estimateSelectivity(hash_join->left());
+ return static_cast<std::size_t>(
+ right_child_num_distinct_values * left_child_selectivity * filter_selectivity + 0.5);
+ }
+ }
+ default:
+ break;
+ }
+
+ return 16uL;
+}
+
double StarSchemaSimpleCostModel::estimateSelectivity(
const physical::PhysicalPtr &physical_plan) {
switch (physical_plan->getPhysicalType()) {
case P::PhysicalType::kSelection: {
- return estimateSelectivityForSelection(
- std::static_pointer_cast<const P::Selection>(physical_plan));
+ const P::SelectionPtr &selection =
+ std::static_pointer_cast<const P::Selection>(physical_plan);
+ double filter_selectivity =
+ estimateSelectivityForPredicate(selection->filter_predicate(), selection);
+ double child_selectivity = estimateSelectivity(selection->input());
+ return filter_selectivity * child_selectivity;
}
case P::PhysicalType::kHashJoin: {
const P::HashJoinPtr &hash_join =
std::static_pointer_cast<const P::HashJoin>(physical_plan);
- return std::min(estimateSelectivity(hash_join->left()),
- estimateSelectivity(hash_join->right()));
+ double filter_selectivity =
+ estimateSelectivityForPredicate(hash_join->residual_predicate(), hash_join);
+ double child_selectivity =
+ estimateSelectivity(hash_join->left()) * estimateSelectivity(hash_join->right());
+ return filter_selectivity * child_selectivity;
}
case P::PhysicalType::kNestedLoopsJoin: {
const P::NestedLoopsJoinPtr &nested_loop_join =
std::static_pointer_cast<const P::NestedLoopsJoin>(physical_plan);
- return std::min(estimateSelectivity(nested_loop_join->left()),
- estimateSelectivity(nested_loop_join->right()));
+ double filter_selectivity =
+ estimateSelectivityForPredicate(nested_loop_join->join_predicate(), nested_loop_join);
+ double child_selectivity = std::min(
+ estimateSelectivity(nested_loop_join->left()),
+ estimateSelectivity(nested_loop_join->right()));
+ return filter_selectivity * child_selectivity;
}
case P::PhysicalType::kSharedSubplanReference: {
const P::SharedSubplanReferencePtr shared_subplan_reference =
@@ -177,36 +267,51 @@ double StarSchemaSimpleCostModel::estimateSelectivity(
shared_subplans_[shared_subplan_reference->subplan_id()]);
}
default:
- return 1.0;
+ break;
}
-}
-double StarSchemaSimpleCostModel::estimateSelectivityForSelection(
- const physical::SelectionPtr &physical_plan) {
- const E::PredicatePtr &filter_predicate = physical_plan->filter_predicate();
+ if (physical_plan->getNumChildren() == 1) {
+ return estimateSelectivity(physical_plan->children()[0]);
+ }
- // If the subplan is a table reference, gather the number of distinct values
- // statistics for each column (attribute).
- std::unordered_map<E::ExprId, std::size_t> num_distinct_values_map;
- if (physical_plan->input()->getPhysicalType() == P::PhysicalType::kTableReference) {
- const P::TableReferencePtr &table_reference =
- std::static_pointer_cast<const P::TableReference>(physical_plan->input());
- const CatalogRelation &relation = *table_reference->relation();
- const std::vector<E::AttributeReferencePtr> &attributes = table_reference->attribute_list();
- for (std::size_t i = 0; i < attributes.size(); ++i) {
- std::size_t num_distinct_values = relation.getStatistics().getNumDistinctValues(i);
- if (num_distinct_values > 0) {
- num_distinct_values_map[attributes[i]->id()] = num_distinct_values;
- }
- }
+ return 1.0;
+}
+
+double StarSchemaSimpleCostModel::estimateSelectivityForFilterPredicate(
+ const physical::PhysicalPtr &physical_plan) {
+ E::PredicatePtr filter_predicate = nullptr;
+ switch (physical_plan->getPhysicalType()) {
+ case P::PhysicalType::kSelection:
+ filter_predicate =
+ std::static_pointer_cast<const P::Selection>(physical_plan)->filter_predicate();
+ break;
+ case P::PhysicalType::kAggregate:
+ filter_predicate =
+ std::static_pointer_cast<const P::Aggregate>(physical_plan)->filter_predicate();
+ break;
+ case P::PhysicalType::kHashJoin:
+ filter_predicate =
+ std::static_pointer_cast<const P::HashJoin>(physical_plan)->residual_predicate();
+ break;
+ case P::PhysicalType::kNestedLoopsJoin:
+ filter_predicate =
+ std::static_pointer_cast<const P::NestedLoopsJoin>(physical_plan)->join_predicate();
+ break;
+ default:
+ break;
}
- return estimateSelectivityForPredicate(num_distinct_values_map, filter_predicate);
+ if (filter_predicate == nullptr) {
+ return 1.0;
+ } else {
+ return estimateSelectivityForPredicate(filter_predicate, physical_plan);
+ }
}
+
double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
- const std::unordered_map<expressions::ExprId, std::size_t> &num_distinct_values_map,
- const expressions::PredicatePtr &filter_predicate) {
+ const expressions::PredicatePtr &filter_predicate,
+ const P::PhysicalPtr &physical_plan) {
if (filter_predicate == nullptr) {
return 1.0;
}
@@ -215,10 +320,8 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
case E::ExpressionType::kComparisonExpression: {
// Case 1 - Number of distinct values statistics available
// Case 1.1 - Equality comparison: 1.0 / num_distinct_values
- // Case 1.2 - Otherwise: 5.0 / num_distinct_values
- // Case 2 - Number of distinct values statistics not available
- // Case 2.1 - Equality comparison: 0.1
- // Case 2.2 - Otherwise: 0.5
+ // Case 1.2 - Otherwise: 0.1
+ // Case 2 - Otherwise: 0.5
const E::ComparisonExpressionPtr &comparison_expression =
std::static_pointer_cast<const E::ComparisonExpression>(filter_predicate);
E::AttributeReferencePtr attr;
@@ -226,25 +329,26 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
E::SomeScalarLiteral::Matches(comparison_expression->right())) ||
(E::SomeAttributeReference::MatchesWithConditionalCast(comparison_expression->right(), &attr) &&
E::SomeScalarLiteral::Matches(comparison_expression->left()))) {
- const auto it = num_distinct_values_map.find(attr->id());
- if (it != num_distinct_values_map.end() && it->second > 0) {
- double unit_selectivity = 1.0 / it->second;
- return comparison_expression->isEqualityComparisonPredicate()
- ? unit_selectivity
- : std::min(0.5, unit_selectivity * 5.0);
+ for (const auto &child : physical_plan->children()) {
+ if (E::ContainsExprId(child->getOutputAttributes(), attr->id())) {
+ const std::size_t child_num_distinct_values = estimateNumDistinctValues(attr->id(), child);
+ if (comparison_expression->isEqualityComparisonPredicate()) {
+ return 1.0 / child_num_distinct_values;
+ } else {
+ return 1.0 / std::max(std::min(child_num_distinct_values / 100.0, 10.0), 2.0);
+ }
+ }
}
+ return 0.1;
}
-
- return comparison_expression->isEqualityComparisonPredicate() ? 0.1 : 0.5;
+ return 0.5;
}
case E::ExpressionType::kLogicalAnd: {
const E::LogicalAndPtr &logical_and =
std::static_pointer_cast<const E::LogicalAnd>(filter_predicate);
double selectivity = 1.0;
for (const auto &predicate : logical_and->operands()) {
- selectivity =
- std::min(selectivity,
- estimateSelectivityForPredicate(num_distinct_values_map, predicate));
+ selectivity = selectivity * estimateSelectivityForPredicate(predicate, physical_plan);
}
return selectivity;
}
@@ -253,7 +357,7 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
std::static_pointer_cast<const E::LogicalOr>(filter_predicate);
double selectivity = 0;
for (const auto &predicate : logical_or->operands()) {
- selectivity += estimateSelectivityForPredicate(num_distinct_values_map, predicate);
+ selectivity += estimateSelectivityForPredicate(predicate, physical_plan);
}
return std::min(selectivity, 1.0);
}
@@ -263,6 +367,23 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
return 1.0;
}
+std::size_t StarSchemaSimpleCostModel::getNumDistinctValues(
+ const E::ExprId attribute_id,
+ const P::TableReferencePtr &table_reference) {
+ const CatalogRelation &relation = *table_reference->relation();
+ const std::vector<E::AttributeReferencePtr> &attributes = table_reference->attribute_list();
+ for (std::size_t i = 0; i < attributes.size(); ++i) {
+ if (attributes[i]->id() == attribute_id) {
+ std::size_t num_distinct_values = relation.getStatistics().getNumDistinctValues(i);
+ if (num_distinct_values > 0) {
+ return num_distinct_values;
+ }
+ break;
+ }
+ }
+ return estimateCardinalityForTableReference(table_reference);
+}
+
} // namespace cost
} // namespace optimizer
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a12ee48a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
index 4314b92..d752065 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
@@ -21,7 +21,6 @@
#define QUERY_OPTIMIZER_COST_MODEL_STAR_SCHEMA_SIMPLE_COST_MODEL_HPP_
#include <cstddef>
-#include <unordered_map>
#include <vector>
#include "query_optimizer/cost_model/CostModel.hpp"
@@ -32,6 +31,7 @@
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/Sort.hpp"
#include "query_optimizer/physical/TableGenerator.hpp"
#include "query_optimizer/physical/TableReference.hpp"
#include "query_optimizer/physical/TopLevelPlan.hpp"
@@ -67,6 +67,16 @@ class StarSchemaSimpleCostModel : public CostModel {
const physical::PhysicalPtr &physical_plan) override;
/**
+ * @brief Estimate the number of distinct values of an attribute in a relation.
+ *
+ * @param attribute_id The expression id of the target attribute.
+ * @param physical_plan The physical plan of the attribute's relation.
+ * @return The estimated number of distinct values for the attribute.
+ */
+ std::size_t estimateNumDistinctValues(const expressions::ExprId attribute_id,
+ const physical::PhysicalPtr &physical_plan);
+
+ /**
* @brief Estimate the "selectivity" of a physical plan under the assumption
* that it acts as a filtered dimension table in a hash join.
*
@@ -75,40 +85,57 @@ class StarSchemaSimpleCostModel : public CostModel {
*/
double estimateSelectivity(const physical::PhysicalPtr &physical_plan);
+ /**
+ * @brief Estimate the filter predicate's selectivity if it is present in
+ * the input plan's root node.
+ *
+ * @param physical_plan The input physical plan.
+ * @return The estimated selectivity of the filter predicate if physical_plan
+ * has such a filter predicate; 1.0 otherwise.
+ */
+ double estimateSelectivityForFilterPredicate(
+ const physical::PhysicalPtr &physical_plan);
+
private:
- std::size_t estimateCardinalityForTopLevelPlan(
- const physical::TopLevelPlanPtr &physical_plan);
+ std::size_t estimateCardinalityForAggregate(
+ const physical::AggregatePtr &physical_plan);
- std::size_t estimateCardinalityForTableReference(
- const physical::TableReferencePtr &physical_plan);
+ std::size_t estimateCardinalityForHashJoin(
+ const physical::HashJoinPtr &physical_plan);
+
+ std::size_t estimateCardinalityForNestedLoopsJoin(
+ const physical::NestedLoopsJoinPtr &physical_plan);
std::size_t estimateCardinalityForSelection(
const physical::SelectionPtr &physical_plan);
+ std::size_t estimateCardinalityForSort(
+ const physical::SortPtr &physical_plan);
+
std::size_t estimateCardinalityForTableGenerator(
const physical::TableGeneratorPtr &physical_plan);
- std::size_t estimateCardinalityForHashJoin(
- const physical::HashJoinPtr &physical_plan);
-
- std::size_t estimateCardinalityForNestedLoopsJoin(
- const physical::NestedLoopsJoinPtr &physical_plan);
+ std::size_t estimateCardinalityForTableReference(
+ const physical::TableReferencePtr &physical_plan);
- std::size_t estimateCardinalityForAggregate(
- const physical::AggregatePtr &physical_plan);
+ std::size_t estimateCardinalityForTopLevelPlan(
+ const physical::TopLevelPlanPtr &physical_plan);
std::size_t estimateCardinalityForWindowAggregate(
const physical::WindowAggregatePtr &physical_plan);
- double estimateSelectivityForSelection(
- const physical::SelectionPtr &physical_plan);
-
double estimateSelectivityForPredicate(
- const std::unordered_map<expressions::ExprId, std::size_t> &num_distinct_values_map,
- const expressions::PredicatePtr &filter_predicate);
+ const expressions::PredicatePtr &filter_predicate,
+ const physical::PhysicalPtr &physical_plan);
const std::vector<physical::PhysicalPtr> &shared_subplans_;
+ // Get the number of distinct values of an attribute in the table reference.
+ // If the stat is not avaiable, simply returns the table's cardinality.
+ std::size_t getNumDistinctValues(const expressions::ExprId attribute_id,
+ const physical::TableReferencePtr &table_reference);
+
+
DISALLOW_COPY_AND_ASSIGN(StarSchemaSimpleCostModel);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a12ee48a/query_optimizer/expressions/ExpressionUtil.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/ExpressionUtil.hpp b/query_optimizer/expressions/ExpressionUtil.hpp
index e9a4067..422d5ab 100644
--- a/query_optimizer/expressions/ExpressionUtil.hpp
+++ b/query_optimizer/expressions/ExpressionUtil.hpp
@@ -95,6 +95,25 @@ bool ContainsExpression(
}
/**
+ * @brief Checks whether expr_id equals any expression's id in \p expressions.
+ *
+ * @param expressions A list of expressions to be checked with.
+ * @param expr_id The ExprId that is to be checked if it is in \p expressions.
+ * @return True if \p expr_id is contained by \p expressions.
+ */
+template <class NamedExpressionType>
+bool ContainsExprId(
+ const std::vector<std::shared_ptr<const NamedExpressionType>> &expressions,
+ const ExprId expr_id) {
+ for (const std::shared_ptr<const NamedExpressionType> &expression : expressions) {
+ if (expression->id() == expr_id) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
* @brief Checks whether \p left is a subset of \p right.
*
* @param left The left operand of the subset operator (i.e. the one that may be
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a12ee48a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
index 4d6765c..c1a7bae 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
@@ -103,7 +103,7 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
if (lhs->estimated_selectivity < rhs->estimated_selectivity) {
return !swapped;
- } else if (lhs->estimated_cardinality < 1000u &&
+ } else if (lhs->estimated_cardinality < 100u &&
rhs->estimated_cardinality > 10000u &&
lhs->estimated_selectivity < rhs->estimated_selectivity * 1.5) {
return !swapped;