You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2017/01/31 22:57:18 UTC
[3/3] incubator-quickstep git commit: Add BitVectorExactFilter as a
LIP filter and supports Join-to-Semijoin transformation.
Add BitVectorExactFilter as a LIP filter and supports Join-to-Semijoin transformation.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/4ba819c5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/4ba819c5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/4ba819c5
Branch: refs/heads/exact-filter
Commit: 4ba819c5b82af1d9284525bd7a16784e0254be3f
Parents: 5ffdaaf
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Thu Oct 27 14:16:32 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Tue Jan 31 16:57:09 2017 -0600
----------------------------------------------------------------------
query_optimizer/CMakeLists.txt | 4 +
query_optimizer/ExecutionGenerator.cpp | 62 +++
query_optimizer/ExecutionGenerator.hpp | 8 +
query_optimizer/LIPFilterGenerator.cpp | 109 +++--
query_optimizer/LIPFilterGenerator.hpp | 49 ++-
query_optimizer/PhysicalGenerator.cpp | 19 +-
query_optimizer/cost_model/CMakeLists.txt | 5 +
query_optimizer/cost_model/SimpleCostModel.cpp | 9 +
query_optimizer/cost_model/SimpleCostModel.hpp | 5 +
.../cost_model/StarSchemaSimpleCostModel.cpp | 163 ++++++-
.../cost_model/StarSchemaSimpleCostModel.hpp | 83 ++++
query_optimizer/expressions/ExpressionUtil.hpp | 8 +-
query_optimizer/physical/CMakeLists.txt | 14 +
query_optimizer/physical/FilterJoin.cpp | 115 +++++
query_optimizer/physical/FilterJoin.hpp | 187 ++++++++
.../physical/LIPFilterConfiguration.hpp | 265 ++++++++---
query_optimizer/physical/PatternMatcher.hpp | 2 +
query_optimizer/physical/PhysicalType.hpp | 1 +
query_optimizer/physical/TopLevelPlan.hpp | 3 +-
query_optimizer/rules/AttachLIPFilters.cpp | 28 +-
query_optimizer/rules/CMakeLists.txt | 22 +
query_optimizer/rules/InjectJoinFilters.cpp | 438 +++++++++++++++++++
query_optimizer/rules/InjectJoinFilters.hpp | 116 +++++
query_optimizer/tests/OptimizerTextTest.cpp | 2 +
relational_operators/BuildLIPFilterOperator.cpp | 154 +++++++
relational_operators/BuildLIPFilterOperator.hpp | 200 +++++++++
relational_operators/CMakeLists.txt | 24 +
relational_operators/WorkOrder.proto | 49 ++-
relational_operators/WorkOrderFactory.cpp | 45 ++
utility/CMakeLists.txt | 1 +
utility/PlanVisualizer.cpp | 24 +-
utility/lip_filter/BitVectorExactFilter.hpp | 202 +++++++++
utility/lip_filter/CMakeLists.txt | 11 +
utility/lip_filter/LIPFilter.hpp | 2 +-
utility/lip_filter/LIPFilter.proto | 25 +-
utility/lip_filter/LIPFilterDeployment.cpp | 72 +--
utility/lip_filter/LIPFilterDeployment.hpp | 28 +-
utility/lip_filter/LIPFilterFactory.cpp | 50 +++
38 files changed, 2394 insertions(+), 210 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 0ca971d..7f90e11 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -96,6 +96,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
quickstep_queryoptimizer_physical_CreateTable
quickstep_queryoptimizer_physical_DeleteTuples
quickstep_queryoptimizer_physical_DropTable
+ quickstep_queryoptimizer_physical_FilterJoin
quickstep_queryoptimizer_physical_HashJoin
quickstep_queryoptimizer_physical_InsertSelection
quickstep_queryoptimizer_physical_InsertTuple
@@ -115,6 +116,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
quickstep_queryoptimizer_physical_WindowAggregate
quickstep_relationaloperators_AggregationOperator
quickstep_relationaloperators_BuildHashOperator
+ quickstep_relationaloperators_BuildLIPFilterOperator
quickstep_relationaloperators_CreateIndexOperator
quickstep_relationaloperators_CreateTableOperator
quickstep_relationaloperators_DeleteOperator
@@ -161,6 +163,7 @@ target_link_libraries(quickstep_queryoptimizer_LIPFilterGenerator
quickstep_queryoptimizer_QueryPlan
quickstep_queryoptimizer_expressions_ExprId
quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_FilterJoin
quickstep_queryoptimizer_physical_HashJoin
quickstep_queryoptimizer_physical_LIPFilterConfiguration
quickstep_queryoptimizer_physical_Physical
@@ -206,6 +209,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_logical_Logical
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_rules_AttachLIPFilters
+ quickstep_queryoptimizer_rules_InjectJoinFilters
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
quickstep_queryoptimizer_rules_ReorderColumns
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index e25b8ad..ce1452e 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -76,6 +76,7 @@
#include "query_optimizer/physical/CreateTable.hpp"
#include "query_optimizer/physical/DeleteTuples.hpp"
#include "query_optimizer/physical/DropTable.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/InsertSelection.hpp"
#include "query_optimizer/physical/InsertTuple.hpp"
@@ -95,6 +96,7 @@
#include "query_optimizer/physical/WindowAggregate.hpp"
#include "relational_operators/AggregationOperator.hpp"
#include "relational_operators/BuildHashOperator.hpp"
+#include "relational_operators/BuildLIPFilterOperator.hpp"
#include "relational_operators/CreateIndexOperator.hpp"
#include "relational_operators/CreateTableOperator.hpp"
#include "relational_operators/DeleteOperator.hpp"
@@ -271,6 +273,9 @@ void ExecutionGenerator::generatePlanInternal(
case P::PhysicalType::kDropTable:
return convertDropTable(
std::static_pointer_cast<const P::DropTable>(physical_plan));
+ case P::PhysicalType::kFilterJoin:
+ return convertFilterJoin(
+ std::static_pointer_cast<const P::FilterJoin>(physical_plan));
case P::PhysicalType::kHashJoin:
return convertHashJoin(
std::static_pointer_cast<const P::HashJoin>(physical_plan));
@@ -608,6 +613,63 @@ void ExecutionGenerator::convertSharedSubplanReference(const physical::SharedSub
}
}
+void ExecutionGenerator::convertFilterJoin(const P::FilterJoinPtr &physical_plan) {
+ P::PhysicalPtr probe_physical = physical_plan->left();
+ P::PhysicalPtr build_physical = physical_plan->right();
+
+ // Let B denote the build side child. If B is also a FilterJoin, then the
+ // actual "concrete" input relation is B's probe side child, and B's build
+ // side becomes a LIPFilter that is attached to the BuildLIPFilterOperator
+ // created below.
+ P::FilterJoinPtr filter_join;
+ if (P::SomeFilterJoin::MatchesWithConditionalCast(build_physical, &filter_join)) {
+ build_physical = filter_join->left();
+ DCHECK(build_physical->getPhysicalType() != P::PhysicalType::kFilterJoin);
+ }
+
+ // Convert the predicate proto.
+ QueryContext::predicate_id build_side_predicate_index = QueryContext::kInvalidPredicateId;
+ if (physical_plan->build_side_filter_predicate()) {
+ build_side_predicate_index = query_context_proto_->predicates_size();
+
+ std::unique_ptr<const Predicate> build_side_predicate(
+ convertPredicate(physical_plan->build_side_filter_predicate()));
+ query_context_proto_->add_predicates()->CopyFrom(build_side_predicate->getProto());
+ }
+
+ const CatalogRelationInfo *probe_relation_info =
+ findRelationInfoOutputByPhysical(probe_physical);
+ const CatalogRelationInfo *build_relation_info =
+ findRelationInfoOutputByPhysical(build_physical);
+
+ // Create a BuildLIPFilterOperator for the FilterJoin. This operator builds
+ // LIP filters that are applied properly in downstream operators to achieve
+ // the filter-join semantics.
+ const QueryPlan::DAGNodeIndex build_filter_operator_index =
+ execution_plan_->addRelationalOperator(
+ new BuildLIPFilterOperator(
+ query_handle_->query_id(),
+ *build_relation_info->relation,
+ build_side_predicate_index,
+ build_relation_info->isStoredRelation()));
+
+ if (!build_relation_info->isStoredRelation()) {
+ execution_plan_->addDirectDependency(build_filter_operator_index,
+ build_relation_info->producer_operator_index,
+ false /* is_pipeline_breaker */);
+ }
+
+ physical_to_output_relation_map_.emplace(
+ std::piecewise_construct,
+ std::forward_as_tuple(physical_plan),
+ std::forward_as_tuple(probe_relation_info->producer_operator_index,
+ probe_relation_info->relation));
+
+ DCHECK(lip_filter_generator_ != nullptr);
+ lip_filter_generator_->addFilterJoinInfo(physical_plan,
+ build_filter_operator_index);
+}
+
void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
// HashJoin is converted to three operators:
// BuildHash, HashJoin, DestroyHash. The second is the primary operator.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/ExecutionGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp
index 55197c9..eba6eee 100644
--- a/query_optimizer/ExecutionGenerator.hpp
+++ b/query_optimizer/ExecutionGenerator.hpp
@@ -46,6 +46,7 @@
#include "query_optimizer/physical/CreateTable.hpp"
#include "query_optimizer/physical/DeleteTuples.hpp"
#include "query_optimizer/physical/DropTable.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/InsertSelection.hpp"
#include "query_optimizer/physical/InsertTuple.hpp"
@@ -248,6 +249,13 @@ class ExecutionGenerator {
void convertSharedSubplanReference(const physical::SharedSubplanReferencePtr &physical_plan);
/**
+ * @brief Converts a FilterJoin to a BuildLIPFilter operator.
+ *
+ * @param physical_plan The FilterJoin to be converted.
+ */
+ void convertFilterJoin(const physical::FilterJoinPtr &physical_plan);
+
+ /**
* @brief Converts a HashJoin to BuildHash, HashJoin and
* DestroyHash operators.
*
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/LIPFilterGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/LIPFilterGenerator.cpp b/query_optimizer/LIPFilterGenerator.cpp
index 404037e..2ce2ea8 100644
--- a/query_optimizer/LIPFilterGenerator.cpp
+++ b/query_optimizer/LIPFilterGenerator.cpp
@@ -20,11 +20,13 @@
#include "query_optimizer/LIPFilterGenerator.hpp"
#include <map>
+#include <memory>
#include <utility>
#include <vector>
#include "catalog/CatalogAttribute.hpp"
#include "query_execution/QueryContext.pb.h"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
#include "relational_operators/RelationalOperator.hpp"
#include "types/Type.hpp"
#include "utility/lip_filter/LIPFilter.hpp"
@@ -47,7 +49,7 @@ void LIPFilterGenerator::registerAttributeMap(
if (build_it != build_info_map.end()) {
auto &map_entry = attribute_map_[node];
for (const auto &info : build_it->second) {
- E::ExprId attr_id = info.build_attribute->id();
+ E::ExprId attr_id = info->build_attribute()->id();
map_entry.emplace(attr_id, attribute_substitution_map.at(attr_id));
}
}
@@ -57,15 +59,16 @@ void LIPFilterGenerator::registerAttributeMap(
if (probe_it != probe_info_map.end()) {
auto &map_entry = attribute_map_[node];
for (const auto &info : probe_it->second) {
- E::ExprId attr_id = info.probe_attribute->id();
+ E::ExprId attr_id = info->probe_attribute()->id();
map_entry.emplace(attr_id, attribute_substitution_map.at(attr_id));
}
}
}
void LIPFilterGenerator::deployLIPFilters(QueryPlan *execution_plan,
- serialization::QueryContext *query_context_proto) const {
- LIPFilterBuilderMap lip_filter_builder_map;
+ serialization::QueryContext *query_context_proto) {
+ lip_filter_builder_map_.clear();
+ lip_filter_deployment_protos_.clear();
// Deploy builders
const auto &build_info_map = lip_filter_configuration_->getBuildInfoMap();
@@ -76,8 +79,7 @@ void LIPFilterGenerator::deployLIPFilters(QueryPlan *execution_plan,
query_context_proto,
info.builder_node,
info.builder_operator_index,
- build_it->second,
- &lip_filter_builder_map);
+ build_it->second);
}
}
@@ -90,10 +92,16 @@ void LIPFilterGenerator::deployLIPFilters(QueryPlan *execution_plan,
query_context_proto,
info.prober_node,
info.prober_operator_index,
- probe_it->second,
- lip_filter_builder_map);
+ probe_it->second);
}
}
+
+ // Attach LIPFilterDeployment information to the RelationalOperators.
+ for (const auto &entry : lip_filter_deployment_protos_) {
+ RelationalOperator *relop =
+ execution_plan->getQueryPlanDAGMutable()->getNodePayloadMutable(entry.first);
+ relop->deployLIPFilters(entry.second.first);
+ }
}
void LIPFilterGenerator::deployBuilderInternal(
@@ -101,30 +109,46 @@ void LIPFilterGenerator::deployBuilderInternal(
serialization::QueryContext *query_context_proto,
const physical::PhysicalPtr &builder_node,
const QueryPlan::DAGNodeIndex builder_operator_index,
- const std::vector<physical::LIPFilterBuildInfo> &build_info_vec,
- LIPFilterBuilderMap *lip_filter_builder_map) const {
- const auto lip_deployment_index = query_context_proto->lip_filter_deployments_size();
+ const std::vector<physical::LIPFilterBuildInfoPtr> &build_info_vec) {
auto *lip_filter_deployment_info_proto =
- query_context_proto->add_lip_filter_deployments();
- lip_filter_deployment_info_proto->set_action_type(serialization::LIPFilterActionType::BUILD);
+ getLIPFilterDeploymentProto(builder_operator_index, query_context_proto);
const auto &builder_attribute_map = attribute_map_.at(builder_node);
for (const auto &info : build_info_vec) {
// Add the LIPFilter information into query context.
const QueryContext::lip_filter_id lip_filter_id = query_context_proto->lip_filters_size();
serialization::LIPFilter *lip_filter_proto = query_context_proto->add_lip_filters();
- const CatalogAttribute *target_attr = builder_attribute_map.at(info.build_attribute->id());
+ const CatalogAttribute *target_attr =
+ builder_attribute_map.at(info->build_attribute()->id());
const Type &attr_type = target_attr->getType();
- switch (info.filter_type) {
+ switch (info->filter_type()) {
case LIPFilterType::kSingleIdentityHashFilter: {
DCHECK(!attr_type.isVariableLength());
+ const P::SingleIdentityHashFilterBuildInfo &sihf_info =
+ *std::static_pointer_cast<const P::SingleIdentityHashFilterBuildInfo>(info);
lip_filter_proto->set_lip_filter_type(
serialization::LIPFilterType::SINGLE_IDENTITY_HASH_FILTER);
- lip_filter_proto->SetExtension(
- serialization::SingleIdentityHashFilter::filter_cardinality, info.filter_cardinality);
- lip_filter_proto->SetExtension(
- serialization::SingleIdentityHashFilter::attribute_size, attr_type.minimumByteLength());
+ lip_filter_proto->SetExtension(serialization::SingleIdentityHashFilter::filter_cardinality,
+ sihf_info.filter_cardinality());
+ lip_filter_proto->SetExtension(serialization::SingleIdentityHashFilter::attribute_size,
+ attr_type.minimumByteLength());
+ break;
+ }
+ case LIPFilterType::kBitVectorExactFilter: {
+ DCHECK(!attr_type.isVariableLength());
+ const P::BitVectorExactFilterBuildInfo &bvef_info =
+ *std::static_pointer_cast<const P::BitVectorExactFilterBuildInfo>(info);
+ lip_filter_proto->set_lip_filter_type(
+ serialization::LIPFilterType::BIT_VECTOR_EXACT_FILTER);
+ lip_filter_proto->SetExtension(serialization::BitVectorExactFilter::min_value,
+ bvef_info.min_value());
+ lip_filter_proto->SetExtension(serialization::BitVectorExactFilter::max_value,
+ bvef_info.max_value());
+ lip_filter_proto->SetExtension(serialization::BitVectorExactFilter::attribute_size,
+ attr_type.minimumByteLength());
+ lip_filter_proto->SetExtension(serialization::BitVectorExactFilter::is_anti_filter,
+ bvef_info.is_anti_filter());
break;
}
default:
@@ -133,21 +157,16 @@ void LIPFilterGenerator::deployBuilderInternal(
}
// Register the builder information which is needed later by the probers.
- lip_filter_builder_map->emplace(
- std::make_pair(info.build_attribute->id(), builder_node),
+ lip_filter_builder_map_.emplace(
+ std::make_pair(info->build_attribute()->id(), builder_node),
std::make_pair(lip_filter_id, builder_operator_index));
// Add the builder deployment information into query context.
- auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_entries();
+ auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_build_entries();
lip_filter_entry_proto->set_lip_filter_id(lip_filter_id);
lip_filter_entry_proto->set_attribute_id(target_attr->getID());
lip_filter_entry_proto->mutable_attribute_type()->CopyFrom(attr_type.getProto());
}
-
- // Attach the LIPFilterDeployment information to the RelationalOperator.
- RelationalOperator *relop =
- execution_plan->getQueryPlanDAGMutable()->getNodePayloadMutable(builder_operator_index);
- relop->deployLIPFilters(lip_deployment_index);
}
void LIPFilterGenerator::deployProberInteral(
@@ -155,23 +174,21 @@ void LIPFilterGenerator::deployProberInteral(
serialization::QueryContext *query_context_proto,
const physical::PhysicalPtr &prober_node,
const QueryPlan::DAGNodeIndex prober_operator_index,
- const std::vector<physical::LIPFilterProbeInfo> &probe_info_vec,
- const LIPFilterBuilderMap &lip_filter_builder_map) const {
- const auto lip_deployment_index = query_context_proto->lip_filter_deployments_size();
+ const std::vector<physical::LIPFilterProbeInfoPtr> &probe_info_vec) {
auto *lip_filter_deployment_info_proto =
- query_context_proto->add_lip_filter_deployments();
- lip_filter_deployment_info_proto->set_action_type(serialization::LIPFilterActionType::PROBE);
+ getLIPFilterDeploymentProto(prober_operator_index, query_context_proto);
const auto &prober_attribute_map = attribute_map_.at(prober_node);
for (const auto &info : probe_info_vec) {
// Find the corresponding builder for the to-be-probed LIPFilter.
const auto &builder_info =
- lip_filter_builder_map.at(
- std::make_pair(info.build_attribute->id(), info.builder));
- const CatalogAttribute *target_attr = prober_attribute_map.at(info.probe_attribute->id());
+ lip_filter_builder_map_.at(
+ std::make_pair(info->build_attribute()->id(), info->builder()));
+ const CatalogAttribute *target_attr =
+ prober_attribute_map.at(info->probe_attribute()->id());
// Add the prober deployment information into query context.
- auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_entries();
+ auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_probe_entries();
lip_filter_entry_proto->set_lip_filter_id(builder_info.first);
lip_filter_entry_proto->set_attribute_id(target_attr->getID());
lip_filter_entry_proto->mutable_attribute_type()->CopyFrom(
@@ -183,11 +200,23 @@ void LIPFilterGenerator::deployProberInteral(
builder_info.second,
true /* is_pipeline_breaker */);
}
+}
- // Attach the LIPFilterDeployment information to the RelationalOperator.
- RelationalOperator *relop =
- execution_plan->getQueryPlanDAGMutable()->getNodePayloadMutable(prober_operator_index);
- relop->deployLIPFilters(lip_deployment_index);
+serialization::LIPFilterDeployment* LIPFilterGenerator::getLIPFilterDeploymentProto(
+ const QueryPlan::DAGNodeIndex op_index,
+ serialization::QueryContext *query_context_proto) {
+ const auto proto_it = lip_filter_deployment_protos_.find(op_index);
+ if (proto_it == lip_filter_deployment_protos_.end()) {
+ const int lip_deployment_index =
+ query_context_proto->lip_filter_deployments_size();
+ auto *lip_filter_deployment_proto =
+ query_context_proto->add_lip_filter_deployments();
+ lip_filter_deployment_protos_.emplace(
+ op_index, std::make_pair(lip_deployment_index, lip_filter_deployment_proto));
+ return lip_filter_deployment_proto;
+ } else {
+ return proto_it->second.second;
+ }
}
} // namespace optimizer
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/LIPFilterGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/LIPFilterGenerator.hpp b/query_optimizer/LIPFilterGenerator.hpp
index 9d191a1..750499d 100644
--- a/query_optimizer/LIPFilterGenerator.hpp
+++ b/query_optimizer/LIPFilterGenerator.hpp
@@ -30,6 +30,7 @@
#include "query_optimizer/expressions/ExprId.hpp"
#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/Selection.hpp"
@@ -39,7 +40,12 @@
namespace quickstep {
-namespace serialization { class QueryContext; }
+namespace serialization {
+
+class QueryContext;
+class LIPFilterDeployment;
+
+}
class CatalogAttribute;
@@ -93,6 +99,20 @@ class LIPFilterGenerator {
/**
* @brief Add physical-to-execution mapping information for deploying LIPFilters
+ * to a FilterJoin node.
+ *
+ * @param filter_join A physical FilterJoin node.
+ * @param build_filter_operator_index The index of the BuildLIPFilterOperator
+ * that corresponds to \p filter_join in the execution plan.
+ */
+ void addFilterJoinInfo(const physical::FilterJoinPtr &filter_join,
+ const QueryPlan::DAGNodeIndex build_filter_operator_index) {
+ builder_infos_.emplace_back(filter_join, build_filter_operator_index);
+ prober_infos_.emplace_back(filter_join, build_filter_operator_index);
+ }
+
+ /**
+ * @brief Add physical-to-execution mapping information for deploying LIPFilters
* to a hash-join.
*
* @param hash_join A physical HashJoin node.
@@ -128,7 +148,7 @@ class LIPFilterGenerator {
* @param query_context_proto QueryContext protobuf for the execution plan.
*/
void deployLIPFilters(QueryPlan *execution_plan,
- serialization::QueryContext *query_context_proto) const;
+ serialization::QueryContext *query_context_proto);
private:
/**
@@ -157,24 +177,21 @@ class LIPFilterGenerator {
const QueryPlan::DAGNodeIndex prober_operator_index;
};
- // Maps each LIPFilter's building attribute to the LIPFilter's id in QueryContext
- // as well as the LIPFilter's building relational operator's index.
- typedef std::map<std::pair<expressions::ExprId, physical::PhysicalPtr>,
- std::pair<QueryContext::lip_filter_id, QueryPlan::DAGNodeIndex>> LIPFilterBuilderMap;
-
void deployBuilderInternal(QueryPlan *execution_plan,
serialization::QueryContext *query_context_proto,
const physical::PhysicalPtr &builder_node,
const QueryPlan::DAGNodeIndex builder_operator_index,
- const std::vector<physical::LIPFilterBuildInfo> &build_info_vec,
- LIPFilterBuilderMap *lip_filter_builder_map) const;
+ const std::vector<physical::LIPFilterBuildInfoPtr> &build_info_vec);
void deployProberInteral(QueryPlan *execution_plan,
serialization::QueryContext *query_context_proto,
const physical::PhysicalPtr &prober_node,
const QueryPlan::DAGNodeIndex prober_operator_index,
- const std::vector<physical::LIPFilterProbeInfo> &probe_info_vec,
- const LIPFilterBuilderMap &lip_filter_builder_map) const;
+ const std::vector<physical::LIPFilterProbeInfoPtr> &probe_info_vec);
+
+ serialization::LIPFilterDeployment* getLIPFilterDeploymentProto(
+ const QueryPlan::DAGNodeIndex op_index,
+ serialization::QueryContext *query_context_proto);
const physical::LIPFilterConfigurationPtr lip_filter_configuration_;
@@ -183,6 +200,16 @@ class LIPFilterGenerator {
std::map<physical::PhysicalPtr, std::map<expressions::ExprId, const CatalogAttribute *>> attribute_map_;
+ // Maps each LIPFilter's building attribute to the LIPFilter's id in QueryContext
+ // as well as the LIPFilter's building relational operator's index.
+ std::map<std::pair<expressions::ExprId, physical::PhysicalPtr>,
+ std::pair<QueryContext::lip_filter_id, QueryPlan::DAGNodeIndex>> lip_filter_builder_map_;
+
+ // Maps each relational operator's index to the attached LIPFilterDeployment's
+ // index and proto.
+ std::map<QueryPlan::DAGNodeIndex,
+ std::pair<int, serialization::LIPFilterDeployment *>> lip_filter_deployment_protos_;
+
DISALLOW_COPY_AND_ASSIGN(LIPFilterGenerator);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index bd05267..5dc0ffb 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -27,6 +27,7 @@
#include "query_optimizer/logical/Logical.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/rules/AttachLIPFilters.hpp"
+#include "query_optimizer/rules/InjectJoinFilters.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
#include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp"
#include "query_optimizer/rules/ReorderColumns.hpp"
@@ -56,6 +57,14 @@ DEFINE_bool(reorder_hash_joins, true,
"cardinality and selective tables to be joined first, which is suitable "
"for queries on star-schema tables.");
+DEFINE_bool(use_filter_joins, true,
+ "If true, apply an optimization that strength-reduces HashJoins to "
+ "FilterJoins (implemented as LIPFilters attached to some anchoring "
+ "operators. Briefly speaking, in the case that the join attribute has "
+ "consecutive integer values bounded in a reasonably small range, we "
+ "build a BitVector on the build-side attribute and use the BitVector "
+ "to filter the probe side table.");
+
DEFINE_bool(use_lip_filters, true,
"If true, use LIP (Lookahead Information Passing) filters to accelerate "
"query processing. LIP filters are effective for queries on star schema "
@@ -133,9 +142,13 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
rules.emplace_back(new ReorderColumns());
}
- // NOTE(jianqiao): Adding rules after AttachLIPFilters requires extra handling
- // of LIPFilterConfiguration for transformed nodes. So currently it is suggested
- // that all the new rules be placed before this point.
+ // NOTE(jianqiao): Adding rules after InjectJoinFilters (or AttachLIPFilters) requires
+ // extra handling of LIPFilterConfiguration for transformed nodes. So currently it is
+ // suggested that all the new rules be placed before this point.
+ if (FLAGS_use_filter_joins) {
+ rules.emplace_back(new InjectJoinFilters());
+ }
+
if (FLAGS_use_lip_filters) {
rules.emplace_back(new AttachLIPFilters());
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/cost_model/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/CMakeLists.txt b/query_optimizer/cost_model/CMakeLists.txt
index 90133e7..5f28bb3 100644
--- a/query_optimizer/cost_model/CMakeLists.txt
+++ b/query_optimizer/cost_model/CMakeLists.txt
@@ -33,6 +33,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_SimpleCostModel
quickstep_catalog_CatalogRelationStatistics
quickstep_queryoptimizer_costmodel_CostModel
quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_FilterJoin
quickstep_queryoptimizer_physical_HashJoin
quickstep_queryoptimizer_physical_NestedLoopsJoin
quickstep_queryoptimizer_physical_Physical
@@ -49,6 +50,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostMod
glog
quickstep_catalog_CatalogRelation
quickstep_catalog_CatalogRelationStatistics
+ quickstep_catalog_CatalogTypedefs
quickstep_queryoptimizer_costmodel_CostModel
quickstep_queryoptimizer_expressions_AttributeReference
quickstep_queryoptimizer_expressions_ComparisonExpression
@@ -60,6 +62,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostMod
quickstep_queryoptimizer_expressions_PatternMatcher
quickstep_queryoptimizer_expressions_Predicate
quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_FilterJoin
quickstep_queryoptimizer_physical_HashJoin
quickstep_queryoptimizer_physical_NestedLoopsJoin
quickstep_queryoptimizer_physical_PatternMatcher
@@ -72,6 +75,8 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostMod
quickstep_queryoptimizer_physical_TableReference
quickstep_queryoptimizer_physical_TopLevelPlan
quickstep_queryoptimizer_physical_WindowAggregate
+ quickstep_types_NullType
+ quickstep_types_TypedValue
quickstep_utility_Macros)
# Module all-in-one library:
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/cost_model/SimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/SimpleCostModel.cpp b/query_optimizer/cost_model/SimpleCostModel.cpp
index 7808898..e9d2e3a 100644
--- a/query_optimizer/cost_model/SimpleCostModel.cpp
+++ b/query_optimizer/cost_model/SimpleCostModel.cpp
@@ -27,6 +27,7 @@
#include "query_optimizer/cost_model/CostModel.hpp"
#include "query_optimizer/physical/Aggregate.hpp"
#include "query_optimizer/physical/NestedLoopsJoin.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/PhysicalType.hpp"
@@ -61,6 +62,9 @@ std::size_t SimpleCostModel::estimateCardinality(
case P::PhysicalType::kTableGenerator:
return estimateCardinalityForTableGenerator(
std::static_pointer_cast<const P::TableGenerator>(physical_plan));
+ case P::PhysicalType::kFilterJoin:
+ return estimateCardinalityForFilterJoin(
+ std::static_pointer_cast<const P::FilterJoin>(physical_plan));
case P::PhysicalType::kHashJoin:
return estimateCardinalityForHashJoin(
std::static_pointer_cast<const P::HashJoin>(physical_plan));
@@ -119,6 +123,11 @@ std::size_t SimpleCostModel::estimateCardinalityForTableGenerator(
return physical_plan->generator_function_handle()->getEstimatedCardinality();
}
+std::size_t SimpleCostModel::estimateCardinalityForFilterJoin(
+ const P::FilterJoinPtr &physical_plan) {
+ return estimateCardinality(physical_plan->left());
+}
+
std::size_t SimpleCostModel::estimateCardinalityForHashJoin(
const P::HashJoinPtr &physical_plan) {
return std::max(estimateCardinality(physical_plan->left()),
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/cost_model/SimpleCostModel.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/SimpleCostModel.hpp b/query_optimizer/cost_model/SimpleCostModel.hpp
index 16366cd..4edc2fe 100644
--- a/query_optimizer/cost_model/SimpleCostModel.hpp
+++ b/query_optimizer/cost_model/SimpleCostModel.hpp
@@ -26,6 +26,7 @@
#include "query_optimizer/cost_model/CostModel.hpp"
#include "query_optimizer/physical/Aggregate.hpp"
#include "query_optimizer/physical/NestedLoopsJoin.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/Selection.hpp"
@@ -80,6 +81,10 @@ class SimpleCostModel : public CostModel {
std::size_t estimateCardinalityForSort(
const physical::SortPtr &physical_plan);
+ // Returns the left child's cardinality
+ std::size_t estimateCardinalityForFilterJoin(
+ const physical::FilterJoinPtr &physical_plan);
+
// Returns the larger value of the estimated cardinalities of two
// input plans.
std::size_t estimateCardinalityForHashJoin(
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index 75b1b2b..7afa1c3 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -21,11 +21,11 @@
#include <algorithm>
#include <memory>
-#include <unordered_map>
#include <vector>
#include "catalog/CatalogRelation.hpp"
#include "catalog/CatalogRelationStatistics.hpp"
+#include "catalog/CatalogTypedefs.hpp"
#include "query_optimizer/cost_model/CostModel.hpp"
#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ComparisonExpression.hpp"
@@ -38,6 +38,7 @@
#include "query_optimizer/expressions/PatternMatcher.hpp"
#include "query_optimizer/physical/Aggregate.hpp"
#include "query_optimizer/physical/NestedLoopsJoin.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/PatternMatcher.hpp"
#include "query_optimizer/physical/Physical.hpp"
@@ -48,6 +49,8 @@
#include "query_optimizer/physical/TableGenerator.hpp"
#include "query_optimizer/physical/TableReference.hpp"
#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "types/TypedValue.hpp"
+#include "types/NullType.hpp"
#include "glog/logging.h"
@@ -73,6 +76,9 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinality(
case P::PhysicalType::kTableGenerator:
return estimateCardinalityForTableGenerator(
std::static_pointer_cast<const P::TableGenerator>(physical_plan));
+ case P::PhysicalType::kFilterJoin:
+ return estimateCardinalityForFilterJoin(
+ std::static_pointer_cast<const P::FilterJoin>(physical_plan));
case P::PhysicalType::kHashJoin:
return estimateCardinalityForHashJoin(
std::static_pointer_cast<const P::HashJoin>(physical_plan));
@@ -134,6 +140,17 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForTableGenerator(
return physical_plan->generator_function_handle()->getEstimatedCardinality();
}
+std::size_t StarSchemaSimpleCostModel::estimateCardinalityForFilterJoin(
+ const P::FilterJoinPtr &physical_plan) {
+ double build_side_filter_selectivity =
+ estimateSelectivityForPredicate(physical_plan->build_side_filter_predicate(),
+ physical_plan->right());
+ std::size_t left_cardinality = estimateCardinality(physical_plan->left());
+ double right_selectivity = estimateSelectivity(physical_plan->right());
+ return static_cast<std::size_t>(
+ left_cardinality * build_side_filter_selectivity * right_selectivity + 0.5);
+}
+
std::size_t StarSchemaSimpleCostModel::estimateCardinalityForHashJoin(
const P::HashJoinPtr &physical_plan) {
std::size_t left_cardinality = estimateCardinality(physical_plan->left());
@@ -216,6 +233,18 @@ std::size_t StarSchemaSimpleCostModel::estimateNumDistinctValues(
}
break;
}
+ case P::PhysicalType::kFilterJoin: {
+ const P::FilterJoinPtr &filter_join =
+ std::static_pointer_cast<const P::FilterJoin>(physical_plan);
+ if (E::ContainsExprId(filter_join->left()->getOutputAttributes(), attribute_id)) {
+ std::size_t left_child_num_distinct_values =
+ estimateNumDistinctValues(attribute_id, filter_join->left());
+ double right_child_selectivity =
+ estimateSelectivity(filter_join->right());
+ return static_cast<std::size_t>(
+ left_child_num_distinct_values * right_child_selectivity + 0.5);
+ }
+ }
case P::PhysicalType::kHashJoin: {
const P::HashJoinPtr &hash_join =
std::static_pointer_cast<const P::HashJoin>(physical_plan);
@@ -254,6 +283,16 @@ double StarSchemaSimpleCostModel::estimateSelectivity(
double child_selectivity = estimateSelectivity(selection->input());
return filter_selectivity * child_selectivity;
}
+ case P::PhysicalType::kFilterJoin: {
+ const P::FilterJoinPtr &filter_join =
+ std::static_pointer_cast<const P::FilterJoin>(physical_plan);
+ double left_selectivity = estimateSelectivity(filter_join->left());
+ double right_selectivity = estimateSelectivity(filter_join->right());
+ double build_side_filter_selectivity =
+ estimateSelectivityForPredicate(filter_join->build_side_filter_predicate(),
+ filter_join->right());
+ return left_selectivity * right_selectivity * build_side_filter_selectivity;
+ }
case P::PhysicalType::kHashJoin: {
const P::HashJoinPtr &hash_join =
std::static_pointer_cast<const P::HashJoin>(physical_plan);
@@ -383,18 +422,124 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
std::size_t StarSchemaSimpleCostModel::getNumDistinctValues(
const E::ExprId attribute_id,
const P::TableReferencePtr &table_reference) {
- const CatalogRelation &relation = *table_reference->relation();
- const std::vector<E::AttributeReferencePtr> &attributes = table_reference->attribute_list();
- for (std::size_t i = 0; i < attributes.size(); ++i) {
- if (attributes[i]->id() == attribute_id) {
- const CatalogRelationStatistics &stat = relation.getStatistics();
- if (stat.hasNumDistinctValues(i)) {
- return stat.getNumDistinctValues(i);
+ const auto rel_attr_id =
+ findCatalogRelationAttributeId(table_reference, attribute_id);
+ if (rel_attr_id != kInvalidAttributeID) {
+ const CatalogRelationStatistics &stat =
+ table_reference->relation()->getStatistics();
+ if (stat.hasNumDistinctValues(rel_attr_id)) {
+ return stat.getNumDistinctValues(rel_attr_id);
+ }
+ }
+ return estimateCardinalityForTableReference(table_reference);
+}
+
+bool StarSchemaSimpleCostModel::impliesUniqueAttributes(
+ const P::PhysicalPtr &physical_plan,
+ const std::vector<E::AttributeReferencePtr> &attributes) {
+ switch (physical_plan->getPhysicalType()) {
+ case P::PhysicalType::kAggregate: {
+ const P::AggregatePtr &aggregate =
+ std::static_pointer_cast<const P::Aggregate>(physical_plan);
+ return E::SubsetOfExpressions(aggregate->grouping_expressions(), attributes);
+ }
+ case P::PhysicalType::kHashJoin: {
+ const P::HashJoinPtr &hash_join =
+ std::static_pointer_cast<const P::HashJoin>(physical_plan);
+ bool unique_from_left =
+ impliesUniqueAttributes(hash_join->right(), hash_join->right_join_attributes())
+ && impliesUniqueAttributes(hash_join->left(), attributes);
+ bool unique_from_right =
+ impliesUniqueAttributes(hash_join->left(), hash_join->left_join_attributes())
+ && impliesUniqueAttributes(hash_join->right(), attributes);
+ return unique_from_left || unique_from_right;
+ }
+ case P::PhysicalType::kTableReference: {
+ const P::TableReferencePtr &table_reference =
+ std::static_pointer_cast<const P::TableReference>(physical_plan);
+ const CatalogRelationStatistics &stat =
+ table_reference->relation()->getStatistics();
+ if (stat.hasNumTuples()) {
+ const std::size_t num_tuples = stat.getNumTuples();
+ for (const auto &attr : attributes) {
+ const attribute_id rel_attr_id =
+ findCatalogRelationAttributeId(table_reference, attr->id());
+ if (rel_attr_id != kInvalidAttributeID &&
+ stat.hasNumDistinctValues(rel_attr_id) &&
+ stat.getNumDistinctValues(rel_attr_id) == num_tuples) {
+ return true;
+ }
+ }
}
+ return false;
+ }
+ case P::PhysicalType::kSample: // Fall through
+ case P::PhysicalType::kSelection:
+ case P::PhysicalType::kSort: {
+ DCHECK_EQ(physical_plan->getNumChildren(), 1u);
+ return impliesUniqueAttributes(physical_plan->children()[0], attributes);
+ }
+ default:
break;
+ }
+ return false;
+}
+
+TypedValue StarSchemaSimpleCostModel::findCatalogRelationStat(
+ const P::PhysicalPtr &physical_plan,
+ const E::ExprId attr_id,
+ const StatType stat_type,
+ bool *is_exact_stat) {
+ P::TableReferencePtr table_reference;
+ if (P::SomeTableReference::MatchesWithConditionalCast(physical_plan, &table_reference)) {
+ const attribute_id rel_attr_id =
+ findCatalogRelationAttributeId(table_reference, attr_id);
+ if (rel_attr_id != kInvalidAttributeID) {
+ const CatalogRelationStatistics &stat =
+ table_reference->relation()->getStatistics();
+
+ if (is_exact_stat != nullptr) {
+ *is_exact_stat = stat.isExact();
+ }
+
+ switch (stat_type) {
+ case StatType::kMin: {
+ if (stat.hasMinValue(rel_attr_id)) {
+ return stat.getMinValue(rel_attr_id);
+ }
+ break;
+ }
+ case StatType::kMax: {
+ if (stat.hasMaxValue(rel_attr_id)) {
+ return stat.getMaxValue(rel_attr_id);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ return NullType::InstanceNullable().makeNullValue();
}
}
- return estimateCardinalityForTableReference(table_reference);
+
+ for (const auto &child : physical_plan->children()) {
+ if (E::ContainsExprId(child->getOutputAttributes(), attr_id)) {
+ return findCatalogRelationStat(child, attr_id, stat_type, is_exact_stat);
+ }
+ }
+ return NullType::InstanceNullable().makeNullValue();
+}
+
+attribute_id StarSchemaSimpleCostModel::findCatalogRelationAttributeId(
+ const physical::TableReferencePtr &table_reference,
+ const expressions::ExprId expr_id) {
+ const auto &attribute_list = table_reference->attribute_list();
+ for (std::size_t i = 0; i < attribute_list.size(); ++i) {
+ if (attribute_list[i]->id() == expr_id) {
+ return i;
+ }
+ }
+ return kInvalidAttributeID;
}
} // namespace cost
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
index 6f6aa29..cbe18f4 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
@@ -23,11 +23,14 @@
#include <cstddef>
#include <vector>
+#include "catalog/CatalogTypedefs.hpp"
#include "query_optimizer/cost_model/CostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ExprId.hpp"
#include "query_optimizer/expressions/Predicate.hpp"
#include "query_optimizer/physical/Aggregate.hpp"
#include "query_optimizer/physical/NestedLoopsJoin.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/Selection.hpp"
@@ -36,6 +39,7 @@
#include "query_optimizer/physical/TableReference.hpp"
#include "query_optimizer/physical/TopLevelPlan.hpp"
#include "query_optimizer/physical/WindowAggregate.hpp"
+#include "types/TypedValue.hpp"
#include "utility/Macros.hpp"
namespace quickstep {
@@ -105,10 +109,70 @@ class StarSchemaSimpleCostModel : public CostModel {
double estimateSelectivityForFilterPredicate(
const physical::PhysicalPtr &physical_plan);
+ /**
+ * @brief Check whether a set of attributes are unique (i.e. have distinct
+ * values) for a relation.
+ *
+ * @param physical_plan The physical plan that corresponds to a relation.
+ * @param attributes The set of attributes to be checked. Note that each
+ * attribute in this set must be an output attribute of the physical
+ * plan.
+ * @return True if it is guaranteed that the attributes are unique; false
+ * otherwise.
+ */
+ bool impliesUniqueAttributes(
+ const physical::PhysicalPtr &physical_plan,
+ const std::vector<expressions::AttributeReferencePtr> &attributes);
+
+ /**
+ * @brief For a physical plan attribute, find its correponding catalog attribute's
+ * MIN statistic. Returns Null value if there is no corresponding catalog
+ * attribute for the physical plan attribute.
+ *
+ * @param physical_plan The physical plan.
+ * @param attribute The attribute. Must be an output attribute of the given
+ * physical plan.
+ * @param is_exact_stat If this pointer is not null, its pointed content will
+ * be modified by this method to indicate whether the returned statistic
+ * is EXACT for the stored relation (i.e. not outdated or estimated).
+ * @return The MIN statistic for the attribute.
+ */
+ TypedValue findMinValueStat(
+ const physical::PhysicalPtr &physical_plan,
+ const expressions::AttributeReferencePtr &attribute,
+ bool *is_exact_stat = nullptr) {
+ return findCatalogRelationStat(
+ physical_plan, attribute->id(), StatType::kMin, is_exact_stat);
+ }
+
+ /**
+ * @brief For a physical plan attribute, find its correponding catalog attribute's
+ * MAX statistic. Returns Null value if there is no corresponding catalog
+ * attribute for the physical plan attribute.
+ *
+ * @param physical_plan The physical plan.
+ * @param attribute The attribute. Must be an output attribute of the given
+ * physical plan.
+ * @param is_exact_stat If this pointer is not null, its pointed content will
+ * be modified by this method to indicate whether the returned statistic
+ * is EXACT for the stored relation (i.e. not not outdated or estimated).
+ * @return The MAX statistic for the attribute.
+ */
+ TypedValue findMaxValueStat(
+ const physical::PhysicalPtr &physical_plan,
+ const expressions::AttributeReferencePtr &attribute,
+ bool *is_exact_stat = nullptr) {
+ return findCatalogRelationStat(
+ physical_plan, attribute->id(), StatType::kMax, is_exact_stat);
+ }
+
private:
std::size_t estimateCardinalityForAggregate(
const physical::AggregatePtr &physical_plan);
+ std::size_t estimateCardinalityForFilterJoin(
+ const physical::FilterJoinPtr &physical_plan);
+
std::size_t estimateCardinalityForHashJoin(
const physical::HashJoinPtr &physical_plan);
@@ -144,6 +208,25 @@ class StarSchemaSimpleCostModel : public CostModel {
std::size_t getNumDistinctValues(const expressions::ExprId attribute_id,
const physical::TableReferencePtr &table_reference);
+ enum class StatType {
+ kMax = 0,
+ kMin
+ };
+
+ // For a physical plan attribute, find its correponding catalog attribute's
+ // min/max statistics. Returns Null value if there is no corresponding catalog
+ // attribute for the physical plan attribute (e.g. the attribute is the result
+ // of an expression).
+ TypedValue findCatalogRelationStat(
+ const physical::PhysicalPtr &physical_plan,
+ const expressions::ExprId expr_id,
+ const StatType stat_type,
+ bool *is_exact_stat);
+
+ // For a table reference attribute, find its correponding catalog attribute.
+ attribute_id findCatalogRelationAttributeId(
+ const physical::TableReferencePtr &table_reference,
+ const expressions::ExprId expr_id);
DISALLOW_COPY_AND_ASSIGN(StarSchemaSimpleCostModel);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/expressions/ExpressionUtil.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/ExpressionUtil.hpp b/query_optimizer/expressions/ExpressionUtil.hpp
index 422d5ab..6b8666e 100644
--- a/query_optimizer/expressions/ExpressionUtil.hpp
+++ b/query_optimizer/expressions/ExpressionUtil.hpp
@@ -122,12 +122,12 @@ bool ContainsExprId(
* contain the other operand).
* @return True if \p left is a subset of \p right.
*/
-template <class NamedExpressionType>
+template <class LeftNamedExpressionType, class RightNamedExpressionType>
bool SubsetOfExpressions(
- const std::vector<std::shared_ptr<const NamedExpressionType>> &left,
- const std::vector<std::shared_ptr<const NamedExpressionType>> &right) {
+ const std::vector<std::shared_ptr<const LeftNamedExpressionType>> &left,
+ const std::vector<std::shared_ptr<const RightNamedExpressionType>> &right) {
UnorderedNamedExpressionSet supset(right.begin(), right.end());
- for (const std::shared_ptr<const NamedExpressionType> &expr : left) {
+ for (const std::shared_ptr<const LeftNamedExpressionType> &expr : left) {
if (supset.find(expr) == supset.end()) {
return false;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/CMakeLists.txt b/query_optimizer/physical/CMakeLists.txt
index 7f26943..f68ed39 100644
--- a/query_optimizer/physical/CMakeLists.txt
+++ b/query_optimizer/physical/CMakeLists.txt
@@ -23,6 +23,7 @@ add_library(quickstep_queryoptimizer_physical_CreateIndex CreateIndex.cpp Create
add_library(quickstep_queryoptimizer_physical_CreateTable CreateTable.cpp CreateTable.hpp)
add_library(quickstep_queryoptimizer_physical_DeleteTuples DeleteTuples.cpp DeleteTuples.hpp)
add_library(quickstep_queryoptimizer_physical_DropTable DropTable.cpp DropTable.hpp)
+add_library(quickstep_queryoptimizer_physical_FilterJoin FilterJoin.cpp FilterJoin.hpp)
add_library(quickstep_queryoptimizer_physical_HashJoin HashJoin.cpp HashJoin.hpp)
add_library(quickstep_queryoptimizer_physical_InsertSelection InsertSelection.cpp InsertSelection.hpp)
add_library(quickstep_queryoptimizer_physical_InsertTuple InsertTuple.cpp InsertTuple.hpp)
@@ -115,6 +116,18 @@ target_link_libraries(quickstep_queryoptimizer_physical_DropTable
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_physical_PhysicalType
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_physical_FilterJoin
+ glog
+ quickstep_queryoptimizer_OptimizerTree
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExpressionUtil
+ quickstep_queryoptimizer_expressions_NamedExpression
+ quickstep_queryoptimizer_expressions_Predicate
+ quickstep_queryoptimizer_physical_BinaryJoin
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_utility_Cast
+ quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_physical_HashJoin
glog
quickstep_queryoptimizer_OptimizerTree
@@ -282,6 +295,7 @@ target_link_libraries(quickstep_queryoptimizer_physical
quickstep_queryoptimizer_physical_CreateTable
quickstep_queryoptimizer_physical_DeleteTuples
quickstep_queryoptimizer_physical_DropTable
+ quickstep_queryoptimizer_physical_FilterJoin
quickstep_queryoptimizer_physical_HashJoin
quickstep_queryoptimizer_physical_InsertSelection
quickstep_queryoptimizer_physical_InsertTuple
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/FilterJoin.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/FilterJoin.cpp b/query_optimizer/physical/FilterJoin.cpp
new file mode 100644
index 0000000..1817a1c
--- /dev/null
+++ b/query_optimizer/physical/FilterJoin.cpp
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/physical/FilterJoin.hpp"
+
+#include <string>
+#include <vector>
+
+#include "query_optimizer/OptimizerTree.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "utility/Cast.hpp"
+
+namespace quickstep {
+namespace optimizer {
+namespace physical {
+
+namespace E = ::quickstep::optimizer::expressions;
+
+std::vector<E::AttributeReferencePtr> FilterJoin::getReferencedAttributes() const {
+ std::vector<E::AttributeReferencePtr> referenced_attributes;
+ for (const auto &project_expression : project_expressions()) {
+ const auto referenced_attributes_in_expression =
+ project_expression->getReferencedAttributes();
+ referenced_attributes.insert(referenced_attributes.end(),
+ referenced_attributes_in_expression.begin(),
+ referenced_attributes_in_expression.end());
+ }
+ referenced_attributes.insert(referenced_attributes.end(),
+ probe_attributes_.begin(),
+ probe_attributes_.end());
+ referenced_attributes.insert(referenced_attributes.end(),
+ build_attributes_.begin(),
+ build_attributes_.end());
+ if (build_side_filter_predicate_ != nullptr) {
+ const auto referenced_attributes_in_predicate =
+ build_side_filter_predicate_->getReferencedAttributes();
+ referenced_attributes.insert(referenced_attributes.end(),
+ referenced_attributes_in_predicate.begin(),
+ referenced_attributes_in_predicate.end());
+ }
+ return referenced_attributes;
+}
+
+bool FilterJoin::maybeCopyWithPrunedExpressions(
+ const expressions::UnorderedNamedExpressionSet &referenced_expressions,
+ PhysicalPtr *output) const {
+ std::vector<E::NamedExpressionPtr> new_project_expressions;
+ const auto ¤t_project_expressions = project_expressions();
+ for (const auto &project_expression : current_project_expressions) {
+ if (referenced_expressions.find(project_expression) != referenced_expressions.end()) {
+ new_project_expressions.emplace_back(project_expression);
+ }
+ }
+ if (new_project_expressions.size() != current_project_expressions.size()) {
+ *output = Create(left(),
+ right(),
+ probe_attributes_,
+ build_attributes_,
+ new_project_expressions,
+ build_side_filter_predicate_,
+ is_anti_join_);
+ return true;
+ }
+ return false;
+}
+
+void FilterJoin::getFieldStringItems(
+ std::vector<std::string> *inline_field_names,
+ std::vector<std::string> *inline_field_values,
+ std::vector<std::string> *non_container_child_field_names,
+ std::vector<OptimizerTreeBaseNodePtr> *non_container_child_fields,
+ std::vector<std::string> *container_child_field_names,
+ std::vector<std::vector<OptimizerTreeBaseNodePtr>> *container_child_fields) const {
+ BinaryJoin::getFieldStringItems(inline_field_names,
+ inline_field_values,
+ non_container_child_field_names,
+ non_container_child_fields,
+ container_child_field_names,
+ container_child_fields);
+
+ inline_field_names->push_back("is_anti_join");
+ inline_field_values->push_back(std::to_string(is_anti_join_));
+
+ if (build_side_filter_predicate_ != nullptr) {
+ non_container_child_field_names->emplace_back("build_side_filter_predicate");
+ non_container_child_fields->emplace_back(build_side_filter_predicate_);
+ }
+
+ container_child_field_names->push_back("probe_attributes");
+ container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(probe_attributes_));
+ container_child_field_names->push_back("build_attributes");
+ container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(build_attributes_));
+}
+
+} // namespace physical
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/FilterJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/FilterJoin.hpp b/query_optimizer/physical/FilterJoin.hpp
new file mode 100644
index 0000000..ad4e18b
--- /dev/null
+++ b/query_optimizer/physical/FilterJoin.hpp
@@ -0,0 +1,187 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_FILTER_JOIN_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_FILTER_JOIN_HPP_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "query_optimizer/OptimizerTree.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/BinaryJoin.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+namespace physical {
+
+/** \addtogroup OptimizerPhysical
+ * @{
+ */
+
+class FilterJoin;
+typedef std::shared_ptr<const FilterJoin> FilterJoinPtr;
+
+/**
+ * @brief Physical filter join node. Semantically, FilterJoin is similar to
+ * HashJoin where the difference is that FilterJoin builds a bit vector
+ * instead of a hash table.
+ *
+ * @note FilterJoin's backend execution relies on LIPFilter injection (attach
+ * the bit vectors as filters into downstream relational operators).
+ */
+class FilterJoin : public BinaryJoin {
+ public:
+ PhysicalType getPhysicalType() const override {
+ return PhysicalType::kFilterJoin;
+ }
+
+ std::string getName() const override {
+ if (is_anti_join_) {
+ return "FilterJoin(Anti)";
+ } else {
+ return "FilterJoin";
+ }
+ }
+
+ /**
+ * @return The probe side attributes.
+ */
+ const std::vector<expressions::AttributeReferencePtr>& probe_attributes() const {
+ return probe_attributes_;
+ }
+
+ /**
+ * @return The build side attributes.
+ */
+ const std::vector<expressions::AttributeReferencePtr>& build_attributes() const {
+ return build_attributes_;
+ }
+
+ /**
+ * @return The build side filter predicate.
+ */
+ const expressions::PredicatePtr& build_side_filter_predicate() const {
+ return build_side_filter_predicate_;
+ }
+
+ /**
+ * @return Whether this is an anti-join.
+ */
+ const bool is_anti_join() const {
+ return is_anti_join_;
+ }
+
+ PhysicalPtr copyWithNewChildren(
+ const std::vector<PhysicalPtr> &new_children) const override {
+ DCHECK_EQ(children().size(), new_children.size());
+ return Create(new_children[0],
+ new_children[1],
+ probe_attributes_,
+ build_attributes_,
+ project_expressions(),
+ build_side_filter_predicate_,
+ is_anti_join_);
+ }
+
+ std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() const override;
+
+ bool maybeCopyWithPrunedExpressions(
+ const expressions::UnorderedNamedExpressionSet &referenced_expressions,
+ PhysicalPtr *output) const override;
+
+ /**
+ * @brief Creates a physical FilterJoin.
+ * @param probe_child The probe side child plan.
+ * @param build_child The build side child plan.
+ * @param probe_attributes The probe side attributes.
+ * @param build_attributes The build side attributes.
+ * @param project_expressions The project expressions.
+ * @param build_side_filter_predicate Optional filtering predicate to be
+ * applied to the build side child BEFORE join.
+ * @param is_anti_join Whether this is an anti-join.
+ * @return An immutable physical FilterJoin.
+ */
+ static FilterJoinPtr Create(
+ const PhysicalPtr &probe_child,
+ const PhysicalPtr &build_child,
+ const std::vector<expressions::AttributeReferencePtr> &probe_attributes,
+ const std::vector<expressions::AttributeReferencePtr> &build_attributes,
+ const std::vector<expressions::NamedExpressionPtr> &project_expressions,
+ const expressions::PredicatePtr &build_side_filter_predicate,
+ const bool is_anti_join) {
+ return FilterJoinPtr(
+ new FilterJoin(probe_child,
+ build_child,
+ probe_attributes,
+ build_attributes,
+ project_expressions,
+ build_side_filter_predicate,
+ is_anti_join));
+ }
+
+ protected:
+ void getFieldStringItems(
+ std::vector<std::string> *inline_field_names,
+ std::vector<std::string> *inline_field_values,
+ std::vector<std::string> *non_container_child_field_names,
+ std::vector<OptimizerTreeBaseNodePtr> *non_container_child_fields,
+ std::vector<std::string> *container_child_field_names,
+ std::vector<std::vector<OptimizerTreeBaseNodePtr>> *container_child_fields) const override;
+
+ private:
+ FilterJoin(
+ const PhysicalPtr &probe_child,
+ const PhysicalPtr &build_child,
+ const std::vector<expressions::AttributeReferencePtr> &probe_attributes,
+ const std::vector<expressions::AttributeReferencePtr> &build_attributes,
+ const std::vector<expressions::NamedExpressionPtr> &project_expressions,
+ const expressions::PredicatePtr &build_side_filter_predicate,
+ const bool is_anti_join)
+ : BinaryJoin(probe_child, build_child, project_expressions),
+ probe_attributes_(probe_attributes),
+ build_attributes_(build_attributes),
+ build_side_filter_predicate_(build_side_filter_predicate),
+ is_anti_join_(is_anti_join) {
+ }
+
+ std::vector<expressions::AttributeReferencePtr> probe_attributes_;
+ std::vector<expressions::AttributeReferencePtr> build_attributes_;
+ expressions::PredicatePtr build_side_filter_predicate_;
+ bool is_anti_join_;
+
+ DISALLOW_COPY_AND_ASSIGN(FilterJoin);
+};
+
+/** @} */
+
+} // namespace physical
+} // namespace optimizer
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_FILTER_JOIN_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/LIPFilterConfiguration.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/LIPFilterConfiguration.hpp b/query_optimizer/physical/LIPFilterConfiguration.hpp
index 62a6149..90c81fe 100644
--- a/query_optimizer/physical/LIPFilterConfiguration.hpp
+++ b/query_optimizer/physical/LIPFilterConfiguration.hpp
@@ -21,6 +21,7 @@
#define QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_LIP_FILTER_CONFIGURATION_HPP_
#include <cstddef>
+#include <cstdint>
#include <map>
#include <memory>
#include <vector>
@@ -40,50 +41,211 @@ namespace physical {
class Physical;
typedef std::shared_ptr<const Physical> PhysicalPtr;
+class LIPFilterBuildInfo;
+typedef std::shared_ptr<const LIPFilterBuildInfo> LIPFilterBuildInfoPtr;
+
+class LIPFilterProbeInfo;
+typedef std::shared_ptr<const LIPFilterProbeInfo> LIPFilterProbeInfoPtr;
+
/**
* @brief Optimizer information for a LIP filter builder.
*/
-struct LIPFilterBuildInfo {
+class LIPFilterBuildInfo {
+ public:
+ /**
+ * @return The LIPFilter's type.
+ */
+ LIPFilterType filter_type() const {
+ return filter_type_;
+ }
+
+ /**
+ * @return The LIPFilter's build attribute.
+ */
+ const expressions::AttributeReferencePtr& build_attribute() const {
+ return build_attribute_;
+ }
+
+ protected:
/**
* @brief Constructor.
*
- * @param build_attribute_in The attribute to build the LIP filter with.
- * @param filter_cardinality_in The LIP filter's cardinality.
* @param filter_type_in The LIP filter's type.
+ * @param build_attribute_in The attribute to build the LIP filter with.
+ */
+ LIPFilterBuildInfo(const LIPFilterType &filter_type,
+ const expressions::AttributeReferencePtr &build_attribute)
+ : filter_type_(filter_type),
+ build_attribute_(build_attribute) {}
+
+ private:
+ const LIPFilterType filter_type_;
+ const expressions::AttributeReferencePtr build_attribute_;
+
+ DISALLOW_COPY_AND_ASSIGN(LIPFilterBuildInfo);
+};
+
+/**
+ * @brief Subclass that contains extra information for SingleIdentityHashFilter
+ * builder.
+ */
+class SingleIdentityHashFilterBuildInfo : public LIPFilterBuildInfo {
+ public:
+ /**
+ * @return The cardinality of this SingleIdentityHashFilter.
*/
- LIPFilterBuildInfo(const expressions::AttributeReferencePtr &build_attribute_in,
- const std::size_t filter_cardinality_in,
- const LIPFilterType &filter_type_in)
- : build_attribute(build_attribute_in),
- filter_cardinality(filter_cardinality_in),
- filter_type(filter_type_in) {
+ std::size_t filter_cardinality() const {
+ return filter_cardinality_;
}
- const expressions::AttributeReferencePtr build_attribute;
- const std::size_t filter_cardinality;
- const LIPFilterType filter_type;
+
+ /**
+ * @brief Creates a shared SingleIdentityHashFilterBuildInfo.
+ *
+ * @param build_attribute The attribute to build the filter with.
+ * @param filter_cardinality The cardinality of this SingleIdentityHashFilter.
+ */
+ static LIPFilterBuildInfoPtr Create(
+ const expressions::AttributeReferencePtr &build_attribute,
+ const std::size_t filter_cardinality) {
+ return LIPFilterBuildInfoPtr(
+ new SingleIdentityHashFilterBuildInfo(build_attribute,
+ filter_cardinality));
+ }
+
+ private:
+ SingleIdentityHashFilterBuildInfo(const expressions::AttributeReferencePtr &build_attribute,
+ const std::size_t filter_cardinality)
+ : LIPFilterBuildInfo(LIPFilterType::kSingleIdentityHashFilter,
+ build_attribute),
+ filter_cardinality_(filter_cardinality) {}
+
+ const std::size_t filter_cardinality_;
+
+ DISALLOW_COPY_AND_ASSIGN(SingleIdentityHashFilterBuildInfo);
};
/**
+ * @brief Subclass that contains extra information for BitVectorExactFilter
+ * builder.
+ */
+class BitVectorExactFilterBuildInfo : public LIPFilterBuildInfo {
+ public:
+ /**
+ * @return The minimum possible value for this BitVectorExactFilter.
+ */
+ std::int64_t min_value() const {
+ return min_value_;
+ }
+
+ /**
+ * @return The maximum possible value for this BitVectorExactFilter.
+ */
+ std::int64_t max_value() const {
+ return max_value_;
+ }
+
+ /**
+ * @return Whether this is an anti-filter.
+ */
+ bool is_anti_filter() const {
+ return is_anti_filter_;
+ }
+
+ /**
+ * @brief Creates a shared BitVectorExactFilterBuildInfo.
+ *
+ * @param build_attribute The attribute to build the filter with.
+ * @param min_value The minimum possible value for this BitVectorExactFilter
+ * to set.
+ * @param max_value The maximum possible value for this BitVectorExactFilter
+ * to set.
+ * @param is_anti_filter Whether this is an anti-filter.
+ */
+ static LIPFilterBuildInfoPtr Create(
+ const expressions::AttributeReferencePtr &build_attribute,
+ const std::int64_t min_value,
+ const std::int64_t max_value,
+ const bool is_anti_filter) {
+ return LIPFilterBuildInfoPtr(
+ new BitVectorExactFilterBuildInfo(build_attribute,
+ min_value,
+ max_value,
+ is_anti_filter));
+ }
+
+ private:
+ BitVectorExactFilterBuildInfo(const expressions::AttributeReferencePtr &build_attribute,
+ const std::int64_t min_value,
+ const std::int64_t max_value,
+ const bool is_anti_filter)
+ : LIPFilterBuildInfo(LIPFilterType::kBitVectorExactFilter,
+ build_attribute),
+ min_value_(min_value),
+ max_value_(max_value),
+ is_anti_filter_(is_anti_filter) {}
+
+ const std::int64_t min_value_;
+ const std::int64_t max_value_;
+ const bool is_anti_filter_;
+
+ DISALLOW_COPY_AND_ASSIGN(BitVectorExactFilterBuildInfo);
+};
+
+
+/**
* @brief Optimizer information for a LIP filter prober.
*/
-struct LIPFilterProbeInfo {
+class LIPFilterProbeInfo {
+ public:
/**
- * @brief Constructor.
+ * @return The attribute to probe the LIP Filter with.
+ */
+ const expressions::AttributeReferencePtr& probe_attribute() const {
+ return probe_attribute_;
+ }
+
+ /**
+ * @return The attribute that the LIP filter is built with.
+ */
+ const expressions::AttributeReferencePtr& build_attribute() const {
+ return build_attribute_;
+ }
+
+ /**
+ * @return The physical node that the LIP filter's builder is attached to.
+ */
+ const PhysicalPtr& builder() const {
+ return builder_;
+ }
+
+ /**
+ * @brief Creates a shared LIPFilterProbeInfo.
*
- * @param probe_attribute_in The attribute to probe the LIP filter with.
- * @param build_attribute_in The attribute that the LIP filter is built with.
- * @param builder_in The physical node that the LIP filter's builder is attached to.
- */
- LIPFilterProbeInfo(const expressions::AttributeReferencePtr &probe_attribute_in,
- const expressions::AttributeReferencePtr &build_attribute_in,
- const PhysicalPtr &builder_in)
- : probe_attribute(probe_attribute_in),
- build_attribute(build_attribute_in),
- builder(builder_in) {
- }
- const expressions::AttributeReferencePtr probe_attribute;
- const expressions::AttributeReferencePtr build_attribute;
- const PhysicalPtr builder;
+ * @param probe_attribute The attribute to probe the LIP filter with.
+ * @param build_attribute The attribute that the LIP filter is built with.
+ * @param builder The physical node that the LIP filter's builder is attached to.
+ */
+ static LIPFilterProbeInfoPtr Create(
+ const expressions::AttributeReferencePtr &probe_attribute,
+ const expressions::AttributeReferencePtr &build_attribute,
+ const PhysicalPtr &builder) {
+ return LIPFilterProbeInfoPtr(
+ new LIPFilterProbeInfo(probe_attribute, build_attribute, builder));
+ }
+
+ private:
+ LIPFilterProbeInfo(const expressions::AttributeReferencePtr &probe_attribute,
+ const expressions::AttributeReferencePtr &build_attribute,
+ const PhysicalPtr &builder)
+ : probe_attribute_(probe_attribute),
+ build_attribute_(build_attribute),
+ builder_(builder) {}
+
+ const expressions::AttributeReferencePtr probe_attribute_;
+ const expressions::AttributeReferencePtr build_attribute_;
+ const PhysicalPtr builder_;
+
+ DISALLOW_COPY_AND_ASSIGN(LIPFilterProbeInfo);
};
@@ -104,33 +266,23 @@ class LIPFilterConfiguration {
/**
* @brief Add information for a LIP filter builder.
*
- * @param build_attribute The attribute to build the LIP filter with.
+ * @param build_info A shared_ptr to LIPFilterBuildInfo.
* @param builder The physical node to attach the LIP filter builder to.
- * @param filter_size The LIP filter's cardinality.
- * @param filter_type The LIP filter's type.
*/
- void addBuildInfo(const expressions::AttributeReferencePtr &build_attribute,
- const PhysicalPtr &builder,
- const std::size_t filter_size,
- const LIPFilterType &filter_type) {
- build_info_map_[builder].emplace_back(
- build_attribute, filter_size, filter_type);
+ void addBuildInfo(const LIPFilterBuildInfoPtr &build_info,
+ const PhysicalPtr &builder) {
+ build_info_map_[builder].emplace_back(build_info);
}
/**
* @brief Add information for a LIP filter prober.
*
- * @param probe_attribute The attribute to probe the LIP filter with.
+ * @param probe_info A shared_ptr to LIPFilterProbeInfo.
* @param prober The physical node to attach the LIP filter prober to.
- * @param build_attribute The attribute that the LIP filter is built with.
- * @param builder The physical node that the LIP filter's builder is attached to.
*/
- void addProbeInfo(const expressions::AttributeReferencePtr &probe_attribute,
- const PhysicalPtr &prober,
- const expressions::AttributeReferencePtr &build_attribute,
- const PhysicalPtr &builder) {
- probe_info_map_[prober].emplace_back(
- probe_attribute, build_attribute, builder);
+ void addProbeInfo(const LIPFilterProbeInfoPtr &probe_info,
+ const PhysicalPtr &prober) {
+ probe_info_map_[prober].emplace_back(probe_info);
}
/**
@@ -140,7 +292,7 @@ class LIPFilterConfiguration {
* a vector of all the LIP filter builders that are attached to the
* physical node.
*/
- const std::map<PhysicalPtr, std::vector<LIPFilterBuildInfo>>& getBuildInfoMap() const {
+ const std::map<PhysicalPtr, std::vector<LIPFilterBuildInfoPtr>>& getBuildInfoMap() const {
return build_info_map_;
}
@@ -151,13 +303,26 @@ class LIPFilterConfiguration {
* a vector of all the LIP filter probers that are attached to the
* physical node.
*/
- const std::map<PhysicalPtr, std::vector<LIPFilterProbeInfo>>& getProbeInfoMap() const {
+ const std::map<PhysicalPtr, std::vector<LIPFilterProbeInfoPtr>>& getProbeInfoMap() const {
return probe_info_map_;
}
+ /**
+ * @brief Clone a copy of this configuration.
+ *
+ * @return A copy of this confiugration. Caller should take ownership of the
+ * returned object.
+ */
+ LIPFilterConfiguration* clone() const {
+ LIPFilterConfiguration *new_conf = new LIPFilterConfiguration();
+ new_conf->build_info_map_ = build_info_map_;
+ new_conf->probe_info_map_ = probe_info_map_;
+ return new_conf;
+ }
+
private:
- std::map<PhysicalPtr, std::vector<LIPFilterBuildInfo>> build_info_map_;
- std::map<PhysicalPtr, std::vector<LIPFilterProbeInfo>> probe_info_map_;
+ std::map<PhysicalPtr, std::vector<LIPFilterBuildInfoPtr>> build_info_map_;
+ std::map<PhysicalPtr, std::vector<LIPFilterProbeInfoPtr>> probe_info_map_;
DISALLOW_COPY_AND_ASSIGN(LIPFilterConfiguration);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/PatternMatcher.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/PatternMatcher.hpp b/query_optimizer/physical/PatternMatcher.hpp
index 5cd6fd3..4336767 100644
--- a/query_optimizer/physical/PatternMatcher.hpp
+++ b/query_optimizer/physical/PatternMatcher.hpp
@@ -35,6 +35,7 @@ class CopyFrom;
class CreateTable;
class DeleteTuples;
class DropTable;
+class FilterJoin;
class HashJoin;
class InsertTuple;
class Join;
@@ -113,6 +114,7 @@ using SomeCopyFrom = SomePhysicalNode<CopyFrom, PhysicalType::kCopyFrom>;
using SomeCreateTable = SomePhysicalNode<CreateTable, PhysicalType::kCreateTable>;
using SomeDeleteTuples = SomePhysicalNode<DeleteTuples, PhysicalType::kDeleteTuples>;
using SomeDropTable = SomePhysicalNode<DropTable, PhysicalType::kDropTable>;
+using SomeFilterJoin = SomePhysicalNode<FilterJoin, PhysicalType::kFilterJoin>;
using SomeHashJoin = SomePhysicalNode<HashJoin, PhysicalType::kHashJoin>;
using SomeInsertTuple = SomePhysicalNode<InsertTuple, PhysicalType::kInsertTuple>;
using SomeJoin = SomePhysicalNode<Join, PhysicalType::kHashJoin, PhysicalType::kNestedLoopsJoin>;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/PhysicalType.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/PhysicalType.hpp b/query_optimizer/physical/PhysicalType.hpp
index f5f35a1..1da5929 100644
--- a/query_optimizer/physical/PhysicalType.hpp
+++ b/query_optimizer/physical/PhysicalType.hpp
@@ -38,6 +38,7 @@ enum class PhysicalType {
kCreateTable,
kDeleteTuples,
kDropTable,
+ kFilterJoin,
kHashJoin,
kInsertSelection,
kInsertTuple,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/TopLevelPlan.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/TopLevelPlan.hpp b/query_optimizer/physical/TopLevelPlan.hpp
index 7dfc2b6..9e567e1 100644
--- a/query_optimizer/physical/TopLevelPlan.hpp
+++ b/query_optimizer/physical/TopLevelPlan.hpp
@@ -126,7 +126,8 @@ class TopLevelPlan : public Physical {
}
return TopLevelPlan::Create(new_children[0],
new_shared_subplans,
- uncorrelated_subquery_map_);
+ uncorrelated_subquery_map_,
+ lip_filter_configuration_);
}
std::vector<expressions::AttributeReferencePtr> getOutputAttributes() const override {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/rules/AttachLIPFilters.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachLIPFilters.cpp b/query_optimizer/rules/AttachLIPFilters.cpp
index b3c57ab..48b68bc 100644
--- a/query_optimizer/rules/AttachLIPFilters.cpp
+++ b/query_optimizer/rules/AttachLIPFilters.cpp
@@ -55,7 +55,14 @@ P::PhysicalPtr AttachLIPFilters::apply(const P::PhysicalPtr &input) {
cost_model_.reset(
new cost::StarSchemaSimpleCostModel(
top_level_plan->shared_subplans()));
- lip_filter_configuration_.reset(new P::LIPFilterConfiguration());
+
+ const P::LIPFilterConfigurationPtr &existing_configuration =
+ top_level_plan->lip_filter_configuration();
+ if (existing_configuration != nullptr) {
+ lip_filter_configuration_.reset(existing_configuration->clone());
+ } else {
+ lip_filter_configuration_.reset(new P::LIPFilterConfiguration());
+ }
std::set<E::ExprId> already_filtered_attributes;
attachLIPFilters(NodeList(input), &already_filtered_attributes);
@@ -101,7 +108,7 @@ void AttachLIPFilters::attachLIPFilters(
}
if (probe_child != nullptr &&
- cost_model_->estimateCardinality(probe_child) > 10000000) {
+ cost_model_->estimateCardinality(probe_child) >= 100000) {
const auto &candidate_lip_filters = getProbeSideInfo(path.cons(probe_child));
if (!candidate_lip_filters.empty()) {
std::map<E::AttributeReferencePtr, LIPFilterInfoPtr> selected_filters;
@@ -119,15 +126,16 @@ void AttachLIPFilters::attachLIPFilters(
if (already_filtered_attributes->find(source_attr_id)
== already_filtered_attributes->end()) {
lip_filter_configuration_->addBuildInfo(
- pair.second->source_attribute,
- pair.second->source,
- pair.second->estimated_cardinality * 8,
- LIPFilterType::kSingleIdentityHashFilter);
- lip_filter_configuration_->addProbeInfo(
- pair.first,
- node,
- pair.second->source_attribute,
+ P::SingleIdentityHashFilterBuildInfo::Create(
+ pair.second->source_attribute,
+ pair.second->estimated_cardinality * 8),
pair.second->source);
+ lip_filter_configuration_->addProbeInfo(
+ P::LIPFilterProbeInfo::Create(
+ pair.first,
+ pair.second->source_attribute,
+ pair.second->source),
+ node);
already_filtered_attributes->emplace(source_attr_id);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 86d1ef7..223c78c 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -22,6 +22,7 @@ add_library(quickstep_queryoptimizer_rules_AttachLIPFilters AttachLIPFilters.cpp
add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp)
add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp)
add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
+add_library(quickstep_queryoptimizer_rules_InjectJoinFilters InjectJoinFilters.cpp InjectJoinFilters.hpp)
add_library(quickstep_queryoptimizer_rules_PruneColumns PruneColumns.cpp PruneColumns.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownFilter PushDownFilter.cpp PushDownFilter.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
@@ -196,6 +197,26 @@ target_link_libraries(quickstep_queryoptimizer_rules_SwapProbeBuild
target_link_libraries(quickstep_queryoptimizer_rules_TopDownRule
quickstep_queryoptimizer_rules_Rule
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_InjectJoinFilters
+ quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExpressionUtil
+ quickstep_queryoptimizer_expressions_Predicate
+ quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_FilterJoin
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_LIPFilterConfiguration
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_queryoptimizer_physical_Selection
+ quickstep_queryoptimizer_physical_TopLevelPlan
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_queryoptimizer_rules_PruneColumns
+ quickstep_types_TypeID
+ quickstep_types_TypedValue
+ quickstep_utility_Macros
+ quickstep_utility_lipfilter_LIPFilter)
target_link_libraries(quickstep_queryoptimizer_rules_UnnestSubqueries
quickstep_queryoptimizer_OptimizerContext
quickstep_queryoptimizer_expressions_AttributeReference
@@ -246,6 +267,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
quickstep_queryoptimizer_rules_BottomUpRule
quickstep_queryoptimizer_rules_CollapseProject
quickstep_queryoptimizer_rules_GenerateJoins
+ quickstep_queryoptimizer_rules_InjectJoinFilters
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_PushDownFilter
quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate