You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/10/11 16:00:12 UTC
[5/5] incubator-quickstep git commit: Optimizer changes for the
LIPFilter feature.
Optimizer changes for the LIPFilter feature.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/72b349d5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/72b349d5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/72b349d5
Branch: refs/heads/lip-refactor-optimizer
Commit: 72b349d5f3ccdd31e24d26f4cba13fe5611e5d92
Parents: 80af233
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Wed Sep 7 13:20:43 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Tue Oct 11 10:59:34 2016 -0500
----------------------------------------------------------------------
query_optimizer/CMakeLists.txt | 22 +-
query_optimizer/ExecutionGenerator.cpp | 51 +--
query_optimizer/ExecutionGenerator.hpp | 10 +-
query_optimizer/ExecutionHeuristics.cpp | 129 --------
query_optimizer/ExecutionHeuristics.hpp | 157 ----------
query_optimizer/LIPFilterGenerator.cpp | 190 +++++++++++
query_optimizer/LIPFilterGenerator.hpp | 129 ++++++++
query_optimizer/PhysicalGenerator.cpp | 10 +-
.../cost_model/StarSchemaSimpleCostModel.cpp | 2 +-
query_optimizer/physical/CMakeLists.txt | 7 +
.../physical/LIPFilterConfiguration.hpp | 117 +++++++
query_optimizer/physical/TopLevelPlan.hpp | 26 +-
query_optimizer/rules/AttachLIPFilters.cpp | 259 +++++++++++++++
query_optimizer/rules/AttachLIPFilters.hpp | 141 +++++++++
query_optimizer/rules/CMakeLists.txt | 19 ++
.../StarSchemaHashJoinOrderOptimization.cpp | 264 ++++++++++------
.../StarSchemaHashJoinOrderOptimization.hpp | 104 +++++--
query_optimizer/tests/CMakeLists.txt | 16 -
.../tests/ExecutionHeuristics_unittest.cpp | 311 -------------------
relational_operators/AggregationOperator.cpp | 21 +-
relational_operators/AggregationOperator.hpp | 9 +-
relational_operators/BuildHashOperator.cpp | 23 +-
relational_operators/BuildHashOperator.hpp | 18 +-
relational_operators/HashJoinOperator.cpp | 52 +++-
relational_operators/HashJoinOperator.hpp | 43 ++-
relational_operators/RelationalOperator.hpp | 12 +-
relational_operators/SelectOperator.cpp | 67 +++-
relational_operators/SelectOperator.hpp | 18 +-
utility/CMakeLists.txt | 11 +
utility/DisjointTreeForest.hpp | 116 +++++++
utility/PlanVisualizer.cpp | 56 +++-
utility/PlanVisualizer.hpp | 3 +
utility/lip_filter/CMakeLists.txt | 19 ++
utility/lip_filter/LIPFilter.hpp | 78 +++++
34 files changed, 1679 insertions(+), 831 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 988ffd8..e1f36d1 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -41,7 +41,7 @@ add_subdirectory(tests)
# Declare micro-libs:
add_library(quickstep_queryoptimizer_ExecutionGenerator ExecutionGenerator.cpp ExecutionGenerator.hpp)
-add_library(quickstep_queryoptimizer_ExecutionHeuristics ExecutionHeuristics.cpp ExecutionHeuristics.hpp)
+add_library(quickstep_queryoptimizer_LIPFilterGenerator LIPFilterGenerator.cpp LIPFilterGenerator.hpp)
add_library(quickstep_queryoptimizer_LogicalGenerator LogicalGenerator.cpp LogicalGenerator.hpp)
add_library(quickstep_queryoptimizer_LogicalToPhysicalMapper
../empty_src.cpp
@@ -73,7 +73,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
quickstep_expressions_windowaggregation_WindowAggregateFunction_proto
quickstep_queryexecution_QueryContext
quickstep_queryexecution_QueryContext_proto
- quickstep_queryoptimizer_ExecutionHeuristics
+ quickstep_queryoptimizer_LIPFilterGenerator
quickstep_queryoptimizer_OptimizerContext
quickstep_queryoptimizer_QueryHandle
quickstep_queryoptimizer_QueryPlan
@@ -153,14 +153,23 @@ if (ENABLE_DISTRIBUTED)
target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
quickstep_catalog_Catalog_proto)
endif()
-target_link_libraries(quickstep_queryoptimizer_ExecutionHeuristics
+target_link_libraries(quickstep_queryoptimizer_LIPFilterGenerator
glog
- quickstep_catalog_CatalogRelation
+ quickstep_catalog_CatalogAttribute
quickstep_catalog_CatalogTypedefs
quickstep_queryexecution_QueryContext
quickstep_queryexecution_QueryContext_proto
quickstep_queryoptimizer_QueryPlan
- quickstep_utility_Macros)
+ quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_LIPFilterConfiguration
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_Selection
+ quickstep_relationaloperators_RelationalOperator
+ quickstep_types_Type
+ quickstep_utility_lipfilter_LIPFilter
+ quickstep_utility_lipfilter_LIPFilterDeployment
+ quickstep_utility_lipfilter_LIPFilter_proto)
target_link_libraries(quickstep_queryoptimizer_LogicalGenerator
glog
quickstep_parser_ParseStatement
@@ -196,6 +205,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_LogicalToPhysicalMapper
quickstep_queryoptimizer_logical_Logical
quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_rules_AttachLIPFilters
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
quickstep_queryoptimizer_rules_SwapProbeBuild
@@ -233,7 +243,7 @@ target_link_libraries(quickstep_queryoptimizer_Validator
add_library(quickstep_queryoptimizer ../empty_src.cpp QueryOptimizerModule.hpp)
target_link_libraries(quickstep_queryoptimizer
quickstep_queryoptimizer_ExecutionGenerator
- quickstep_queryoptimizer_ExecutionHeuristics
+ quickstep_queryoptimizer_LIPFilterGenerator
quickstep_queryoptimizer_LogicalGenerator
quickstep_queryoptimizer_LogicalToPhysicalMapper
quickstep_queryoptimizer_Optimizer
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 9347c9c..23d9a53 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -54,10 +54,11 @@
#include "expressions/window_aggregation/WindowAggregateFunction.pb.h"
#include "query_execution/QueryContext.hpp"
#include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/ExecutionHeuristics.hpp"
+#include "query_optimizer/LIPFilterGenerator.hpp"
#include "query_optimizer/OptimizerContext.hpp"
#include "query_optimizer/QueryHandle.hpp"
#include "query_optimizer/QueryPlan.hpp"
+#include "query_optimizer/cost_model/SimpleCostModel.hpp"
#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
#include "query_optimizer/expressions/AggregateFunction.hpp"
#include "query_optimizer/expressions/Alias.hpp"
@@ -76,6 +77,7 @@
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/InsertSelection.hpp"
#include "query_optimizer/physical/InsertTuple.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
#include "query_optimizer/physical/NestedLoopsJoin.hpp"
#include "query_optimizer/physical/PatternMatcher.hpp"
#include "query_optimizer/physical/Physical.hpp"
@@ -153,9 +155,6 @@ static const volatile bool aggregate_hashtable_type_dummy
DEFINE_bool(parallelize_load, true, "Parallelize loading data files.");
-DEFINE_bool(optimize_joins, false,
- "Enable post execution plan generation optimizations for joins.");
-
namespace E = ::quickstep::optimizer::expressions;
namespace P = ::quickstep::optimizer::physical;
namespace S = ::quickstep::serialization;
@@ -168,6 +167,13 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) {
cost_model_.reset(
new cost::StarSchemaSimpleCostModel(top_level_physical_plan_->shared_subplans()));
+ simple_cost_model_.reset(
+ new cost::SimpleCostModel(top_level_physical_plan_->shared_subplans()));
+ const P::LIPFilterConfigurationPtr &lip_filter_configuration =
+ top_level_physical_plan_->lip_filter_configuration();
+ if (lip_filter_configuration != nullptr) {
+ lip_filter_generator_.reset(new LIPFilterGenerator(lip_filter_configuration));
+ }
const CatalogRelation *result_relation = nullptr;
@@ -177,6 +183,10 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) {
}
generatePlanInternal(top_level_physical_plan_->plan());
+ if (lip_filter_generator_ != nullptr) {
+ lip_filter_generator_->deployLIPFilters(execution_plan_, query_context_proto_);
+ }
+
// Set the query result relation if the input plan exists in physical_to_execution_map_,
// which indicates the plan is the result of a SELECT query.
const std::unordered_map<P::PhysicalPtr, CatalogRelationInfo>::const_iterator it =
@@ -211,11 +221,6 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) {
temporary_relation_info.producer_operator_index);
}
- // Optimize execution plan based on heuristics captured during execution plan generation, if enabled.
- if (FLAGS_optimize_joins) {
- execution_heuristics_->optimizeExecutionPlan(execution_plan_, query_context_proto_);
- }
-
#ifdef QUICKSTEP_DISTRIBUTED
catalog_database_cache_proto_->set_name(catalog_database_->getName());
@@ -238,6 +243,10 @@ void ExecutionGenerator::generatePlanInternal(
generatePlanInternal(child);
}
+ if (lip_filter_generator_ != nullptr) {
+ lip_filter_generator_->registerAttributeMap(physical_plan, attribute_substitution_map_);
+ }
+
switch (physical_plan->getPhysicalType()) {
case P::PhysicalType::kAggregate:
return convertAggregate(
@@ -569,6 +578,10 @@ void ExecutionGenerator::convertSelection(
std::forward_as_tuple(select_index,
output_relation));
temporary_relation_info_vec_.emplace_back(select_index, output_relation);
+
+ if (lip_filter_generator_ != nullptr) {
+ lip_filter_generator_->addSelectionInfo(physical_selection, select_index);
+ }
}
void ExecutionGenerator::convertSharedSubplanReference(const physical::SharedSubplanReferencePtr &physical_plan) {
@@ -606,7 +619,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
const CatalogRelation *referenced_stored_probe_relation = nullptr;
const CatalogRelation *referenced_stored_build_relation = nullptr;
- std::size_t build_cardinality = cost_model_->estimateCardinality(build_physical);
+ std::size_t build_cardinality = simple_cost_model_->estimateCardinality(build_physical);
bool any_probe_attributes_nullable = false;
bool any_build_attributes_nullable = false;
@@ -829,15 +842,10 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
output_relation));
temporary_relation_info_vec_.emplace_back(join_operator_index, output_relation);
- // Add heuristics for the Hash Join, if enabled.
- if (FLAGS_optimize_joins && !skip_hash_join_optimization) {
- execution_heuristics_->addHashJoinInfo(build_operator_index,
- join_operator_index,
- referenced_stored_build_relation,
- referenced_stored_probe_relation,
- std::move(build_original_attribute_ids),
- std::move(probe_original_attribute_ids),
- join_hash_table_index);
+ if (lip_filter_generator_ != nullptr) {
+ lip_filter_generator_->addHashJoinInfo(physical_plan,
+ build_operator_index,
+ join_operator_index);
}
}
@@ -1466,6 +1474,11 @@ void ExecutionGenerator::convertAggregate(
execution_plan_->addDirectDependency(destroy_aggregation_state_operator_index,
finalize_aggregation_operator_index,
true);
+
+ if (lip_filter_generator_ != nullptr) {
+ lip_filter_generator_->addAggregateInfo(physical_plan,
+ aggregation_operator_index);
+ }
}
void ExecutionGenerator::convertSort(const P::SortPtr &physical_sort) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/ExecutionGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp
index 2aaf5ab..8890296 100644
--- a/query_optimizer/ExecutionGenerator.hpp
+++ b/query_optimizer/ExecutionGenerator.hpp
@@ -33,7 +33,7 @@
#include "catalog/CatalogTypedefs.hpp"
#include "query_execution/QueryContext.hpp"
#include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/ExecutionHeuristics.hpp"
+#include "query_optimizer/LIPFilterGenerator.hpp"
#include "query_optimizer/QueryHandle.hpp"
#include "query_optimizer/QueryPlan.hpp"
#include "query_optimizer/cost_model/CostModel.hpp"
@@ -102,8 +102,7 @@ class ExecutionGenerator {
: catalog_database_(DCHECK_NOTNULL(catalog_database)),
query_handle_(DCHECK_NOTNULL(query_handle)),
execution_plan_(DCHECK_NOTNULL(query_handle->getQueryPlanMutable())),
- query_context_proto_(DCHECK_NOTNULL(query_handle->getQueryContextProtoMutable())),
- execution_heuristics_(new ExecutionHeuristics()) {
+ query_context_proto_(DCHECK_NOTNULL(query_handle->getQueryContextProtoMutable())) {
query_context_proto_->set_query_id(query_handle_->query_id());
#ifdef QUICKSTEP_DISTRIBUTED
catalog_database_cache_proto_ = DCHECK_NOTNULL(query_handle->getCatalogDatabaseCacheProtoMutable());
@@ -386,7 +385,6 @@ class ExecutionGenerator {
QueryHandle *query_handle_;
QueryPlan *execution_plan_; // A part of QueryHandle.
serialization::QueryContext *query_context_proto_; // A part of QueryHandle.
- std::unique_ptr<ExecutionHeuristics> execution_heuristics_;
#ifdef QUICKSTEP_DISTRIBUTED
serialization::CatalogDatabase *catalog_database_cache_proto_; // A part of QueryHandle.
@@ -422,9 +420,13 @@ class ExecutionGenerator {
* @brief The cost model to use for creating the execution plan.
*/
std::unique_ptr<cost::CostModel> cost_model_;
+ std::unique_ptr<cost::CostModel> simple_cost_model_;
physical::TopLevelPlanPtr top_level_physical_plan_;
+ // Sub-generator for deploying LIP (lookahead information passing) filters.
+ std::unique_ptr<LIPFilterGenerator> lip_filter_generator_;
+
DISALLOW_COPY_AND_ASSIGN(ExecutionGenerator);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/ExecutionHeuristics.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.cpp b/query_optimizer/ExecutionHeuristics.cpp
deleted file mode 100644
index 4fd7320..0000000
--- a/query_optimizer/ExecutionHeuristics.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- **/
-
-#include "query_optimizer/ExecutionHeuristics.hpp"
-
-#include <cstddef>
-#include <utility>
-#include <unordered_map>
-#include <vector>
-
-#include "catalog/CatalogTypedefs.hpp"
-#include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/QueryPlan.hpp"
-#include "utility/Macros.hpp"
-
-#include "glog/logging.h"
-
-namespace quickstep {
-namespace optimizer {
-
-void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
- serialization::QueryContext *query_context_proto) {
- // Currently this only optimizes left deep joins using bloom filters.
- // It uses a simple algorithm to discover the left deep joins.
- // It starts with the first hash join in the plan and keeps on iterating
- // over the next hash joins, till a probe on a different relation id is found.
- // The set of hash joins found in this way forms a chain and can be recognized
- // as a left deep join. It becomes a candidate for optimization.
-
- // The optimization is done by modifying each of the build operators in the chain
- // to generate a bloom filter on the build key during their hash table creation.
- // The leaf-level probe operator is then modified to query all the bloom
- // filters generated from all the build operators in the chain. These
- // bloom filters are queried to test the membership of the probe key
- // just prior to probing the hash table.
-
- QueryPlan::DAGNodeIndex origin_node = 0;
- while (origin_node < hash_joins_.size() - 1) {
- std::vector<std::size_t> chained_nodes;
- chained_nodes.push_back(origin_node);
- for (std::size_t i = origin_node + 1; i < hash_joins_.size(); ++i) {
- const relation_id checked_relation_id = hash_joins_[origin_node].referenced_stored_probe_relation_->getID();
- const relation_id expected_relation_id = hash_joins_[i].referenced_stored_probe_relation_->getID();
- if (checked_relation_id == expected_relation_id) {
- chained_nodes.push_back(i);
- } else {
- break;
- }
- }
-
- // Only chains of length greater than one are suitable candidates for semi-join optimization.
- if (chained_nodes.size() > 1) {
- std::unordered_map<QueryContext::bloom_filter_id, std::vector<attribute_id>> probe_bloom_filter_info;
- for (const std::size_t node : chained_nodes) {
- // Provision for a new bloom filter to be used by the build operator.
- const QueryContext::bloom_filter_id bloom_filter_id = query_context_proto->bloom_filters_size();
- serialization::BloomFilter *bloom_filter_proto = query_context_proto->add_bloom_filters();
-
- // Modify the bloom filter properties based on the statistics of the relation.
- setBloomFilterProperties(bloom_filter_proto, hash_joins_[node].referenced_stored_build_relation_);
-
- // Add build-side bloom filter information to the corresponding hash table proto.
- query_context_proto->mutable_join_hash_tables(hash_joins_[node].join_hash_table_id_)
- ->add_build_side_bloom_filter_id(bloom_filter_id);
-
- probe_bloom_filter_info.insert(std::make_pair(bloom_filter_id, hash_joins_[node].probe_attributes_));
- }
-
- // Add probe-side bloom filter information to the corresponding hash table proto for each build-side bloom filter.
- for (const std::pair<QueryContext::bloom_filter_id, std::vector<attribute_id>>
- &bloom_filter_info : probe_bloom_filter_info) {
- auto *probe_side_bloom_filter =
- query_context_proto->mutable_join_hash_tables(hash_joins_[origin_node].join_hash_table_id_)
- ->add_probe_side_bloom_filters();
- probe_side_bloom_filter->set_probe_side_bloom_filter_id(bloom_filter_info.first);
- for (const attribute_id &probe_attribute_id : bloom_filter_info.second) {
- probe_side_bloom_filter->add_probe_side_attr_ids(probe_attribute_id);
- }
- }
-
- // Add node dependencies from chained build nodes to origin node probe.
- for (std::size_t i = 1; i < chained_nodes.size(); ++i) { // Note: It starts from index 1.
- query_plan->addDirectDependency(hash_joins_[origin_node].join_operator_index_,
- hash_joins_[origin_node + i].build_operator_index_,
- true /* is_pipeline_breaker */);
- }
- }
-
- // Update the origin node.
- origin_node = chained_nodes.back() + 1;
- }
-}
-
-void ExecutionHeuristics::setBloomFilterProperties(serialization::BloomFilter *bloom_filter_proto,
- const CatalogRelation *relation) {
- const std::size_t cardinality = relation->estimateTupleCardinality();
- if (cardinality < kOneThousand) {
- bloom_filter_proto->set_bloom_filter_size(kOneThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kVeryLowSparsityHash);
- } else if (cardinality < kTenThousand) {
- bloom_filter_proto->set_bloom_filter_size(kTenThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kLowSparsityHash);
- } else if (cardinality < kHundredThousand) {
- bloom_filter_proto->set_bloom_filter_size(kHundredThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kMediumSparsityHash);
- } else {
- bloom_filter_proto->set_bloom_filter_size(kMillion / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kHighSparsityHash);
- }
-}
-
-} // namespace optimizer
-} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/ExecutionHeuristics.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.hpp b/query_optimizer/ExecutionHeuristics.hpp
deleted file mode 100644
index 8ad3b7a..0000000
--- a/query_optimizer/ExecutionHeuristics.hpp
+++ /dev/null
@@ -1,157 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- **/
-
-#ifndef QUICKSTEP_QUERY_OPTIMIZER_EXECUTION_HEURISTICS_HPP_
-#define QUICKSTEP_QUERY_OPTIMIZER_EXECUTION_HEURISTICS_HPP_
-
-#include <vector>
-
-#include "catalog/CatalogRelation.hpp"
-#include "catalog/CatalogTypedefs.hpp"
-#include "query_execution/QueryContext.hpp"
-#include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/QueryPlan.hpp"
-#include "utility/Macros.hpp"
-
-#include "glog/logging.h"
-
-namespace quickstep {
-namespace optimizer {
-
-/** \addtogroup QueryOptimizer
- * @{
- */
-
-/**
- * @brief The ExecutionHeuristics compiles certain heuristics for an execution plan
- * as it is being converted to a physical plan. These heuristics can then be
- * used to optimize the execution plan after it has been generated.
- **/
-class ExecutionHeuristics {
- public:
- static const std::size_t kOneHundred = 100;
- static const std::size_t kOneThousand = 1000;
- static const std::size_t kTenThousand = 10000;
- static const std::size_t kHundredThousand = 100000;
- static const std::size_t kMillion = 1000000;
-
- static const std::size_t kCompressionFactor = 10;
-
- static const std::size_t kVeryLowSparsityHash = 1;
- static const std::size_t kLowSparsityHash = 2;
- static const std::size_t kMediumSparsityHash = 5;
- static const std::size_t kHighSparsityHash = 10;
-
- /**
- * @brief A simple internal class that holds information about various
- * hash joins within the execution plan for a query.
- **/
- struct HashJoinInfo {
- HashJoinInfo(const QueryPlan::DAGNodeIndex build_operator_index,
- const QueryPlan::DAGNodeIndex join_operator_index,
- const CatalogRelation *referenced_stored_build_relation,
- const CatalogRelation *referenced_stored_probe_relation,
- std::vector<attribute_id> &&build_attributes,
- std::vector<attribute_id> &&probe_attributes,
- const QueryContext::join_hash_table_id join_hash_table_id)
- : build_operator_index_(build_operator_index),
- join_operator_index_(join_operator_index),
- referenced_stored_build_relation_(referenced_stored_build_relation),
- referenced_stored_probe_relation_(referenced_stored_probe_relation),
- build_attributes_(std::move(build_attributes)),
- probe_attributes_(std::move(probe_attributes)),
- join_hash_table_id_(join_hash_table_id) {
- }
-
- const QueryPlan::DAGNodeIndex build_operator_index_;
- const QueryPlan::DAGNodeIndex join_operator_index_;
- const CatalogRelation *referenced_stored_build_relation_;
- const CatalogRelation *referenced_stored_probe_relation_;
- const std::vector<attribute_id> build_attributes_;
- const std::vector<attribute_id> probe_attributes_;
- const QueryContext::join_hash_table_id join_hash_table_id_;
- };
-
-
- /**
- * @brief Constructor.
- **/
- ExecutionHeuristics() {}
-
- /**
- * @brief Saves information about a hash join used within the execution plan
- * for a query.
- *
- * @param build_operator_index Index of the build operator of the hash join.
- * @param join_operator_index Index of the join operator of the hash join.
- * @param build_relation_id Id of the relation on which hash table is being built.
- * @param probe_relation_id Id of the relation on which hash table is being probed.
- * @param build_attributes List of attributes on which hash table is being built.
- * @param probe_attributes List of attributes on which hash table is being probed.
- * @param join_hash_table_id Id of the hash table which refers to the actual hash
- * table within the query context.
- **/
- inline void addHashJoinInfo(const QueryPlan::DAGNodeIndex build_operator_index,
- const QueryPlan::DAGNodeIndex join_operator_index,
- const CatalogRelation *referenced_stored_build_relation,
- const CatalogRelation *referenced_stored_probe_relation,
- std::vector<attribute_id> &&build_attributes,
- std::vector<attribute_id> &&probe_attributes,
- const QueryContext::join_hash_table_id join_hash_table_id) {
- hash_joins_.push_back(HashJoinInfo(build_operator_index,
- join_operator_index,
- referenced_stored_build_relation,
- referenced_stored_probe_relation,
- std::move(build_attributes),
- std::move(probe_attributes),
- join_hash_table_id));
- }
-
- /**
- * @brief Optimize the execution plan based on heuristics generated
- * during physical plan to execution plan conversion.
- *
- * @param query_plan A mutable reference to the query execution plan.
- * @param query_context_proto A mutable reference to the protobuf representation
- * of the query context.
- **/
- void optimizeExecutionPlan(QueryPlan *query_plan, serialization::QueryContext *query_context_proto);
-
- /**
- * @brief Set the properties of the bloom filter proto based on the statistics
- * of the given relation.
- *
- * @param bloom_filter_proto A mutable reference to the bloom filter protobuf representation.
- * @param relation The catalog relation on which bloom filter is being built.
- **/
- void setBloomFilterProperties(serialization::BloomFilter *bloom_filter_proto,
- const CatalogRelation *relation);
-
- private:
- std::vector<HashJoinInfo> hash_joins_;
-
- DISALLOW_COPY_AND_ASSIGN(ExecutionHeuristics);
-};
-
-/** @} */
-
-} // namespace optimizer
-} // namespace quickstep
-
-#endif /* QUICKSTEP_QUERY_OPTIMIZER_EXECUTION_HEURISTICS_HPP_ */
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/LIPFilterGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/LIPFilterGenerator.cpp b/query_optimizer/LIPFilterGenerator.cpp
new file mode 100644
index 0000000..ef10400
--- /dev/null
+++ b/query_optimizer/LIPFilterGenerator.cpp
@@ -0,0 +1,190 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/LIPFilterGenerator.hpp"
+
+#include <map>
+#include <utility>
+
+#include "catalog/CatalogAttribute.hpp"
+#include "catalog/CatalogTypedefs.hpp"
+#include "query_execution/QueryContext.pb.h"
+#include "relational_operators/RelationalOperator.hpp"
+#include "types/Type.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+#include "utility/lip_filter/LIPFilter.pb.h"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+void LIPFilterGenerator::registerAttributeMap(
+ const P::PhysicalPtr &node,
+ const std::unordered_map<E::ExprId, const CatalogAttribute *> &attribute_substitution_map) {
+ const auto &build_info_map = lip_filter_configuration_->getBuildInfoMap();
+ const auto build_it = build_info_map.find(node);
+ if (build_it != build_info_map.end()) {
+ auto &map_entry = attribute_map_[node];
+ for (const auto &info : build_it->second) {
+ E::ExprId attr_id = info.build_attribute->id();
+ map_entry.emplace(attr_id, attribute_substitution_map.at(attr_id));
+ }
+ }
+ const auto &probe_info_map = lip_filter_configuration_->getProbeInfoMap();
+ const auto probe_it = probe_info_map.find(node);
+ if (probe_it != probe_info_map.end()) {
+ auto &map_entry = attribute_map_[node];
+ for (const auto &info : probe_it->second) {
+ E::ExprId attr_id = info.probe_attribute->id();
+ map_entry.emplace(attr_id, attribute_substitution_map.at(attr_id));
+ }
+ }
+}
+
+void LIPFilterGenerator::deployLIPFilters(QueryPlan *execution_plan,
+ serialization::QueryContext *query_context_proto) const {
+ LIPFilterBuilderMap lip_filter_builder_map;
+
+ // Deploy builders
+ const auto &build_info_map = lip_filter_configuration_->getBuildInfoMap();
+ for (const auto &info : builder_infos_) {
+ const auto build_it = build_info_map.find(info.builder_node);
+ if (build_it != build_info_map.end()) {
+ deployBuilderInternal(execution_plan,
+ query_context_proto,
+ info.builder_node,
+ info.builder_operator_index,
+ build_it->second,
+ &lip_filter_builder_map);
+ }
+ }
+
+ // Deploy probers
+ const auto &probe_info_map = lip_filter_configuration_->getProbeInfoMap();
+ for (const auto &info : prober_infos_) {
+ const auto probe_it = probe_info_map.find(info.prober_node);
+ if (probe_it != probe_info_map.end()) {
+ deployProberInteral(execution_plan,
+ query_context_proto,
+ info.prober_node,
+ info.prober_operator_index,
+ probe_it->second,
+ lip_filter_builder_map);
+ }
+ }
+}
+
+void LIPFilterGenerator::deployBuilderInternal(
+ QueryPlan *execution_plan,
+ serialization::QueryContext *query_context_proto,
+ const physical::PhysicalPtr &builder_node,
+ const QueryPlan::DAGNodeIndex builder_operator_index,
+ const std::vector<physical::LIPFilterBuildInfo> &build_info_vec,
+ LIPFilterBuilderMap *lip_filter_builder_map) const {
+ const auto lip_deployment_index = query_context_proto->lip_filter_deployments_size();
+ auto *lip_filter_deployment_info_proto =
+ query_context_proto->add_lip_filter_deployments();
+ lip_filter_deployment_info_proto->set_action_type(serialization::LIPFilterActionType::BUILD);
+
+ const auto &builder_attribute_map = attribute_map_.at(builder_node);
+ for (const auto &info : build_info_vec) {
+ const QueryContext::lip_filter_id lip_filter_id = query_context_proto->lip_filters_size();
+ serialization::LIPFilter *lip_filter_proto = query_context_proto->add_lip_filters();
+ const CatalogAttribute *target_attr = builder_attribute_map.at(info.build_attribute->id());
+ const Type &attr_type = target_attr->getType();
+
+ switch (info.filter_type) {
+ case LIPFilterType::kSingleIdentityHashFilter: {
+ DCHECK(!attr_type.isVariableLength());
+ lip_filter_proto->set_lip_filter_type(
+ serialization::LIPFilterType::SINGLE_IDENTITY_HASH_FILTER);
+ lip_filter_proto->SetExtension(
+ serialization::SingleIdentityHashFilter::filter_cardinality, info.filter_cardinality);
+ lip_filter_proto->SetExtension(
+ serialization::SingleIdentityHashFilter::attribute_size, attr_type.minimumByteLength());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported LIPFilter type";
+ break;
+ }
+
+ lip_filter_builder_map->emplace(
+ std::make_pair(info.build_attribute->id(), builder_node),
+ std::make_pair(lip_filter_id, builder_operator_index));
+
+ auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_entries();
+ lip_filter_entry_proto->set_lip_filter_id(lip_filter_id);
+ lip_filter_entry_proto->set_attribute_id(target_attr->getID());
+ lip_filter_entry_proto->mutable_attribute_type()->CopyFrom(attr_type.getProto());
+
+ std::cerr << "Build " << info.build_attribute->toString()
+ << " @" << builder_node
+ << " size = " << info.filter_cardinality << "\n";
+ }
+
+ RelationalOperator *relop =
+ execution_plan->getQueryPlanDAGMutable()->getNodePayloadMutable(builder_operator_index);
+ relop->deployLIPFilter(lip_deployment_index);
+}
+
+void LIPFilterGenerator::deployProberInteral(
+ QueryPlan *execution_plan,
+ serialization::QueryContext *query_context_proto,
+ const physical::PhysicalPtr &prober_node,
+ const QueryPlan::DAGNodeIndex prober_operator_index,
+ const std::vector<physical::LIPFilterProbeInfo> &probe_info_vec,
+ const LIPFilterBuilderMap &lip_filter_builder_map) const {
+ const auto lip_deployment_index = query_context_proto->lip_filter_deployments_size();
+ auto *lip_filter_deployment_info_proto =
+ query_context_proto->add_lip_filter_deployments();
+ lip_filter_deployment_info_proto->set_action_type(serialization::LIPFilterActionType::PROBE);
+
+ const auto &prober_attribute_map = attribute_map_.at(prober_node);
+ for (const auto &info : probe_info_vec) {
+ const auto &builder_info =
+ lip_filter_builder_map.at(
+ std::make_pair(info.build_attribute->id(), info.builder));
+ const CatalogAttribute *target_attr = prober_attribute_map.at(info.probe_attribute->id());
+
+ auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_entries();
+ lip_filter_entry_proto->set_lip_filter_id(builder_info.first);
+ lip_filter_entry_proto->set_attribute_id(target_attr->getID());
+ lip_filter_entry_proto->mutable_attribute_type()->CopyFrom(
+ target_attr->getType().getProto());
+
+ execution_plan->addDirectDependency(prober_operator_index,
+ builder_info.second,
+ true /* is_pipeline_breaker */);
+
+ std::cerr << "Probe " << info.probe_attribute->toString()
+ << " @" << prober_node << "\n";
+ }
+
+ RelationalOperator *relop =
+ execution_plan->getQueryPlanDAGMutable()->getNodePayloadMutable(prober_operator_index);
+ relop->deployLIPFilter(lip_deployment_index);
+}
+
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/LIPFilterGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/LIPFilterGenerator.hpp b/query_optimizer/LIPFilterGenerator.hpp
new file mode 100644
index 0000000..4b597cf
--- /dev/null
+++ b/query_optimizer/LIPFilterGenerator.hpp
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_LIP_FILTER_GENERATOR_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_LIP_FILTER_GENERATOR_HPP_
+
+#include <map>
+#include <unordered_map>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "query_execution/QueryContext.hpp"
+#include "query_execution/QueryContext.pb.h"
+#include "query_optimizer/QueryPlan.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+class CatalogAttribute;
+
+namespace optimizer {
+
+/** \addtogroup QueryOptimizer
+ * @{
+ */
+
+class LIPFilterGenerator {
+ public:
+ LIPFilterGenerator(const physical::LIPFilterConfigurationPtr &lip_filter_configuration)
+ : lip_filter_configuration_(lip_filter_configuration) {
+ DCHECK(lip_filter_configuration_ != nullptr);
+ }
+
+ void registerAttributeMap(
+ const physical::PhysicalPtr &node,
+ const std::unordered_map<expressions::ExprId, const CatalogAttribute *> &attribute_substitution_map);
+
+ void addAggregateInfo(const physical::AggregatePtr &aggregate,
+ const QueryPlan::DAGNodeIndex aggregate_operator_index) {
+ prober_infos_.emplace_back(aggregate, aggregate_operator_index);
+ }
+
+ void addHashJoinInfo(const physical::HashJoinPtr &hash_join,
+ const QueryPlan::DAGNodeIndex build_operator_index,
+ const QueryPlan::DAGNodeIndex join_operator_index) {
+ builder_infos_.emplace_back(hash_join, build_operator_index);
+ prober_infos_.emplace_back(hash_join, join_operator_index);
+ }
+
+ void addSelectionInfo(const physical::SelectionPtr &selection,
+ const QueryPlan::DAGNodeIndex select_operator_index) {
+ prober_infos_.emplace_back(selection, select_operator_index);
+ }
+
+ void deployLIPFilters(QueryPlan *execution_plan,
+ serialization::QueryContext *query_context_proto) const;
+
+ private:
+ struct BuilderInfo {
+ BuilderInfo(const physical::PhysicalPtr &builder_node_in,
+ const QueryPlan::DAGNodeIndex builder_operator_index_in)
+ : builder_node(builder_node_in),
+ builder_operator_index(builder_operator_index_in) {
+ }
+ const physical::PhysicalPtr builder_node;
+ const QueryPlan::DAGNodeIndex builder_operator_index;
+ };
+
+ struct ProberInfo {
+ ProberInfo(const physical::PhysicalPtr &prober_node_in,
+ const QueryPlan::DAGNodeIndex prober_operator_index_in)
+ : prober_node(prober_node_in),
+ prober_operator_index(prober_operator_index_in) {
+ }
+ const physical::PhysicalPtr prober_node;
+ const QueryPlan::DAGNodeIndex prober_operator_index;
+ };
+
+ typedef std::map<std::pair<expressions::ExprId, physical::PhysicalPtr>,
+ std::pair<QueryContext::lip_filter_id, QueryPlan::DAGNodeIndex>> LIPFilterBuilderMap;
+
+ void deployBuilderInternal(QueryPlan *execution_plan,
+ serialization::QueryContext *query_context_proto,
+ const physical::PhysicalPtr &builder_node,
+ const QueryPlan::DAGNodeIndex builder_operator_index,
+ const std::vector<physical::LIPFilterBuildInfo> &build_info_vec,
+ LIPFilterBuilderMap *lip_filter_builder_map) const;
+
+ void deployProberInteral(QueryPlan *execution_plan,
+ serialization::QueryContext *query_context_proto,
+ const physical::PhysicalPtr &prober_node,
+ const QueryPlan::DAGNodeIndex prober_operator_index,
+ const std::vector<physical::LIPFilterProbeInfo> &probe_info_vec,
+ const LIPFilterBuilderMap &lip_filter_builder_map) const;
+
+ const physical::LIPFilterConfigurationPtr lip_filter_configuration_;
+ std::map<physical::PhysicalPtr, std::map<expressions::ExprId, const CatalogAttribute *>> attribute_map_;
+ std::vector<BuilderInfo> builder_infos_;
+ std::vector<ProberInfo> prober_infos_;
+};
+
+
+/** @} */
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif /* QUICKSTEP_QUERY_OPTIMIZER_LIP_FILTER_GENERATOR_HPP_ */
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 8f19702..7781e9b 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -26,6 +26,7 @@
#include "query_optimizer/Validator.hpp"
#include "query_optimizer/logical/Logical.hpp"
#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/AttachLIPFilters.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
#include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
#include "query_optimizer/rules/SwapProbeBuild.hpp"
@@ -95,11 +96,14 @@ P::PhysicalPtr PhysicalGenerator::generateInitialPlan(
P::PhysicalPtr PhysicalGenerator::optimizePlan() {
std::vector<std::unique_ptr<Rule<P::Physical>>> rules;
+ rules.emplace_back(new PruneColumns());
if (FLAGS_reorder_hash_joins) {
rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
+ rules.emplace_back(new PruneColumns());
+ } else {
+ rules.emplace_back(new SwapProbeBuild());
}
- rules.emplace_back(new PruneColumns());
- rules.emplace_back(new SwapProbeBuild());
+ rules.emplace_back(new AttachLIPFilters());
for (std::unique_ptr<Rule<P::Physical>> &rule : rules) {
physical_plan_ = rule->apply(physical_plan_);
@@ -110,7 +114,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
DVLOG(4) << "Optimized physical plan:\n" << physical_plan_->toString();
if (FLAGS_visualize_plan) {
- quickstep::PlanVisualizer plan_visualizer;
+ quickstep::PlanVisualizer plan_visualizer;
std::cerr << "\n" << plan_visualizer.visualize(physical_plan_) << "\n";
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index 8d254fa..1075739 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -358,7 +358,7 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
std::static_pointer_cast<const E::LogicalAnd>(filter_predicate);
double selectivity = 1.0;
for (const auto &predicate : logical_and->operands()) {
- selectivity = selectivity * estimateSelectivityForPredicate(predicate, physical_plan);
+ selectivity = std::min(selectivity, estimateSelectivityForPredicate(predicate, physical_plan));
}
return selectivity;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/physical/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/CMakeLists.txt b/query_optimizer/physical/CMakeLists.txt
index 3b7d3f0..5c2cd0b 100644
--- a/query_optimizer/physical/CMakeLists.txt
+++ b/query_optimizer/physical/CMakeLists.txt
@@ -27,6 +27,7 @@ add_library(quickstep_queryoptimizer_physical_HashJoin HashJoin.cpp HashJoin.hpp
add_library(quickstep_queryoptimizer_physical_InsertSelection InsertSelection.cpp InsertSelection.hpp)
add_library(quickstep_queryoptimizer_physical_InsertTuple InsertTuple.cpp InsertTuple.hpp)
add_library(quickstep_queryoptimizer_physical_Join ../../empty_src.cpp Join.hpp)
+add_library(quickstep_queryoptimizer_physical_LIPFilterConfiguration ../../empty_src.cpp LIPFilterConfiguration.hpp)
add_library(quickstep_queryoptimizer_physical_NestedLoopsJoin NestedLoopsJoin.cpp NestedLoopsJoin.hpp)
add_library(quickstep_queryoptimizer_physical_PatternMatcher ../../empty_src.cpp PatternMatcher.hpp)
add_library(quickstep_queryoptimizer_physical_Physical ../../empty_src.cpp Physical.hpp)
@@ -150,6 +151,10 @@ target_link_libraries(quickstep_queryoptimizer_physical_Join
quickstep_queryoptimizer_expressions_NamedExpression
quickstep_queryoptimizer_physical_Physical
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_physical_LIPFilterConfiguration
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_utility_Macros
+ quickstep_utility_lipfilter_LIPFilter)
target_link_libraries(quickstep_queryoptimizer_physical_NestedLoopsJoin
glog
quickstep_queryoptimizer_OptimizerTree
@@ -237,6 +242,7 @@ target_link_libraries(quickstep_queryoptimizer_physical_TopLevelPlan
quickstep_queryoptimizer_expressions_AttributeReference
quickstep_queryoptimizer_expressions_ExprId
quickstep_queryoptimizer_expressions_ExpressionUtil
+ quickstep_queryoptimizer_physical_LIPFilterConfiguration
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_physical_PhysicalType
quickstep_utility_Cast
@@ -279,6 +285,7 @@ target_link_libraries(quickstep_queryoptimizer_physical
quickstep_queryoptimizer_physical_InsertSelection
quickstep_queryoptimizer_physical_InsertTuple
quickstep_queryoptimizer_physical_Join
+ quickstep_queryoptimizer_physical_LIPFilterConfiguration
quickstep_queryoptimizer_physical_NestedLoopsJoin
quickstep_queryoptimizer_physical_PatternMatcher
quickstep_queryoptimizer_physical_Physical
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/physical/LIPFilterConfiguration.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/LIPFilterConfiguration.hpp b/query_optimizer/physical/LIPFilterConfiguration.hpp
new file mode 100644
index 0000000..f9236e5
--- /dev/null
+++ b/query_optimizer/physical/LIPFilterConfiguration.hpp
@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_LIP_FILTER_CONFIGURATION_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_LIP_FILTER_CONFIGURATION_HPP_
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+namespace physical {
+
+/** \addtogroup OptimizerPhysical
+ * @{
+ */
+
+class Physical;
+typedef std::shared_ptr<const Physical> PhysicalPtr;
+
+struct LIPFilterBuildInfo {
+ LIPFilterBuildInfo(const expressions::AttributeReferencePtr &build_attribute_in,
+ const std::size_t filter_cardinality_in,
+ const LIPFilterType &filter_type_in)
+ : build_attribute(build_attribute_in),
+ filter_cardinality(filter_cardinality_in),
+ filter_type(filter_type_in) {
+ }
+ expressions::AttributeReferencePtr build_attribute;
+ std::size_t filter_cardinality;
+ LIPFilterType filter_type;
+};
+
+struct LIPFilterProbeInfo {
+ LIPFilterProbeInfo(const expressions::AttributeReferencePtr &probe_attribute_in,
+ const expressions::AttributeReferencePtr &build_attribute_in,
+ const PhysicalPtr &builder_in)
+ : probe_attribute(probe_attribute_in),
+ build_attribute(build_attribute_in),
+ builder(builder_in) {
+ }
+ expressions::AttributeReferencePtr probe_attribute;
+ expressions::AttributeReferencePtr build_attribute;
+ PhysicalPtr builder;
+};
+
+
+class LIPFilterConfiguration;
+typedef std::shared_ptr<const LIPFilterConfiguration> LIPFilterConfigurationPtr;
+
+class LIPFilterConfiguration {
+ public:
+ LIPFilterConfiguration() {
+ }
+
+ void addBuildInfo(const expressions::AttributeReferencePtr &build_attribute,
+ const PhysicalPtr &builder,
+ const std::size_t filter_size,
+ const LIPFilterType &filter_type) {
+ build_info_map_[builder].emplace_back(
+ build_attribute, filter_size, filter_type);
+ }
+
+ void addProbeInfo(const expressions::AttributeReferencePtr &probe_attribute,
+ const PhysicalPtr &prober,
+ const expressions::AttributeReferencePtr &build_attribute,
+ const PhysicalPtr &builder) {
+ probe_info_map_[prober].emplace_back(
+ probe_attribute, build_attribute, builder);
+ }
+
+ const std::map<PhysicalPtr, std::vector<LIPFilterBuildInfo>>& getBuildInfoMap() const {
+ return build_info_map_;
+ }
+
+ const std::map<PhysicalPtr, std::vector<LIPFilterProbeInfo>>& getProbeInfoMap() const {
+ return probe_info_map_;
+ }
+
+ private:
+ std::map<PhysicalPtr, std::vector<LIPFilterBuildInfo>> build_info_map_;
+ std::map<PhysicalPtr, std::vector<LIPFilterProbeInfo>> probe_info_map_;
+
+ DISALLOW_COPY_AND_ASSIGN(LIPFilterConfiguration);
+};
+
+/** @} */
+
+} // namespace physical
+} // namespace optimizer
+} // namespace quickstep
+
+#endif /* QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_LIP_FILTER_CONFIGURATION_HPP_ */
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/physical/TopLevelPlan.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/TopLevelPlan.hpp b/query_optimizer/physical/TopLevelPlan.hpp
index 8f07dec..c6ea940 100644
--- a/query_optimizer/physical/TopLevelPlan.hpp
+++ b/query_optimizer/physical/TopLevelPlan.hpp
@@ -29,6 +29,7 @@
#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ExprId.hpp"
#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/PhysicalType.hpp"
#include "utility/Macros.hpp"
@@ -120,6 +121,18 @@ class TopLevelPlan : public Physical {
return false;
}
+ TopLevelPlanPtr copyWithLIPFilterConfiguration(
+ const LIPFilterConfigurationPtr &new_lip_filter_configuration) const {
+ return TopLevelPlan::Create(plan_,
+ shared_subplans_,
+ uncorrelated_subquery_map_,
+ new_lip_filter_configuration);
+ }
+
+ const LIPFilterConfigurationPtr& lip_filter_configuration() const {
+ return lip_filter_configuration_;
+ }
+
/**
* @brief Creates a TopLevelPlan.
*
@@ -133,10 +146,12 @@ class TopLevelPlan : public Physical {
const PhysicalPtr &plan,
const std::vector<PhysicalPtr> &shared_subplans = {},
const std::unordered_map<expressions::ExprId, int> &uncorrelated_subquery_map
- = std::unordered_map<expressions::ExprId, int>()) {
+ = std::unordered_map<expressions::ExprId, int>(),
+ const LIPFilterConfigurationPtr &lip_filter_configuration = nullptr) {
return TopLevelPlanPtr(new TopLevelPlan(plan,
shared_subplans,
- uncorrelated_subquery_map));
+ uncorrelated_subquery_map,
+ lip_filter_configuration));
}
protected:
@@ -151,10 +166,12 @@ class TopLevelPlan : public Physical {
private:
TopLevelPlan(const PhysicalPtr &plan,
const std::vector<PhysicalPtr> &shared_subplans,
- const std::unordered_map<expressions::ExprId, int> &uncorrelated_subquery_map)
+ const std::unordered_map<expressions::ExprId, int> &uncorrelated_subquery_map,
+ const LIPFilterConfigurationPtr &lip_filter_configuration)
: plan_(plan),
shared_subplans_(shared_subplans),
- uncorrelated_subquery_map_(uncorrelated_subquery_map) {
+ uncorrelated_subquery_map_(uncorrelated_subquery_map),
+ lip_filter_configuration_(lip_filter_configuration) {
addChild(plan);
for (const PhysicalPtr &shared_subplan : shared_subplans) {
addChild(shared_subplan);
@@ -165,6 +182,7 @@ class TopLevelPlan : public Physical {
// Stored in the topological ordering based on dependencies.
std::vector<PhysicalPtr> shared_subplans_;
std::unordered_map<expressions::ExprId, int> uncorrelated_subquery_map_;
+ LIPFilterConfigurationPtr lip_filter_configuration_;
DISALLOW_COPY_AND_ASSIGN(TopLevelPlan);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/rules/AttachLIPFilters.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachLIPFilters.cpp b/query_optimizer/rules/AttachLIPFilters.cpp
new file mode 100644
index 0000000..d67eacd
--- /dev/null
+++ b/query_optimizer/rules/AttachLIPFilters.cpp
@@ -0,0 +1,259 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/AttachLIPFilters.hpp"
+
+#include <algorithm>
+#include <iterator>
+#include <memory>
+#include <set>
+#include <stack>
+#include <unordered_set>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr AttachLIPFilters::apply(const P::PhysicalPtr &input) {
+ DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+ const P::TopLevelPlanPtr top_level_plan =
+ std::static_pointer_cast<const P::TopLevelPlan>(input);
+ cost_model_.reset(
+ new cost::StarSchemaSimpleCostModel(
+ top_level_plan->shared_subplans()));
+ lip_filter_configuration_.reset(new P::LIPFilterConfiguration());
+
+ std::set<E::ExprId> already_filtered_attributes;
+ attachLIPFilters(NodeList(input), &already_filtered_attributes);
+
+// for (const auto &pair : attaches_) {
+// for (const auto &build_info : pair.second->getBuildInfo()) {
+// std::cout << "Build: " << build_info.build_attribute->attribute_alias() << "\n";
+// }
+// for (const auto &probe_info : pair.second->getProbeInfo()) {
+// std::cout << "Probe: " << probe_info.probe_attribute->attribute_alias() << "\n";
+// }
+// }
+
+ P::PhysicalPtr output;
+ if (!lip_filter_configuration_->getBuildInfoMap().empty() ||
+ !lip_filter_configuration_->getProbeInfoMap().empty()) {
+ output = top_level_plan->copyWithLIPFilterConfiguration(
+ P::LIPFilterConfigurationPtr(lip_filter_configuration_.release()));
+ } else {
+ output = input;
+ }
+ return output;
+}
+
+void AttachLIPFilters::attachLIPFilters(
+ const NodeList &path,
+ std::set<expressions::ExprId> *already_filtered_attributes) {
+ const P::PhysicalPtr &node = path.node;
+
+ // First process child nodes
+ for (const auto &child : node->children()) {
+ std::set<E::ExprId> child_filtered_attributes;
+ attachLIPFilters(path.cons(child), &child_filtered_attributes);
+ already_filtered_attributes->insert(child_filtered_attributes.begin(),
+ child_filtered_attributes.end());
+ }
+
+ // Attach LIP filters to HashJoin/Selection/Aggregate nodes
+ P::PhysicalPtr probe_child = nullptr;
+ switch (node->getPhysicalType()) {
+ case P::PhysicalType::kHashJoin:
+ probe_child = std::static_pointer_cast<const P::HashJoin>(node)->left();
+ break;
+ case P::PhysicalType::kSelection:
+ probe_child = std::static_pointer_cast<const P::Selection>(node)->input();
+ break;
+ case P::PhysicalType::kAggregate:
+ probe_child = std::static_pointer_cast<const P::Aggregate>(node)->input();
+ break;
+ default:
+ break;
+ }
+
+ if (probe_child != nullptr &&
+ cost_model_->estimateCardinality(probe_child) > 10000000) {
+ const auto &candidate_lip_filters = getProbeSideInfo(path.cons(probe_child));
+ if (!candidate_lip_filters.empty()) {
+ std::map<E::AttributeReferencePtr, LIPFilterInfoPtr> selected_filters;
+ for (const auto &info : candidate_lip_filters) {
+ auto it = selected_filters.find(info->attribute);
+ if (it == selected_filters.end()) {
+ selected_filters.emplace(info->attribute, info);
+ } else if (LIPFilterInfo::isBetterThan(*info, *it->second)) {
+ it->second = info;
+ }
+ }
+
+ for (const auto &pair : selected_filters) {
+ const E::ExprId source_attr_id = pair.second->source_attribute->id();
+ if (already_filtered_attributes->find(source_attr_id)
+ == already_filtered_attributes->end()) {
+ lip_filter_configuration_->addBuildInfo(
+ pair.second->source_attribute,
+ pair.second->source,
+ pair.second->estimated_cardinality * 8,
+ LIPFilterType::kSingleIdentityHashFilter);
+ lip_filter_configuration_->addProbeInfo(
+ pair.first,
+ node,
+ pair.second->source_attribute,
+ pair.second->source);
+ already_filtered_attributes->emplace(source_attr_id);
+ }
+ }
+ }
+ }
+}
+
+const std::vector<AttachLIPFilters::LIPFilterInfoPtr>& AttachLIPFilters
+ ::getBuildSideInfo(const NodeList &path) {
+ const P::PhysicalPtr &node = path.node;
+ if (build_side_info_.find(node) == build_side_info_.end()) {
+ std::vector<LIPFilterInfoPtr> lip_filters;
+
+ // 1. Gather candidate LIP filters propagated from descendant nodes.
+ std::unordered_set<E::ExprId> output_attribute_ids;
+ for (const auto &attr : node->getOutputAttributes()) {
+ output_attribute_ids.emplace(attr->id());
+ }
+ switch (node->getPhysicalType()) {
+ case P::PhysicalType::kAggregate:
+ case P::PhysicalType::kSelection:
+ case P::PhysicalType::kHashJoin: {
+ for (const P::PhysicalPtr &child : node->children()) {
+ for (const LIPFilterInfoPtr &info : getBuildSideInfo(path.cons(child))) {
+ lip_filters.emplace_back(info);
+ }
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ // 2. Consider the parent physical node. If it is a HashJoin,
+ // then each build-side join attribute is a candidate LIP filter
+ // which can be built by the BuildHashOperator that corresponds
+ // to the parent HashJoin node.
+ P::HashJoinPtr hash_join;
+ if (path.cdr() != nullptr &&
+ P::SomeHashJoin::MatchesWithConditionalCast(path.cdr()->node, &hash_join)) {
+ const P::PhysicalPtr &build_node = hash_join->right();
+ // TODO(jianqiao): consider probe-side info to allow cascading propagation.
+ double selectivity = cost_model_->estimateSelectivity(build_node);
+ // Only consider attributes that are selective.
+ if (selectivity < 1.0) {
+ std::size_t cardinality = cost_model_->estimateCardinality(build_node);
+ for (const auto &attr : hash_join->right_join_attributes()) {
+ lip_filters.emplace_back(
+ std::make_shared<LIPFilterInfo>(attr,
+ path.cdr()->node,
+ path.depth,
+ selectivity,
+ cardinality));
+ }
+ }
+ }
+ build_side_info_.emplace(node, std::move(lip_filters));
+ }
+ return build_side_info_.at(node);
+}
+
+const std::vector<AttachLIPFilters::LIPFilterInfoPtr>& AttachLIPFilters
+ ::getProbeSideInfo(const NodeList &path) {
+ const P::PhysicalPtr &node = path.node;
+ if (probe_side_info_.find(node) == probe_side_info_.end()) {
+ std::vector<LIPFilterInfoPtr> lip_filters;
+ if (path.cdr() != nullptr) {
+ // 1. Gather candidate LIP filters propagated from ancestor nodes.
+ const auto &parent_lip_filters = getProbeSideInfo(*path.cdr());
+ if (!parent_lip_filters.empty()) {
+ std::unordered_set<E::ExprId> output_attribute_ids;
+ for (const auto &attr : node->getOutputAttributes()) {
+ output_attribute_ids.emplace(attr->id());
+ }
+ for (const auto &info : parent_lip_filters) {
+ if (output_attribute_ids.find(info->attribute->id()) != output_attribute_ids.end()) {
+ lip_filters.emplace_back(info);
+ }
+ }
+ }
+
+ // 2. Consider the parent physical node. If it is an InnerHashJoin or
+ // LeftSemiHashJoin, then we can propagate the build-side LIP filters
+ // to the probe-side.
+ P::HashJoinPtr hash_join;
+ if (P::SomeHashJoin::MatchesWithConditionalCast(path.cdr()->node, &hash_join) &&
+ (hash_join->join_type() == P::HashJoin::JoinType::kInnerJoin ||
+ hash_join->join_type() == P::HashJoin::JoinType::kLeftSemiJoin)) {
+ const P::PhysicalPtr &build_side_child = hash_join->right();
+ std::unordered_map<E::ExprId, E::AttributeReferencePtr> join_attribute_pairs;
+ for (std::size_t i = 0; i < hash_join->left_join_attributes().size(); ++i) {
+ const E::AttributeReferencePtr probe_side_join_attribute =
+ hash_join->left_join_attributes()[i];
+ const E::AttributeReferencePtr build_side_join_attribute =
+ hash_join->right_join_attributes()[i];
+ join_attribute_pairs.emplace(build_side_join_attribute->id(),
+ probe_side_join_attribute);
+ }
+ for (const auto &info : getBuildSideInfo(path.cdr()->cons(build_side_child))) {
+ const auto pair_it = join_attribute_pairs.find(info->attribute->id());
+ if (pair_it != join_attribute_pairs.end()) {
+ lip_filters.emplace_back(
+ std::make_shared<LIPFilterInfo>(pair_it->second,
+ info->source,
+ info->depth,
+ info->estimated_selectivity,
+ info->estimated_cardinality,
+ info->attribute));
+ }
+ }
+ }
+ }
+ probe_side_info_.emplace(node, std::move(lip_filters));
+ }
+ return probe_side_info_.at(node);
+}
+
+} // namespace optimizer
+} // namespace quickstep
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/rules/AttachLIPFilters.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachLIPFilters.hpp b/query_optimizer/rules/AttachLIPFilters.hpp
new file mode 100644
index 0000000..d0a6fa5
--- /dev/null
+++ b/query_optimizer/rules/AttachLIPFilters.hpp
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_LIP_FILTERS_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_LIP_FILTERS_HPP_
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <set>
+#include <stack>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ * @{
+ */
+
+class AttachLIPFilters : public Rule<physical::Physical> {
+ public:
+ AttachLIPFilters() {}
+
+ ~AttachLIPFilters() override {}
+
+ std::string getName() const override {
+ return "AttachLIPFilters";
+ }
+
+ physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+ struct LIPFilterInfo {
+ LIPFilterInfo(const expressions::AttributeReferencePtr &attribute_in,
+ const physical::PhysicalPtr &source_in,
+ const int depth_in,
+ const double estimated_selectivity_in,
+ const std::size_t estimated_cardinality_in,
+ const expressions::AttributeReferencePtr &source_attribute_in = nullptr)
+ : attribute(attribute_in),
+ source(source_in),
+ depth(depth_in),
+ estimated_selectivity(estimated_selectivity_in),
+ estimated_cardinality(estimated_cardinality_in),
+ source_attribute(
+ source_attribute_in == nullptr
+ ? attribute_in
+ : source_attribute_in) {
+
+ }
+ static bool isBetterThan(const LIPFilterInfo &a, const LIPFilterInfo &b) {
+ if (a.estimated_selectivity == b.estimated_selectivity) {
+ return a.depth > b.depth;
+ } else {
+ return a.estimated_selectivity < b.estimated_selectivity;
+ }
+ }
+ expressions::AttributeReferencePtr attribute;
+ physical::PhysicalPtr source;
+ int depth;
+ double estimated_selectivity;
+ std::size_t estimated_cardinality;
+ expressions::AttributeReferencePtr source_attribute;
+ };
+
+ typedef std::shared_ptr<const LIPFilterInfo> LIPFilterInfoPtr;
+
+ struct NodeList {
+ NodeList(const physical::PhysicalPtr &node_in)
+ : node(node_in),
+ next(nullptr),
+ depth(0) {
+ }
+ NodeList(const physical::PhysicalPtr &node_in,
+ const NodeList *next_in,
+ const int depth_in)
+ : node(node_in),
+ next(next_in),
+ depth(depth_in) {
+ }
+ inline const NodeList *cdr() const {
+ return next;
+ }
+ inline const NodeList cons(const physical::PhysicalPtr &new_node) const {
+ return NodeList(new_node, this, depth+1);
+ }
+ const physical::PhysicalPtr node;
+ const NodeList *next;
+ const int depth;
+ };
+
+ void attachLIPFilters(const NodeList &path,
+ std::set<expressions::ExprId> *already_filtered_attributes);
+
+ const std::vector<LIPFilterInfoPtr>& getBuildSideInfo(const NodeList &path);
+
+ const std::vector<LIPFilterInfoPtr>& getProbeSideInfo(const NodeList &path);
+
+ std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
+ std::map<physical::PhysicalPtr, std::vector<LIPFilterInfoPtr>> build_side_info_;
+ std::map<physical::PhysicalPtr, std::vector<LIPFilterInfoPtr>> probe_side_info_;
+ std::unique_ptr<physical::LIPFilterConfiguration> lip_filter_configuration_;
+
+ DISALLOW_COPY_AND_ASSIGN(AttachLIPFilters);
+};
+
+/** @} */
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif /* QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_LIP_FILTERS_HPP_ */
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72b349d5/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index d9709ce..944bfd6 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -18,6 +18,7 @@
add_subdirectory(tests)
# Declare micro-libs:
+add_library(quickstep_queryoptimizer_rules_AttachLIPFilters AttachLIPFilters.cpp AttachLIPFilters.hpp)
add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp)
add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp)
add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
@@ -36,6 +37,22 @@ add_library(quickstep_queryoptimizer_rules_UnnestSubqueries UnnestSubqueries.cpp
# Link dependencies:
+target_link_libraries(quickstep_queryoptimizer_rules_AttachLIPFilters
+ quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExprId
+ quickstep_queryoptimizer_expressions_NamedExpression
+ quickstep_queryoptimizer_expressions_PatternMatcher
+ quickstep_queryoptimizer_expressions_Predicate
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_LIPFilterConfiguration
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_queryoptimizer_physical_TopLevelPlan
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_Macros
+ quickstep_utility_lipfilter_LIPFilter)
target_link_libraries(quickstep_queryoptimizer_rules_BottomUpRule
glog
quickstep_queryoptimizer_rules_Rule
@@ -127,6 +144,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOpti
quickstep_queryoptimizer_physical_PhysicalType
quickstep_queryoptimizer_physical_TopLevelPlan
quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_DisjointTreeForest
quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_rules_SwapProbeBuild
quickstep_queryoptimizer_costmodel_SimpleCostModel
@@ -187,6 +205,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_UpdateExpression
# Module all-in-one library:
add_library(quickstep_queryoptimizer_rules ../../empty_src.cpp OptimizerRulesModule.hpp)
target_link_libraries(quickstep_queryoptimizer_rules
+ quickstep_queryoptimizer_rules_AttachLIPFilters
quickstep_queryoptimizer_rules_BottomUpRule
quickstep_queryoptimizer_rules_CollapseProject
quickstep_queryoptimizer_rules_GenerateJoins