You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/10/12 23:26:09 UTC
[1/2] incubator-quickstep git commit: Optimizer changes for the
LIPFilter feature. [Forced Update!]
Repository: incubator-quickstep
Updated Branches:
refs/heads/lip-refactor-optimizer 530cf2761 -> 8740f50a7 (forced update)
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/tests/ExecutionHeuristics_unittest.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/ExecutionHeuristics_unittest.cpp b/query_optimizer/tests/ExecutionHeuristics_unittest.cpp
deleted file mode 100644
index 73b3e84..0000000
--- a/query_optimizer/tests/ExecutionHeuristics_unittest.cpp
+++ /dev/null
@@ -1,311 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- **/
-
-#include <cstddef>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "catalog/Catalog.hpp"
-#include "catalog/CatalogDatabase.hpp"
-#include "catalog/CatalogTypedefs.hpp"
-#include "query_execution/QueryContext.hpp"
-#include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/ExecutionHeuristics.hpp"
-#include "query_optimizer/QueryPlan.hpp"
-#include "relational_operators/BuildHashOperator.hpp"
-#include "relational_operators/HashJoinOperator.hpp"
-#include "utility/Macros.hpp"
-
-#include "glog/logging.h"
-#include "gtest/gtest.h"
-
-namespace quickstep {
-namespace optimizer {
-
-namespace {
-constexpr std::size_t kQueryId = 0;
-}
-
-class ExecutionHeuristicsTest : public ::testing::Test {
- protected:
- virtual void SetUp() {
- db_ = cat_.getDatabaseByIdMutable(cat_.addDatabase(new CatalogDatabase(nullptr, "db")));
- execution_heuristics_.reset(new ExecutionHeuristics());
- query_plan_.reset(new QueryPlan());
- query_context_proto_.reset(new serialization::QueryContext());
- }
-
- CatalogRelation* createCatalogRelation(const std::string &name, bool temporary = false) {
- return db_->getRelationByIdMutable(db_->addRelation(new CatalogRelation(nullptr, name, -1, temporary)));
- }
-
- void addDummyHashJoinInfo(ExecutionHeuristics *execution_heuristics,
- const QueryPlan::DAGNodeIndex build_operator_index,
- const QueryPlan::DAGNodeIndex join_operator_index,
- const CatalogRelation *build_relation,
- const CatalogRelation *probe_relation,
- const attribute_id build_attribute_id,
- const attribute_id probe_attribute_id,
- const QueryContext::join_hash_table_id join_hash_table_id) {
- std::vector<attribute_id> build_attribute_ids(1, build_attribute_id);
- std::vector<attribute_id> probe_attribute_ids(1, probe_attribute_id);
- execution_heuristics->addHashJoinInfo(build_operator_index,
- join_operator_index,
- build_relation,
- probe_relation,
- std::move(build_attribute_ids),
- std::move(probe_attribute_ids),
- join_hash_table_id);
- }
-
- QueryPlan::DAGNodeIndex createDummyBuildHashOperator(QueryPlan *query_plan,
- const CatalogRelation *build_relation,
- const attribute_id build_attribute_id,
- const QueryContext::join_hash_table_id join_hash_table_index) {
- std::vector<attribute_id> build_attribute_ids;
- build_attribute_ids.push_back(build_attribute_id);
- QueryPlan::DAGNodeIndex build_operator_index =
- query_plan->addRelationalOperator(new BuildHashOperator(kQueryId,
- *build_relation,
- true,
- build_attribute_ids,
- false,
- join_hash_table_index));
- return build_operator_index;
- }
-
- QueryPlan::DAGNodeIndex createDummyHashJoinOperator(QueryPlan *query_plan,
- const CatalogRelation *build_relation,
- const CatalogRelation *probe_relation,
- const attribute_id probe_attribute_id,
- const QueryContext::join_hash_table_id join_hash_table_index) {
- std::vector<attribute_id> probe_attribute_ids;
- probe_attribute_ids.push_back(probe_attribute_id);
- QueryPlan::DAGNodeIndex join_operator_index =
- query_plan->addRelationalOperator(
- new HashJoinOperator(kQueryId,
- *build_relation,
- *probe_relation,
- true,
- probe_attribute_ids,
- false,
- *probe_relation,
- 0,
- join_hash_table_index,
- 0,
- 0));
- return join_operator_index;
- }
-
- Catalog cat_;
- CatalogDatabase *db_; // db_ is owned by cat_.
- std::unique_ptr<QueryPlan> query_plan_;
- std::unique_ptr<serialization::QueryContext> query_context_proto_;
- std::unique_ptr<ExecutionHeuristics> execution_heuristics_;
-};
-
-TEST_F(ExecutionHeuristicsTest, HashJoinOptimizedTest) {
- // This test case creates three hash joins, all of which are being probed on the same relation.
- // Since the probe are being made on the same relation, ExecutionHeuristics should optimize
- // these hash joins using bloom filters.
-
- const CatalogRelation *build_relation_1 = createCatalogRelation("build_relation_1");
- const CatalogRelation *build_relation_2 = createCatalogRelation("build_relation_2");
- const CatalogRelation *build_relation_3 = createCatalogRelation("build_relation_3");
- const CatalogRelation *probe_relation_1 = createCatalogRelation("probe_relation_1");
-
- const attribute_id build_attribute_id_1 = 0;
- const attribute_id build_attribute_id_2 = 0;
- const attribute_id build_attribute_id_3 = 0;
- const attribute_id probe_attribute_id_1 = 1;
- const attribute_id probe_attribute_id_2 = 2;
- const attribute_id probe_attribute_id_3 = 3;
-
- const QueryContext::join_hash_table_id join_hash_table_index_1 = 0;
- const QueryContext::join_hash_table_id join_hash_table_index_2 = 1;
- const QueryContext::join_hash_table_id join_hash_table_index_3 = 2;
- query_context_proto_->add_join_hash_tables();
- query_context_proto_->add_join_hash_tables();
- query_context_proto_->add_join_hash_tables();
-
- const QueryPlan::DAGNodeIndex build_operator_index_1 = createDummyBuildHashOperator(query_plan_.get(),
- build_relation_1,
- build_attribute_id_1,
- join_hash_table_index_1);
- const QueryPlan::DAGNodeIndex probe_operator_index_1 = createDummyHashJoinOperator(query_plan_.get(),
- build_relation_1,
- probe_relation_1,
- probe_attribute_id_1,
- join_hash_table_index_1);
- const QueryPlan::DAGNodeIndex build_operator_index_2 = createDummyBuildHashOperator(query_plan_.get(),
- build_relation_2,
- build_attribute_id_2,
- join_hash_table_index_2);
- const QueryPlan::DAGNodeIndex probe_operator_index_2 = createDummyHashJoinOperator(query_plan_.get(),
- build_relation_2,
- probe_relation_1,
- probe_attribute_id_2,
- join_hash_table_index_2);
- const QueryPlan::DAGNodeIndex build_operator_index_3 = createDummyBuildHashOperator(query_plan_.get(),
- build_relation_3,
- build_attribute_id_3,
- join_hash_table_index_3);
- const QueryPlan::DAGNodeIndex probe_operator_index_3 = createDummyHashJoinOperator(query_plan_.get(),
- build_relation_3,
- probe_relation_1,
- probe_attribute_id_3,
- join_hash_table_index_3);
-
- addDummyHashJoinInfo(execution_heuristics_.get(),
- build_operator_index_1,
- probe_operator_index_1,
- build_relation_1,
- probe_relation_1,
- build_attribute_id_1,
- probe_attribute_id_1,
- join_hash_table_index_1);
- addDummyHashJoinInfo(execution_heuristics_.get(),
- build_operator_index_2,
- probe_operator_index_2,
- build_relation_2,
- probe_relation_1,
- build_attribute_id_2,
- probe_attribute_id_2,
- join_hash_table_index_2);
- addDummyHashJoinInfo(execution_heuristics_.get(),
- build_operator_index_3,
- probe_operator_index_3,
- build_relation_3,
- probe_relation_1,
- build_attribute_id_3,
- probe_attribute_id_3,
- join_hash_table_index_3);
-
- execution_heuristics_->optimizeExecutionPlan(query_plan_.get(), query_context_proto_.get());
-
- // Test whether correct number of bloom filters were added.
- EXPECT_EQ(1, query_context_proto_->join_hash_tables(0).build_side_bloom_filter_id_size());
- EXPECT_EQ(1, query_context_proto_->join_hash_tables(1).build_side_bloom_filter_id_size());
- EXPECT_EQ(1, query_context_proto_->join_hash_tables(2).build_side_bloom_filter_id_size());
- EXPECT_EQ(3, query_context_proto_->join_hash_tables(0).probe_side_bloom_filters_size());
-
- // Test that the DAG was modified correctly or not.
- // Probe operator 1 should have now build operator 1 and build operator 2 added as dependencies.
- auto const probe_node_dependencies = query_plan_->getQueryPlanDAG().getDependencies(probe_operator_index_1);
- EXPECT_EQ(1u, probe_node_dependencies.count(build_operator_index_2));
- EXPECT_EQ(1u, probe_node_dependencies.count(build_operator_index_3));
-}
-
-TEST_F(ExecutionHeuristicsTest, HashJoinNotOptimizedTest) {
- // This test case creates three hash joins, all of which are being probed on different relations.
- // Since the probe are being made on the different relations, ExecutionHeuristics should optimize
- // these hash joins using bloom filters.
-
- const CatalogRelation *build_relation_1 = createCatalogRelation("build_relation_1");
- const CatalogRelation *build_relation_2 = createCatalogRelation("build_relation_2");
- const CatalogRelation *build_relation_3 = createCatalogRelation("build_relation_3");
- const CatalogRelation *probe_relation_1 = createCatalogRelation("probe_relation_1");
- const CatalogRelation *probe_relation_2 = createCatalogRelation("probe_relation_2");
- const CatalogRelation *probe_relation_3 = createCatalogRelation("probe_relation_3");
-
- const attribute_id build_attribute_id_1 = 0;
- const attribute_id build_attribute_id_2 = 0;
- const attribute_id build_attribute_id_3 = 0;
- const attribute_id probe_attribute_id_1 = 1;
- const attribute_id probe_attribute_id_2 = 2;
- const attribute_id probe_attribute_id_3 = 3;
-
- const QueryContext::join_hash_table_id join_hash_table_index_1 = 0;
- const QueryContext::join_hash_table_id join_hash_table_index_2 = 1;
- const QueryContext::join_hash_table_id join_hash_table_index_3 = 2;
- query_context_proto_->add_join_hash_tables();
- query_context_proto_->add_join_hash_tables();
- query_context_proto_->add_join_hash_tables();
-
- const QueryPlan::DAGNodeIndex build_operator_index_1 = createDummyBuildHashOperator(query_plan_.get(),
- build_relation_1,
- build_attribute_id_1,
- join_hash_table_index_1);
- const QueryPlan::DAGNodeIndex probe_operator_index_1 = createDummyHashJoinOperator(query_plan_.get(),
- build_relation_1,
- probe_relation_1,
- probe_attribute_id_1,
- join_hash_table_index_1);
- const QueryPlan::DAGNodeIndex build_operator_index_2 = createDummyBuildHashOperator(query_plan_.get(),
- build_relation_2,
- build_attribute_id_2,
- join_hash_table_index_2);
- const QueryPlan::DAGNodeIndex probe_operator_index_2 = createDummyHashJoinOperator(query_plan_.get(),
- build_relation_2,
- probe_relation_2,
- probe_attribute_id_2,
- join_hash_table_index_2);
- const QueryPlan::DAGNodeIndex build_operator_index_3 = createDummyBuildHashOperator(query_plan_.get(),
- build_relation_3,
- build_attribute_id_3,
- join_hash_table_index_3);
- const QueryPlan::DAGNodeIndex probe_operator_index_3 = createDummyHashJoinOperator(query_plan_.get(),
- build_relation_3,
- probe_relation_3,
- probe_attribute_id_3,
- join_hash_table_index_3);
-
- addDummyHashJoinInfo(execution_heuristics_.get(),
- build_operator_index_1,
- probe_operator_index_1,
- build_relation_1,
- probe_relation_1,
- build_attribute_id_1,
- probe_attribute_id_1,
- join_hash_table_index_1);
- addDummyHashJoinInfo(execution_heuristics_.get(),
- build_operator_index_2,
- probe_operator_index_2,
- build_relation_2,
- probe_relation_2,
- build_attribute_id_2,
- probe_attribute_id_2,
- join_hash_table_index_2);
- addDummyHashJoinInfo(execution_heuristics_.get(),
- build_operator_index_3,
- probe_operator_index_3,
- build_relation_3,
- probe_relation_3,
- build_attribute_id_3,
- probe_attribute_id_3,
- join_hash_table_index_3);
-
- execution_heuristics_->optimizeExecutionPlan(query_plan_.get(), query_context_proto_.get());
-
- // Test that no bloom filters were added.
- EXPECT_EQ(0, query_context_proto_->join_hash_tables(0).build_side_bloom_filter_id_size());
- EXPECT_EQ(0, query_context_proto_->join_hash_tables(1).build_side_bloom_filter_id_size());
- EXPECT_EQ(0, query_context_proto_->join_hash_tables(2).build_side_bloom_filter_id_size());
- EXPECT_EQ(0, query_context_proto_->join_hash_tables(0).probe_side_bloom_filters_size());
-
- // Test that the DAG was not modified at all.
- // Probe operator 1 should not have build operator 1 and build operator 2 added as dependencies.
- auto probe_node_dependencies = query_plan_->getQueryPlanDAG().getDependencies(probe_operator_index_1);
- EXPECT_EQ(0u, probe_node_dependencies.count(build_operator_index_2));
- EXPECT_EQ(0u, probe_node_dependencies.count(build_operator_index_3));
-}
-
-} // namespace optimizer
-} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/third_party/iwyu/iwyu_helper.py
----------------------------------------------------------------------
diff --git a/third_party/iwyu/iwyu_helper.py b/third_party/iwyu/iwyu_helper.py
index dff4d55..9c34a46 100755
--- a/third_party/iwyu/iwyu_helper.py
+++ b/third_party/iwyu/iwyu_helper.py
@@ -154,7 +154,7 @@ def GetConfiguration():
# Run IWYU for the header/source specified in `path' and the specified IWYU
# configuration.
def RunIWYU(path, config):
- args = [IWYU_CMD]
+ args = [IWYU_CMD, '-ferror-limit=0']
if config.has_key(CFG_MAPPINGS):
args += list(itertools.chain.from_iterable(
map(MappingArg, config[CFG_MAPPINGS])))
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index ddaae45..395e264 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -156,6 +156,8 @@ QS_PROTOBUF_GENERATE_CPP(quickstep_utility_SortConfiguration_proto_srcs
quickstep_utility_SortConfiguration_proto_hdrs
SortConfiguration.proto)
+add_subdirectory(lip_filter)
+
# Declare micro-libs:
add_library(quickstep_utility_Alignment ../empty_src.cpp Alignment.hpp)
add_library(quickstep_utility_BitManipulation ../empty_src.cpp BitManipulation.hpp)
@@ -168,6 +170,7 @@ add_library(quickstep_utility_CalculateInstalledMemory CalculateInstalledMemory.
add_library(quickstep_utility_Cast ../empty_src.cpp Cast.hpp)
add_library(quickstep_utility_CheckSnprintf ../empty_src.cpp CheckSnprintf.hpp)
add_library(quickstep_utility_DAG ../empty_src.cpp DAG.hpp)
+add_library(quickstep_utility_DisjointTreeForest ../empty_src.cpp DisjointTreeForest.hpp)
add_library(quickstep_utility_EqualsAnyConstant ../empty_src.cpp EqualsAnyConstant.hpp)
add_library(quickstep_utility_ExecutionDAGVisualizer
ExecutionDAGVisualizer.cpp
@@ -230,6 +233,8 @@ target_link_libraries(quickstep_utility_CheckSnprintf
target_link_libraries(quickstep_utility_DAG
glog
quickstep_utility_Macros)
+target_link_libraries(quickstep_utility_DisjointTreeForest
+ glog)
target_link_libraries(quickstep_utility_ExecutionDAGVisualizer
quickstep_catalog_CatalogRelationSchema
quickstep_queryexecution_QueryExecutionTypedefs
@@ -253,7 +258,9 @@ target_link_libraries(quickstep_utility_PlanVisualizer
quickstep_catalog_CatalogRelation
quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExprId
quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_LIPFilterConfiguration
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_physical_PhysicalType
quickstep_queryoptimizer_physical_TableReference
@@ -319,6 +326,7 @@ target_link_libraries(quickstep_utility
quickstep_utility_Cast
quickstep_utility_CheckSnprintf
quickstep_utility_DAG
+ quickstep_utility_DisjointTreeForest
quickstep_utility_EqualsAnyConstant
quickstep_utility_ExecutionDAGVisualizer
quickstep_utility_Glob
@@ -375,6 +383,13 @@ target_link_libraries(DAG_unittest
${LIBS})
add_test(DAG_unittest DAG_unittest)
+add_executable(DisjointTreeForest_unittest "${CMAKE_CURRENT_SOURCE_DIR}/tests/DisjointTreeForest_unittest.cpp")
+target_link_libraries(DisjointTreeForest_unittest
+ gtest
+ gtest_main
+ quickstep_utility_DisjointTreeForest)
+add_test(DisjointTreeForest_unittest DisjointTreeForest_unittest)
+
add_executable(EqualsAnyConstant_unittest "${CMAKE_CURRENT_SOURCE_DIR}/tests/EqualsAnyConstant_unittest.cpp")
target_link_libraries(EqualsAnyConstant_unittest
gtest
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/utility/DisjointTreeForest.hpp
----------------------------------------------------------------------
diff --git a/utility/DisjointTreeForest.hpp b/utility/DisjointTreeForest.hpp
new file mode 100644
index 0000000..971ba10
--- /dev/null
+++ b/utility/DisjointTreeForest.hpp
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_DISJOINT_TREE_FOREST_HPP_
+#define QUICKSTEP_UTILITY_DISJOINT_TREE_FOREST_HPP_
+
+#include <algorithm>
+#include <cstddef>
+#include <unordered_map>
+#include <vector>
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ * @{
+ */
+
+/**
+ * @brief Disjoint sets implemented with tree data structures so that the
+ * union/find operations have nearly O(1) time complexity.
+ */
+template <typename ElementT,
+ class MapperT = std::unordered_map<ElementT, std::size_t>>
+class DisjointTreeForest {
+ public:
+ /**
+ * @brief Whether the given element is in a subset.
+ *
+ * @param element The element.
+ * @return True if the element is in a subset.
+ */
+ bool hasElement(const ElementT &element) const {
+ return elements_map_.find(element) != elements_map_.end();
+ }
+
+ /**
+ * @brief If the given element is not in any subset yet, make a singleton
+ * subset for it. Otherwise do nothing.
+ *
+ * @param element The element.
+ */
+ void makeSet(const ElementT &element) {
+ if (!hasElement(element)) {
+ std::size_t loc = nodes_.size();
+ nodes_.emplace_back(0, loc);
+ elements_map_.emplace(element, loc);
+ }
+ }
+
+ /**
+ * @brief Find the subset id for the given element.
+ *
+ * @param element The element.
+ */
+ std::size_t find(const ElementT &element) {
+ DCHECK(hasElement(element));
+
+ const std::size_t node_id = elements_map_.at(element);
+ std::size_t root_id = node_id;
+ std::size_t parent_id;
+ while ((parent_id = nodes_[root_id].parent) != root_id) {
+ root_id = parent_id;
+ }
+ compress_path(node_id, root_id);
+ return root_id;
+ }
+
+ /**
+ * @brief Union the two subsets that the two given elements belong to.
+ *
+ * @param element1 The first element.
+ * @param element2 The second element.
+ */
+ void merge(const ElementT &element1, const ElementT &element2) {
+ std::size_t root_id1 = find(element1);
+ std::size_t root_id2 = find(element2);
+ if (root_id1 != root_id2) {
+ Node &n1 = nodes_[root_id1];
+ Node &n2 = nodes_[root_id2];
+ if (n1.rank > n2.rank) {
+ n2.parent = root_id1;
+ } else if (n1.rank < n2.rank) {
+ n1.parent = root_id2;
+ } else {
+ n1.parent = root_id2;
+ n2.rank += 1;
+ }
+ }
+ }
+
+ /**
+ * @brief Whether the two given elements are in the same subset.
+ *
+ * @param element1 The first element.
+ * @param element2 The second element.
+ * @return True if the twos elements are in the same subset, false otherwise.
+ */
+ bool isConnected(const ElementT &element1, const ElementT &element2) {
+ return find(element1) == find(element2);
+ }
+
+ private:
+ struct Node {
+ Node(const std::size_t rank_in, const std::size_t parent_in)
+ : rank(rank_in), parent(parent_in) {
+ }
+ std::size_t rank;
+ std::size_t parent;
+ };
+
+ inline void compress_path(const std::size_t leaf_node_id,
+ const std::size_t root_node_id) {
+ std::size_t node_id = leaf_node_id;
+ std::size_t max_rank = 0;
+ while (node_id != root_node_id) {
+ const Node &node = nodes_[node_id];
+ max_rank = std::max(max_rank, node.rank);
+
+ const std::size_t parent_id = node.parent;
+ nodes_[node_id].parent = root_node_id;
+ node_id = parent_id;
+ }
+ nodes_[root_node_id].rank = max_rank + 1;
+ }
+
+ std::vector<Node> nodes_;
+ MapperT elements_map_;
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_UTILITY_DISJOINT_TREE_FOREST_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/utility/PlanVisualizer.cpp
----------------------------------------------------------------------
diff --git a/utility/PlanVisualizer.cpp b/utility/PlanVisualizer.cpp
index 50cf7f0..2adf674 100644
--- a/utility/PlanVisualizer.cpp
+++ b/utility/PlanVisualizer.cpp
@@ -21,15 +21,16 @@
#include <cstddef>
#include <memory>
+#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
#include "catalog/CatalogRelation.hpp"
-
#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/PhysicalType.hpp"
@@ -47,9 +48,12 @@ namespace C = ::quickstep::optimizer::cost;
std::string PlanVisualizer::visualize(const P::PhysicalPtr &input) {
DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+ const P::TopLevelPlanPtr top_level_plan =
+ std::static_pointer_cast<const P::TopLevelPlan>(input);
cost_model_.reset(
new C::StarSchemaSimpleCostModel(
- std::static_pointer_cast<const P::TopLevelPlan>(input)->shared_subplans()));
+ top_level_plan->shared_subplans()));
+ lip_filter_conf_ = top_level_plan->lip_filter_configuration();
color_map_["TableReference"] = "skyblue";
color_map_["Selection"] = "#90EE90";
@@ -86,6 +90,9 @@ std::string PlanVisualizer::visualize(const P::PhysicalPtr &input) {
for (const EdgeInfo &edge_info : edges_) {
graph_oss << " " << edge_info.src_node_id << " -> "
<< edge_info.dst_node_id << " [";
+ if (edge_info.dashed) {
+ graph_oss << "style=dashed ";
+ }
if (!edge_info.labels.empty()) {
graph_oss << "label=\""
<< EscapeSpecialChars(JoinToString(edge_info.labels, " "))
@@ -103,6 +110,10 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
int node_id = ++id_counter_;
node_id_map_.emplace(input, node_id);
+ std::set<E::ExprId> referenced_ids;
+ for (const auto &attr : input->getReferencedAttributes()) {
+ referenced_ids.emplace(attr->id());
+ }
for (const auto &child : input->children()) {
visit(child);
@@ -112,12 +123,18 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
EdgeInfo &edge_info = edges_.back();
edge_info.src_node_id = child_id;
edge_info.dst_node_id = node_id;
+ edge_info.dashed = false;
- // Print output attributes except for TableReference -- there are just too many
- // attributes out of TableReference.
- if (child->getPhysicalType() != P::PhysicalType::kTableReference) {
- for (const auto &attr : child->getOutputAttributes()) {
- edge_info.labels.emplace_back(attr->attribute_alias());
+ if (input->getPhysicalType() == P::PhysicalType::kHashJoin &&
+ child == input->children()[1]) {
+ edge_info.dashed = true;
+ }
+
+ for (const auto &attr : child->getOutputAttributes()) {
+ if (referenced_ids.find(attr->id()) != referenced_ids.end()) {
+ edge_info.labels.emplace_back(
+ attr->attribute_alias() + ", est # distinct = " +
+ std::to_string(cost_model_->estimateNumDistinctValues(attr->id(), child)));
}
}
}
@@ -154,6 +171,26 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
break;
}
}
+
+ if (lip_filter_conf_ != nullptr) {
+ const auto &build_filters = lip_filter_conf_->getBuildInfoMap();
+ const auto build_it = build_filters.find(input);
+ if (build_it != build_filters.end()) {
+ for (const auto &build_info : build_it->second) {
+ node_info.labels.emplace_back(
+ std::string("[LIP build] ") + build_info.build_attribute->attribute_alias());
+ }
+ }
+ const auto &probe_filters = lip_filter_conf_->getProbeInfoMap();
+ const auto probe_it = probe_filters.find(input);
+ if (probe_it != probe_filters.end()) {
+ for (const auto &probe_info : probe_it->second) {
+ node_info.labels.emplace_back(
+ std::string("[LIP probe] ") + probe_info.probe_attribute->attribute_alias());
+ }
+ }
+ }
+
node_info.labels.emplace_back(
"est. # = " + std::to_string(cost_model_->estimateCardinality(input)));
node_info.labels.emplace_back(
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/utility/PlanVisualizer.hpp
----------------------------------------------------------------------
diff --git a/utility/PlanVisualizer.hpp b/utility/PlanVisualizer.hpp
index 1c0df77..9b8b0db 100644
--- a/utility/PlanVisualizer.hpp
+++ b/utility/PlanVisualizer.hpp
@@ -26,6 +26,7 @@
#include <vector>
#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "utility/Macros.hpp"
@@ -73,6 +74,7 @@ class PlanVisualizer {
int src_node_id;
int dst_node_id;
std::vector<std::string> labels;
+ bool dashed;
};
void visit(const optimizer::physical::PhysicalPtr &input);
@@ -85,6 +87,7 @@ class PlanVisualizer {
std::vector<EdgeInfo> edges_;
std::unique_ptr<optimizer::cost::StarSchemaSimpleCostModel> cost_model_;
+ optimizer::physical::LIPFilterConfigurationPtr lip_filter_conf_;
DISALLOW_COPY_AND_ASSIGN(PlanVisualizer);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/utility/lip_filter/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/lip_filter/CMakeLists.txt b/utility/lip_filter/CMakeLists.txt
new file mode 100644
index 0000000..2232abe
--- /dev/null
+++ b/utility/lip_filter/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Declare micro-libs:
+add_library(quickstep_utility_lipfilter_LIPFilter ../../empty_src.cpp LIPFilter.hpp)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/utility/lip_filter/LIPFilter.hpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilter.hpp b/utility/lip_filter/LIPFilter.hpp
new file mode 100644
index 0000000..33165ed
--- /dev/null
+++ b/utility/lip_filter/LIPFilter.hpp
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_HPP_
+#define QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_HPP_
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ * @{
+ */
+
+enum class LIPFilterType {
+ kBloomFilter,
+ kExactFilter,
+ kSingleIdentityHashFilter
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/utility/tests/DisjointTreeForest_unittest.cpp
----------------------------------------------------------------------
diff --git a/utility/tests/DisjointTreeForest_unittest.cpp b/utility/tests/DisjointTreeForest_unittest.cpp
new file mode 100644
index 0000000..2e12fad
--- /dev/null
+++ b/utility/tests/DisjointTreeForest_unittest.cpp
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "utility/DisjointTreeForest.hpp"
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+namespace quickstep {
+
+TEST(DisjointTreeForestTest, IntTest) {
+ DisjointTreeForest<int> forest;
+ for (int i = 10; i < 20; ++i) {
+ forest.makeSet(i);
+ }
+
+ for (int i = 10; i < 20; i += 2) {
+ EXPECT_NE(forest.find(i), forest.find(i+1));
+ EXPECT_FALSE(forest.isConnected(i, i+1));
+
+ forest.merge(i, i+1);
+ EXPECT_EQ(forest.find(i), forest.find(i+1));
+ EXPECT_TRUE(forest.isConnected(i, i+1));
+
+ forest.merge(i+1, i);
+ EXPECT_EQ(forest.find(i), forest.find(i+1));
+ EXPECT_TRUE(forest.isConnected(i, i+1));
+ }
+
+ for (int i = 12; i < 20; i += 2) {
+ EXPECT_NE(forest.find(i), forest.find(i-1));
+ EXPECT_FALSE(forest.isConnected(i, i-1));
+ }
+
+ forest.merge(10, 17);
+ forest.merge(11, 18);
+ EXPECT_EQ(forest.find(11), forest.find(16));
+ EXPECT_EQ(forest.find(10), forest.find(19));
+ EXPECT_NE(forest.find(10), forest.find(12));
+ EXPECT_NE(forest.find(15), forest.find(17));
+
+ forest.merge(12, 14);
+ forest.merge(15, 16);
+ const std::size_t id = forest.find(10);
+ for (int i = 10; i < 20; ++i) {
+ EXPECT_EQ(forest.find(i), id);
+ }
+}
+
+TEST(DisjointTreeForestTest, StringTest) {
+ DisjointTreeForest<std::string> forest;
+ const std::vector<std::string> elements = { "aaa", "bbb", "ccc", "ddd" };
+ for (const std::string &element : elements) {
+ forest.makeSet(element);
+ }
+
+ EXPECT_NE(forest.find("aaa"), forest.find("bbb"));
+ forest.merge("aaa", "bbb");
+ EXPECT_EQ(forest.find("aaa"), forest.find("bbb"));
+
+ EXPECT_NE(forest.find("ccc"), forest.find("ddd"));
+ forest.merge("ccc", "ddd");
+ EXPECT_EQ(forest.find("ccc"), forest.find("ddd"));
+
+ EXPECT_NE(forest.find("aaa"), forest.find("ccc"));
+ EXPECT_NE(forest.find("aaa"), forest.find("ddd"));
+ EXPECT_NE(forest.find("bbb"), forest.find("ccc"));
+ EXPECT_NE(forest.find("bbb"), forest.find("ddd"));
+
+ forest.merge("aaa", "ddd");
+ for (const std::string &e1 : elements) {
+ for (const std::string &e2 : elements) {
+ EXPECT_EQ(forest.find(e1), forest.find(e2));
+ EXPECT_TRUE(forest.isConnected(e1, e2));
+ }
+ }
+}
+
+} // namespace quickstep
[2/2] incubator-quickstep git commit: Optimizer changes for the
LIPFilter feature.
Posted by ji...@apache.org.
Optimizer changes for the LIPFilter feature.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/8740f50a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/8740f50a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/8740f50a
Branch: refs/heads/lip-refactor-optimizer
Commit: 8740f50a710d0390155dc3a9a68dbbc2abe2cb7f
Parents: 17ffbb0
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Wed Sep 7 13:20:43 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Wed Oct 12 18:23:29 2016 -0500
----------------------------------------------------------------------
query_optimizer/CMakeLists.txt | 12 +-
query_optimizer/ExecutionGenerator.cpp | 17 -
query_optimizer/ExecutionGenerator.hpp | 5 +-
query_optimizer/ExecutionHeuristics.cpp | 129 --------
query_optimizer/ExecutionHeuristics.hpp | 157 ----------
query_optimizer/PhysicalGenerator.cpp | 18 +-
.../cost_model/StarSchemaSimpleCostModel.cpp | 2 +-
query_optimizer/physical/CMakeLists.txt | 7 +
.../physical/LIPFilterConfiguration.hpp | 171 ++++++++++
query_optimizer/physical/TopLevelPlan.hpp | 43 ++-
query_optimizer/rules/AttachLIPFilters.cpp | 248 +++++++++++++++
query_optimizer/rules/AttachLIPFilters.hpp | 151 +++++++++
query_optimizer/rules/CMakeLists.txt | 19 ++
.../StarSchemaHashJoinOrderOptimization.cpp | 273 ++++++++++------
.../StarSchemaHashJoinOrderOptimization.hpp | 118 +++++--
query_optimizer/tests/CMakeLists.txt | 16 -
.../tests/ExecutionHeuristics_unittest.cpp | 311 -------------------
third_party/iwyu/iwyu_helper.py | 2 +-
utility/CMakeLists.txt | 15 +
utility/DisjointTreeForest.hpp | 152 +++++++++
utility/PlanVisualizer.cpp | 51 ++-
utility/PlanVisualizer.hpp | 3 +
utility/lip_filter/CMakeLists.txt | 19 ++
utility/lip_filter/LIPFilter.hpp | 39 +++
utility/tests/DisjointTreeForest_unittest.cpp | 98 ++++++
25 files changed, 1278 insertions(+), 798 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 988ffd8..fa9141c 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -41,7 +41,6 @@ add_subdirectory(tests)
# Declare micro-libs:
add_library(quickstep_queryoptimizer_ExecutionGenerator ExecutionGenerator.cpp ExecutionGenerator.hpp)
-add_library(quickstep_queryoptimizer_ExecutionHeuristics ExecutionHeuristics.cpp ExecutionHeuristics.hpp)
add_library(quickstep_queryoptimizer_LogicalGenerator LogicalGenerator.cpp LogicalGenerator.hpp)
add_library(quickstep_queryoptimizer_LogicalToPhysicalMapper
../empty_src.cpp
@@ -73,7 +72,6 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
quickstep_expressions_windowaggregation_WindowAggregateFunction_proto
quickstep_queryexecution_QueryContext
quickstep_queryexecution_QueryContext_proto
- quickstep_queryoptimizer_ExecutionHeuristics
quickstep_queryoptimizer_OptimizerContext
quickstep_queryoptimizer_QueryHandle
quickstep_queryoptimizer_QueryPlan
@@ -153,14 +151,6 @@ if (ENABLE_DISTRIBUTED)
target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
quickstep_catalog_Catalog_proto)
endif()
-target_link_libraries(quickstep_queryoptimizer_ExecutionHeuristics
- glog
- quickstep_catalog_CatalogRelation
- quickstep_catalog_CatalogTypedefs
- quickstep_queryexecution_QueryContext
- quickstep_queryexecution_QueryContext_proto
- quickstep_queryoptimizer_QueryPlan
- quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_LogicalGenerator
glog
quickstep_parser_ParseStatement
@@ -196,6 +186,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_LogicalToPhysicalMapper
quickstep_queryoptimizer_logical_Logical
quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_rules_AttachLIPFilters
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
quickstep_queryoptimizer_rules_SwapProbeBuild
@@ -233,7 +224,6 @@ target_link_libraries(quickstep_queryoptimizer_Validator
add_library(quickstep_queryoptimizer ../empty_src.cpp QueryOptimizerModule.hpp)
target_link_libraries(quickstep_queryoptimizer
quickstep_queryoptimizer_ExecutionGenerator
- quickstep_queryoptimizer_ExecutionHeuristics
quickstep_queryoptimizer_LogicalGenerator
quickstep_queryoptimizer_LogicalToPhysicalMapper
quickstep_queryoptimizer_Optimizer
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 9347c9c..29dec38 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -54,7 +54,6 @@
#include "expressions/window_aggregation/WindowAggregateFunction.pb.h"
#include "query_execution/QueryContext.hpp"
#include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/ExecutionHeuristics.hpp"
#include "query_optimizer/OptimizerContext.hpp"
#include "query_optimizer/QueryHandle.hpp"
#include "query_optimizer/QueryPlan.hpp"
@@ -211,11 +210,6 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) {
temporary_relation_info.producer_operator_index);
}
- // Optimize execution plan based on heuristics captured during execution plan generation, if enabled.
- if (FLAGS_optimize_joins) {
- execution_heuristics_->optimizeExecutionPlan(execution_plan_, query_context_proto_);
- }
-
#ifdef QUICKSTEP_DISTRIBUTED
catalog_database_cache_proto_->set_name(catalog_database_->getName());
@@ -828,17 +822,6 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
std::forward_as_tuple(join_operator_index,
output_relation));
temporary_relation_info_vec_.emplace_back(join_operator_index, output_relation);
-
- // Add heuristics for the Hash Join, if enabled.
- if (FLAGS_optimize_joins && !skip_hash_join_optimization) {
- execution_heuristics_->addHashJoinInfo(build_operator_index,
- join_operator_index,
- referenced_stored_build_relation,
- referenced_stored_probe_relation,
- std::move(build_original_attribute_ids),
- std::move(probe_original_attribute_ids),
- join_hash_table_index);
- }
}
void ExecutionGenerator::convertNestedLoopsJoin(
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/ExecutionGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp
index 2aaf5ab..495955e 100644
--- a/query_optimizer/ExecutionGenerator.hpp
+++ b/query_optimizer/ExecutionGenerator.hpp
@@ -33,7 +33,6 @@
#include "catalog/CatalogTypedefs.hpp"
#include "query_execution/QueryContext.hpp"
#include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/ExecutionHeuristics.hpp"
#include "query_optimizer/QueryHandle.hpp"
#include "query_optimizer/QueryPlan.hpp"
#include "query_optimizer/cost_model/CostModel.hpp"
@@ -102,8 +101,7 @@ class ExecutionGenerator {
: catalog_database_(DCHECK_NOTNULL(catalog_database)),
query_handle_(DCHECK_NOTNULL(query_handle)),
execution_plan_(DCHECK_NOTNULL(query_handle->getQueryPlanMutable())),
- query_context_proto_(DCHECK_NOTNULL(query_handle->getQueryContextProtoMutable())),
- execution_heuristics_(new ExecutionHeuristics()) {
+ query_context_proto_(DCHECK_NOTNULL(query_handle->getQueryContextProtoMutable())) {
query_context_proto_->set_query_id(query_handle_->query_id());
#ifdef QUICKSTEP_DISTRIBUTED
catalog_database_cache_proto_ = DCHECK_NOTNULL(query_handle->getCatalogDatabaseCacheProtoMutable());
@@ -386,7 +384,6 @@ class ExecutionGenerator {
QueryHandle *query_handle_;
QueryPlan *execution_plan_; // A part of QueryHandle.
serialization::QueryContext *query_context_proto_; // A part of QueryHandle.
- std::unique_ptr<ExecutionHeuristics> execution_heuristics_;
#ifdef QUICKSTEP_DISTRIBUTED
serialization::CatalogDatabase *catalog_database_cache_proto_; // A part of QueryHandle.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/ExecutionHeuristics.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.cpp b/query_optimizer/ExecutionHeuristics.cpp
deleted file mode 100644
index 4fd7320..0000000
--- a/query_optimizer/ExecutionHeuristics.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- **/
-
-#include "query_optimizer/ExecutionHeuristics.hpp"
-
-#include <cstddef>
-#include <utility>
-#include <unordered_map>
-#include <vector>
-
-#include "catalog/CatalogTypedefs.hpp"
-#include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/QueryPlan.hpp"
-#include "utility/Macros.hpp"
-
-#include "glog/logging.h"
-
-namespace quickstep {
-namespace optimizer {
-
-void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
- serialization::QueryContext *query_context_proto) {
- // Currently this only optimizes left deep joins using bloom filters.
- // It uses a simple algorithm to discover the left deep joins.
- // It starts with the first hash join in the plan and keeps on iterating
- // over the next hash joins, till a probe on a different relation id is found.
- // The set of hash joins found in this way forms a chain and can be recognized
- // as a left deep join. It becomes a candidate for optimization.
-
- // The optimization is done by modifying each of the build operators in the chain
- // to generate a bloom filter on the build key during their hash table creation.
- // The leaf-level probe operator is then modified to query all the bloom
- // filters generated from all the build operators in the chain. These
- // bloom filters are queried to test the membership of the probe key
- // just prior to probing the hash table.
-
- QueryPlan::DAGNodeIndex origin_node = 0;
- while (origin_node < hash_joins_.size() - 1) {
- std::vector<std::size_t> chained_nodes;
- chained_nodes.push_back(origin_node);
- for (std::size_t i = origin_node + 1; i < hash_joins_.size(); ++i) {
- const relation_id checked_relation_id = hash_joins_[origin_node].referenced_stored_probe_relation_->getID();
- const relation_id expected_relation_id = hash_joins_[i].referenced_stored_probe_relation_->getID();
- if (checked_relation_id == expected_relation_id) {
- chained_nodes.push_back(i);
- } else {
- break;
- }
- }
-
- // Only chains of length greater than one are suitable candidates for semi-join optimization.
- if (chained_nodes.size() > 1) {
- std::unordered_map<QueryContext::bloom_filter_id, std::vector<attribute_id>> probe_bloom_filter_info;
- for (const std::size_t node : chained_nodes) {
- // Provision for a new bloom filter to be used by the build operator.
- const QueryContext::bloom_filter_id bloom_filter_id = query_context_proto->bloom_filters_size();
- serialization::BloomFilter *bloom_filter_proto = query_context_proto->add_bloom_filters();
-
- // Modify the bloom filter properties based on the statistics of the relation.
- setBloomFilterProperties(bloom_filter_proto, hash_joins_[node].referenced_stored_build_relation_);
-
- // Add build-side bloom filter information to the corresponding hash table proto.
- query_context_proto->mutable_join_hash_tables(hash_joins_[node].join_hash_table_id_)
- ->add_build_side_bloom_filter_id(bloom_filter_id);
-
- probe_bloom_filter_info.insert(std::make_pair(bloom_filter_id, hash_joins_[node].probe_attributes_));
- }
-
- // Add probe-side bloom filter information to the corresponding hash table proto for each build-side bloom filter.
- for (const std::pair<QueryContext::bloom_filter_id, std::vector<attribute_id>>
- &bloom_filter_info : probe_bloom_filter_info) {
- auto *probe_side_bloom_filter =
- query_context_proto->mutable_join_hash_tables(hash_joins_[origin_node].join_hash_table_id_)
- ->add_probe_side_bloom_filters();
- probe_side_bloom_filter->set_probe_side_bloom_filter_id(bloom_filter_info.first);
- for (const attribute_id &probe_attribute_id : bloom_filter_info.second) {
- probe_side_bloom_filter->add_probe_side_attr_ids(probe_attribute_id);
- }
- }
-
- // Add node dependencies from chained build nodes to origin node probe.
- for (std::size_t i = 1; i < chained_nodes.size(); ++i) { // Note: It starts from index 1.
- query_plan->addDirectDependency(hash_joins_[origin_node].join_operator_index_,
- hash_joins_[origin_node + i].build_operator_index_,
- true /* is_pipeline_breaker */);
- }
- }
-
- // Update the origin node.
- origin_node = chained_nodes.back() + 1;
- }
-}
-
-void ExecutionHeuristics::setBloomFilterProperties(serialization::BloomFilter *bloom_filter_proto,
- const CatalogRelation *relation) {
- const std::size_t cardinality = relation->estimateTupleCardinality();
- if (cardinality < kOneThousand) {
- bloom_filter_proto->set_bloom_filter_size(kOneThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kVeryLowSparsityHash);
- } else if (cardinality < kTenThousand) {
- bloom_filter_proto->set_bloom_filter_size(kTenThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kLowSparsityHash);
- } else if (cardinality < kHundredThousand) {
- bloom_filter_proto->set_bloom_filter_size(kHundredThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kMediumSparsityHash);
- } else {
- bloom_filter_proto->set_bloom_filter_size(kMillion / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kHighSparsityHash);
- }
-}
-
-} // namespace optimizer
-} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/ExecutionHeuristics.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.hpp b/query_optimizer/ExecutionHeuristics.hpp
deleted file mode 100644
index 8ad3b7a..0000000
--- a/query_optimizer/ExecutionHeuristics.hpp
+++ /dev/null
@@ -1,157 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- **/
-
-#ifndef QUICKSTEP_QUERY_OPTIMIZER_EXECUTION_HEURISTICS_HPP_
-#define QUICKSTEP_QUERY_OPTIMIZER_EXECUTION_HEURISTICS_HPP_
-
-#include <vector>
-
-#include "catalog/CatalogRelation.hpp"
-#include "catalog/CatalogTypedefs.hpp"
-#include "query_execution/QueryContext.hpp"
-#include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/QueryPlan.hpp"
-#include "utility/Macros.hpp"
-
-#include "glog/logging.h"
-
-namespace quickstep {
-namespace optimizer {
-
-/** \addtogroup QueryOptimizer
- * @{
- */
-
-/**
- * @brief The ExecutionHeuristics compiles certain heuristics for an execution plan
- * as it is being converted to a physical plan. These heuristics can then be
- * used to optimize the execution plan after it has been generated.
- **/
-class ExecutionHeuristics {
- public:
- static const std::size_t kOneHundred = 100;
- static const std::size_t kOneThousand = 1000;
- static const std::size_t kTenThousand = 10000;
- static const std::size_t kHundredThousand = 100000;
- static const std::size_t kMillion = 1000000;
-
- static const std::size_t kCompressionFactor = 10;
-
- static const std::size_t kVeryLowSparsityHash = 1;
- static const std::size_t kLowSparsityHash = 2;
- static const std::size_t kMediumSparsityHash = 5;
- static const std::size_t kHighSparsityHash = 10;
-
- /**
- * @brief A simple internal class that holds information about various
- * hash joins within the execution plan for a query.
- **/
- struct HashJoinInfo {
- HashJoinInfo(const QueryPlan::DAGNodeIndex build_operator_index,
- const QueryPlan::DAGNodeIndex join_operator_index,
- const CatalogRelation *referenced_stored_build_relation,
- const CatalogRelation *referenced_stored_probe_relation,
- std::vector<attribute_id> &&build_attributes,
- std::vector<attribute_id> &&probe_attributes,
- const QueryContext::join_hash_table_id join_hash_table_id)
- : build_operator_index_(build_operator_index),
- join_operator_index_(join_operator_index),
- referenced_stored_build_relation_(referenced_stored_build_relation),
- referenced_stored_probe_relation_(referenced_stored_probe_relation),
- build_attributes_(std::move(build_attributes)),
- probe_attributes_(std::move(probe_attributes)),
- join_hash_table_id_(join_hash_table_id) {
- }
-
- const QueryPlan::DAGNodeIndex build_operator_index_;
- const QueryPlan::DAGNodeIndex join_operator_index_;
- const CatalogRelation *referenced_stored_build_relation_;
- const CatalogRelation *referenced_stored_probe_relation_;
- const std::vector<attribute_id> build_attributes_;
- const std::vector<attribute_id> probe_attributes_;
- const QueryContext::join_hash_table_id join_hash_table_id_;
- };
-
-
- /**
- * @brief Constructor.
- **/
- ExecutionHeuristics() {}
-
- /**
- * @brief Saves information about a hash join used within the execution plan
- * for a query.
- *
- * @param build_operator_index Index of the build operator of the hash join.
- * @param join_operator_index Index of the join operator of the hash join.
- * @param build_relation_id Id of the relation on which hash table is being built.
- * @param probe_relation_id Id of the relation on which hash table is being probed.
- * @param build_attributes List of attributes on which hash table is being built.
- * @param probe_attributes List of attributes on which hash table is being probed.
- * @param join_hash_table_id Id of the hash table which refers to the actual hash
- * table within the query context.
- **/
- inline void addHashJoinInfo(const QueryPlan::DAGNodeIndex build_operator_index,
- const QueryPlan::DAGNodeIndex join_operator_index,
- const CatalogRelation *referenced_stored_build_relation,
- const CatalogRelation *referenced_stored_probe_relation,
- std::vector<attribute_id> &&build_attributes,
- std::vector<attribute_id> &&probe_attributes,
- const QueryContext::join_hash_table_id join_hash_table_id) {
- hash_joins_.push_back(HashJoinInfo(build_operator_index,
- join_operator_index,
- referenced_stored_build_relation,
- referenced_stored_probe_relation,
- std::move(build_attributes),
- std::move(probe_attributes),
- join_hash_table_id));
- }
-
- /**
- * @brief Optimize the execution plan based on heuristics generated
- * during physical plan to execution plan conversion.
- *
- * @param query_plan A mutable reference to the query execution plan.
- * @param query_context_proto A mutable reference to the protobuf representation
- * of the query context.
- **/
- void optimizeExecutionPlan(QueryPlan *query_plan, serialization::QueryContext *query_context_proto);
-
- /**
- * @brief Set the properties of the bloom filter proto based on the statistics
- * of the given relation.
- *
- * @param bloom_filter_proto A mutable reference to the bloom filter protobuf representation.
- * @param relation The catalog relation on which bloom filter is being built.
- **/
- void setBloomFilterProperties(serialization::BloomFilter *bloom_filter_proto,
- const CatalogRelation *relation);
-
- private:
- std::vector<HashJoinInfo> hash_joins_;
-
- DISALLOW_COPY_AND_ASSIGN(ExecutionHeuristics);
-};
-
-/** @} */
-
-} // namespace optimizer
-} // namespace quickstep
-
-#endif /* QUICKSTEP_QUERY_OPTIMIZER_EXECUTION_HEURISTICS_HPP_ */
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 8f19702..9db4037 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -26,6 +26,7 @@
#include "query_optimizer/Validator.hpp"
#include "query_optimizer/logical/Logical.hpp"
#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/AttachLIPFilters.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
#include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
#include "query_optimizer/rules/SwapProbeBuild.hpp"
@@ -49,6 +50,12 @@ DEFINE_bool(reorder_hash_joins, true,
"cardinality and selective tables to be joined first, which is suitable "
"for queries on star-schema tables.");
+DEFINE_bool(use_lip_filters, false,
+ "If true, use LIP (Lookahead Information Passing) filters to accelerate "
+ "query processing. LIP filters are effective for queries on star schema "
+ "tables (e.g. the SSB benchmark) and snowflake schema tables (e.g. the "
+ "TPC-H benchmark).");
+
DEFINE_bool(visualize_plan, false,
"If true, visualize the final physical plan into a graph in DOT format "
"(DOT is a plain text graph description language). Then print the "
@@ -95,11 +102,16 @@ P::PhysicalPtr PhysicalGenerator::generateInitialPlan(
P::PhysicalPtr PhysicalGenerator::optimizePlan() {
std::vector<std::unique_ptr<Rule<P::Physical>>> rules;
+ rules.emplace_back(new PruneColumns());
if (FLAGS_reorder_hash_joins) {
rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
+ rules.emplace_back(new PruneColumns());
+ } else {
+ rules.emplace_back(new SwapProbeBuild());
+ }
+ if (FLAGS_use_lip_filters) {
+ rules.emplace_back(new AttachLIPFilters());
}
- rules.emplace_back(new PruneColumns());
- rules.emplace_back(new SwapProbeBuild());
for (std::unique_ptr<Rule<P::Physical>> &rule : rules) {
physical_plan_ = rule->apply(physical_plan_);
@@ -110,7 +122,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
DVLOG(4) << "Optimized physical plan:\n" << physical_plan_->toString();
if (FLAGS_visualize_plan) {
- quickstep::PlanVisualizer plan_visualizer;
+ quickstep::PlanVisualizer plan_visualizer;
std::cerr << "\n" << plan_visualizer.visualize(physical_plan_) << "\n";
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index 8d254fa..1075739 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -358,7 +358,7 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
std::static_pointer_cast<const E::LogicalAnd>(filter_predicate);
double selectivity = 1.0;
for (const auto &predicate : logical_and->operands()) {
- selectivity = selectivity * estimateSelectivityForPredicate(predicate, physical_plan);
+ selectivity = std::min(selectivity, estimateSelectivityForPredicate(predicate, physical_plan));
}
return selectivity;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/physical/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/CMakeLists.txt b/query_optimizer/physical/CMakeLists.txt
index 3b7d3f0..5c2cd0b 100644
--- a/query_optimizer/physical/CMakeLists.txt
+++ b/query_optimizer/physical/CMakeLists.txt
@@ -27,6 +27,7 @@ add_library(quickstep_queryoptimizer_physical_HashJoin HashJoin.cpp HashJoin.hpp
add_library(quickstep_queryoptimizer_physical_InsertSelection InsertSelection.cpp InsertSelection.hpp)
add_library(quickstep_queryoptimizer_physical_InsertTuple InsertTuple.cpp InsertTuple.hpp)
add_library(quickstep_queryoptimizer_physical_Join ../../empty_src.cpp Join.hpp)
+add_library(quickstep_queryoptimizer_physical_LIPFilterConfiguration ../../empty_src.cpp LIPFilterConfiguration.hpp)
add_library(quickstep_queryoptimizer_physical_NestedLoopsJoin NestedLoopsJoin.cpp NestedLoopsJoin.hpp)
add_library(quickstep_queryoptimizer_physical_PatternMatcher ../../empty_src.cpp PatternMatcher.hpp)
add_library(quickstep_queryoptimizer_physical_Physical ../../empty_src.cpp Physical.hpp)
@@ -150,6 +151,10 @@ target_link_libraries(quickstep_queryoptimizer_physical_Join
quickstep_queryoptimizer_expressions_NamedExpression
quickstep_queryoptimizer_physical_Physical
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_physical_LIPFilterConfiguration
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_utility_Macros
+ quickstep_utility_lipfilter_LIPFilter)
target_link_libraries(quickstep_queryoptimizer_physical_NestedLoopsJoin
glog
quickstep_queryoptimizer_OptimizerTree
@@ -237,6 +242,7 @@ target_link_libraries(quickstep_queryoptimizer_physical_TopLevelPlan
quickstep_queryoptimizer_expressions_AttributeReference
quickstep_queryoptimizer_expressions_ExprId
quickstep_queryoptimizer_expressions_ExpressionUtil
+ quickstep_queryoptimizer_physical_LIPFilterConfiguration
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_physical_PhysicalType
quickstep_utility_Cast
@@ -279,6 +285,7 @@ target_link_libraries(quickstep_queryoptimizer_physical
quickstep_queryoptimizer_physical_InsertSelection
quickstep_queryoptimizer_physical_InsertTuple
quickstep_queryoptimizer_physical_Join
+ quickstep_queryoptimizer_physical_LIPFilterConfiguration
quickstep_queryoptimizer_physical_NestedLoopsJoin
quickstep_queryoptimizer_physical_PatternMatcher
quickstep_queryoptimizer_physical_Physical
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/physical/LIPFilterConfiguration.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/LIPFilterConfiguration.hpp b/query_optimizer/physical/LIPFilterConfiguration.hpp
new file mode 100644
index 0000000..62a6149
--- /dev/null
+++ b/query_optimizer/physical/LIPFilterConfiguration.hpp
@@ -0,0 +1,171 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_LIP_FILTER_CONFIGURATION_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_LIP_FILTER_CONFIGURATION_HPP_
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "utility/Macros.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+
+namespace quickstep {
+namespace optimizer {
+namespace physical {
+
+/** \addtogroup OptimizerPhysical
+ * @{
+ */
+
+class Physical;
+typedef std::shared_ptr<const Physical> PhysicalPtr;
+
+/**
+ * @brief Optimizer information for a LIP filter builder.
+ */
+struct LIPFilterBuildInfo {
+ /**
+ * @brief Constructor.
+ *
+ * @param build_attribute_in The attribute to build the LIP filter with.
+ * @param filter_cardinality_in The LIP filter's cardinality.
+ * @param filter_type_in The LIP filter's type.
+ */
+ LIPFilterBuildInfo(const expressions::AttributeReferencePtr &build_attribute_in,
+ const std::size_t filter_cardinality_in,
+ const LIPFilterType &filter_type_in)
+ : build_attribute(build_attribute_in),
+ filter_cardinality(filter_cardinality_in),
+ filter_type(filter_type_in) {
+ }
+ const expressions::AttributeReferencePtr build_attribute;
+ const std::size_t filter_cardinality;
+ const LIPFilterType filter_type;
+};
+
+/**
+ * @brief Optimizer information for a LIP filter prober.
+ */
+struct LIPFilterProbeInfo {
+ /**
+ * @brief Constructor.
+ *
+ * @param probe_attribute_in The attribute to probe the LIP filter with.
+ * @param build_attribute_in The attribute that the LIP filter is built with.
+ * @param builder_in The physical node that the LIP filter's builder is attached to.
+ */
+ LIPFilterProbeInfo(const expressions::AttributeReferencePtr &probe_attribute_in,
+ const expressions::AttributeReferencePtr &build_attribute_in,
+ const PhysicalPtr &builder_in)
+ : probe_attribute(probe_attribute_in),
+ build_attribute(build_attribute_in),
+ builder(builder_in) {
+ }
+ const expressions::AttributeReferencePtr probe_attribute;
+ const expressions::AttributeReferencePtr build_attribute;
+ const PhysicalPtr builder;
+};
+
+
+class LIPFilterConfiguration;
+typedef std::shared_ptr<const LIPFilterConfiguration> LIPFilterConfigurationPtr;
+
+/**
+ * @brief Configuration information of all the LIP filters in a query plan.
+ */
+class LIPFilterConfiguration {
+ public:
+ /**
+ * @brief Constructor.
+ */
+ LIPFilterConfiguration() {
+ }
+
+ /**
+ * @brief Add information for a LIP filter builder.
+ *
+ * @param build_attribute The attribute to build the LIP filter with.
+ * @param builder The physical node to attach the LIP filter builder to.
+ * @param filter_size The LIP filter's cardinality.
+ * @param filter_type The LIP filter's type.
+ */
+ void addBuildInfo(const expressions::AttributeReferencePtr &build_attribute,
+ const PhysicalPtr &builder,
+ const std::size_t filter_size,
+ const LIPFilterType &filter_type) {
+ build_info_map_[builder].emplace_back(
+ build_attribute, filter_size, filter_type);
+ }
+
+ /**
+ * @brief Add information for a LIP filter prober.
+ *
+ * @param probe_attribute The attribute to probe the LIP filter with.
+ * @param prober The physical node to attach the LIP filter prober to.
+ * @param build_attribute The attribute that the LIP filter is built with.
+ * @param builder The physical node that the LIP filter's builder is attached to.
+ */
+ void addProbeInfo(const expressions::AttributeReferencePtr &probe_attribute,
+ const PhysicalPtr &prober,
+ const expressions::AttributeReferencePtr &build_attribute,
+ const PhysicalPtr &builder) {
+ probe_info_map_[prober].emplace_back(
+ probe_attribute, build_attribute, builder);
+ }
+
+ /**
+ * @brief Get all the LIP filter builders.
+ *
+ * @return A map where each key is a physical node and each mapped value is
+ * a vector of all the LIP filter builders that are attached to the
+ * physical node.
+ */
+ const std::map<PhysicalPtr, std::vector<LIPFilterBuildInfo>>& getBuildInfoMap() const {
+ return build_info_map_;
+ }
+
+ /**
+ * @brief Get all the LIP filter probers.
+ *
+ * @return A map where each key is a physical node and each mapped value is
+ * a vector of all the LIP filter probers that are attached to the
+ * physical node.
+ */
+ const std::map<PhysicalPtr, std::vector<LIPFilterProbeInfo>>& getProbeInfoMap() const {
+ return probe_info_map_;
+ }
+
+ private:
+ std::map<PhysicalPtr, std::vector<LIPFilterBuildInfo>> build_info_map_;
+ std::map<PhysicalPtr, std::vector<LIPFilterProbeInfo>> probe_info_map_;
+
+ DISALLOW_COPY_AND_ASSIGN(LIPFilterConfiguration);
+};
+
+/** @} */
+
+} // namespace physical
+} // namespace optimizer
+} // namespace quickstep
+
+#endif /* QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_LIP_FILTER_CONFIGURATION_HPP_ */
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/physical/TopLevelPlan.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/TopLevelPlan.hpp b/query_optimizer/physical/TopLevelPlan.hpp
index 8f07dec..7dfc2b6 100644
--- a/query_optimizer/physical/TopLevelPlan.hpp
+++ b/query_optimizer/physical/TopLevelPlan.hpp
@@ -29,6 +29,7 @@
#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ExprId.hpp"
#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/PhysicalType.hpp"
#include "utility/Macros.hpp"
@@ -89,6 +90,29 @@ class TopLevelPlan : public Physical {
return shared_subplans_[index];
}
+ /**
+ * @brief Creates a copy of the TopLevelPlan with lip_filter_configuration_
+ * replaced by \p new_lip_filter_configuration.
+ *
+ * @param new_lip_filter_configuration The new lip_filter_configuration to be
+ * substituted for the existing one.
+ * @return A copy of this TopLevelPlan with the new lip_filter_configuration.
+ */
+ TopLevelPlanPtr copyWithLIPFilterConfiguration(
+ const LIPFilterConfigurationPtr &new_lip_filter_configuration) const {
+ return TopLevelPlan::Create(plan_,
+ shared_subplans_,
+ uncorrelated_subquery_map_,
+ new_lip_filter_configuration);
+ }
+
+ /**
+ * @return The LIPFilter configuration information for the overall query plan.
+ */
+ const LIPFilterConfigurationPtr& lip_filter_configuration() const {
+ return lip_filter_configuration_;
+ }
+
PhysicalPtr copyWithNewChildren(
const std::vector<PhysicalPtr> &new_children) const override {
DCHECK_EQ(getNumChildren(), new_children.size());
@@ -125,18 +149,22 @@ class TopLevelPlan : public Physical {
*
* @param plan The query plan.
* @param shared_subplans The subplans referenced in the main input plan.
- * @param Map from the expression ID of an attribute reference to the
- * uncorrelated subquery that produces the attribute.
+ * @param uncorrelated_subquery_map Map from the expression ID of an attribute
+ * reference to the uncorrelated subquery that produces the attribute.
+ * @param lip_filter_configuration The LIPFilter configuration information
+ * for the overall query plan.
* @return An immutable TopLevelPlan.
*/
static TopLevelPlanPtr Create(
const PhysicalPtr &plan,
const std::vector<PhysicalPtr> &shared_subplans = {},
const std::unordered_map<expressions::ExprId, int> &uncorrelated_subquery_map
- = std::unordered_map<expressions::ExprId, int>()) {
+ = std::unordered_map<expressions::ExprId, int>(),
+ const LIPFilterConfigurationPtr &lip_filter_configuration = nullptr) {
return TopLevelPlanPtr(new TopLevelPlan(plan,
shared_subplans,
- uncorrelated_subquery_map));
+ uncorrelated_subquery_map,
+ lip_filter_configuration));
}
protected:
@@ -151,10 +179,12 @@ class TopLevelPlan : public Physical {
private:
TopLevelPlan(const PhysicalPtr &plan,
const std::vector<PhysicalPtr> &shared_subplans,
- const std::unordered_map<expressions::ExprId, int> &uncorrelated_subquery_map)
+ const std::unordered_map<expressions::ExprId, int> &uncorrelated_subquery_map,
+ const LIPFilterConfigurationPtr &lip_filter_configuration)
: plan_(plan),
shared_subplans_(shared_subplans),
- uncorrelated_subquery_map_(uncorrelated_subquery_map) {
+ uncorrelated_subquery_map_(uncorrelated_subquery_map),
+ lip_filter_configuration_(lip_filter_configuration) {
addChild(plan);
for (const PhysicalPtr &shared_subplan : shared_subplans) {
addChild(shared_subplan);
@@ -165,6 +195,7 @@ class TopLevelPlan : public Physical {
// Stored in the topological ordering based on dependencies.
std::vector<PhysicalPtr> shared_subplans_;
std::unordered_map<expressions::ExprId, int> uncorrelated_subquery_map_;
+ LIPFilterConfigurationPtr lip_filter_configuration_;
DISALLOW_COPY_AND_ASSIGN(TopLevelPlan);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/rules/AttachLIPFilters.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachLIPFilters.cpp b/query_optimizer/rules/AttachLIPFilters.cpp
new file mode 100644
index 0000000..090fb8c
--- /dev/null
+++ b/query_optimizer/rules/AttachLIPFilters.cpp
@@ -0,0 +1,248 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/AttachLIPFilters.hpp"
+
+#include <map>
+#include <set>
+#include <unordered_set>
+#include <unordered_map>
+#include <vector>
+#include <utility>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr AttachLIPFilters::apply(const P::PhysicalPtr &input) {
+ DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+
+ const P::TopLevelPlanPtr top_level_plan =
+ std::static_pointer_cast<const P::TopLevelPlan>(input);
+ cost_model_.reset(
+ new cost::StarSchemaSimpleCostModel(
+ top_level_plan->shared_subplans()));
+ lip_filter_configuration_.reset(new P::LIPFilterConfiguration());
+
+ std::set<E::ExprId> already_filtered_attributes;
+ attachLIPFilters(NodeList(input), &already_filtered_attributes);
+
+ P::PhysicalPtr output;
+ if (!lip_filter_configuration_->getBuildInfoMap().empty() ||
+ !lip_filter_configuration_->getProbeInfoMap().empty()) {
+ output = top_level_plan->copyWithLIPFilterConfiguration(
+ P::LIPFilterConfigurationPtr(lip_filter_configuration_.release()));
+ } else {
+ output = input;
+ }
+ return output;
+}
+
+void AttachLIPFilters::attachLIPFilters(
+ const NodeList &path,
+ std::set<expressions::ExprId> *already_filtered_attributes) {
+ const P::PhysicalPtr &node = path.node;
+
+ // First process child nodes
+ for (const auto &child : node->children()) {
+ std::set<E::ExprId> child_filtered_attributes;
+ attachLIPFilters(path.cons(child), &child_filtered_attributes);
+ already_filtered_attributes->insert(child_filtered_attributes.begin(),
+ child_filtered_attributes.end());
+ }
+
+ // Attach LIP filters to HashJoin/Selection/Aggregate nodes
+ P::PhysicalPtr probe_child = nullptr;
+ switch (node->getPhysicalType()) {
+ case P::PhysicalType::kHashJoin:
+ probe_child = std::static_pointer_cast<const P::HashJoin>(node)->left();
+ break;
+ case P::PhysicalType::kSelection:
+ probe_child = std::static_pointer_cast<const P::Selection>(node)->input();
+ break;
+ case P::PhysicalType::kAggregate:
+ probe_child = std::static_pointer_cast<const P::Aggregate>(node)->input();
+ break;
+ default:
+ break;
+ }
+
+ if (probe_child != nullptr &&
+ cost_model_->estimateCardinality(probe_child) > 10000000) {
+ const auto &candidate_lip_filters = getProbeSideInfo(path.cons(probe_child));
+ if (!candidate_lip_filters.empty()) {
+ std::map<E::AttributeReferencePtr, LIPFilterInfoPtr> selected_filters;
+ for (const auto &info : candidate_lip_filters) {
+ auto it = selected_filters.find(info->attribute);
+ if (it == selected_filters.end()) {
+ selected_filters.emplace(info->attribute, info);
+ } else if (LIPFilterInfo::isBetterThan(*info, *it->second)) {
+ it->second = info;
+ }
+ }
+
+ for (const auto &pair : selected_filters) {
+ const E::ExprId source_attr_id = pair.second->source_attribute->id();
+ if (already_filtered_attributes->find(source_attr_id)
+ == already_filtered_attributes->end()) {
+ lip_filter_configuration_->addBuildInfo(
+ pair.second->source_attribute,
+ pair.second->source,
+ pair.second->estimated_cardinality * 8,
+ LIPFilterType::kSingleIdentityHashFilter);
+ lip_filter_configuration_->addProbeInfo(
+ pair.first,
+ node,
+ pair.second->source_attribute,
+ pair.second->source);
+ already_filtered_attributes->emplace(source_attr_id);
+ }
+ }
+ }
+ }
+}
+
+const std::vector<AttachLIPFilters::LIPFilterInfoPtr>& AttachLIPFilters
+ ::getBuildSideInfo(const NodeList &path) {
+ const P::PhysicalPtr &node = path.node;
+ if (build_side_info_.find(node) == build_side_info_.end()) {
+ std::vector<LIPFilterInfoPtr> lip_filters;
+
+ // 1. Gather candidate LIP filters propagated from descendant nodes.
+ std::unordered_set<E::ExprId> output_attribute_ids;
+ for (const auto &attr : node->getOutputAttributes()) {
+ output_attribute_ids.emplace(attr->id());
+ }
+ switch (node->getPhysicalType()) {
+ case P::PhysicalType::kAggregate:
+ case P::PhysicalType::kSelection:
+ case P::PhysicalType::kHashJoin: {
+ for (const P::PhysicalPtr &child : node->children()) {
+ for (const LIPFilterInfoPtr &info : getBuildSideInfo(path.cons(child))) {
+ lip_filters.emplace_back(info);
+ }
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ // 2. Consider the parent physical node. If it is a HashJoin,
+ // then each build-side join attribute is a candidate LIP filter
+ // which can be built by the BuildHashOperator that corresponds
+ // to the parent HashJoin node.
+ P::HashJoinPtr hash_join;
+ if (path.cdr() != nullptr &&
+ P::SomeHashJoin::MatchesWithConditionalCast(path.cdr()->node, &hash_join)) {
+ const P::PhysicalPtr &build_node = hash_join->right();
+ // TODO(jianqiao): consider probe-side info to allow cascading propagation.
+ double selectivity = cost_model_->estimateSelectivity(build_node);
+ // Only consider attributes that are selective.
+ if (selectivity < 1.0) {
+ std::size_t cardinality = cost_model_->estimateCardinality(build_node);
+ for (const auto &attr : hash_join->right_join_attributes()) {
+ lip_filters.emplace_back(
+ std::make_shared<LIPFilterInfo>(attr,
+ path.cdr()->node,
+ path.depth,
+ selectivity,
+ cardinality));
+ }
+ }
+ }
+ build_side_info_.emplace(node, std::move(lip_filters));
+ }
+ return build_side_info_.at(node);
+}
+
+const std::vector<AttachLIPFilters::LIPFilterInfoPtr>& AttachLIPFilters
+ ::getProbeSideInfo(const NodeList &path) {
+ const P::PhysicalPtr &node = path.node;
+ if (probe_side_info_.find(node) == probe_side_info_.end()) {
+ std::vector<LIPFilterInfoPtr> lip_filters;
+ if (path.cdr() != nullptr) {
+ // 1. Gather candidate LIP filters propagated from ancestor nodes.
+ const auto &parent_lip_filters = getProbeSideInfo(*path.cdr());
+ if (!parent_lip_filters.empty()) {
+ std::unordered_set<E::ExprId> output_attribute_ids;
+ for (const auto &attr : node->getOutputAttributes()) {
+ output_attribute_ids.emplace(attr->id());
+ }
+ for (const auto &info : parent_lip_filters) {
+ if (output_attribute_ids.find(info->attribute->id()) != output_attribute_ids.end()) {
+ lip_filters.emplace_back(info);
+ }
+ }
+ }
+
+ // 2. Consider the parent physical node. If it is an InnerHashJoin or
+ // LeftSemiHashJoin, then we can propagate the build-side LIP filters
+ // to the probe-side.
+ P::HashJoinPtr hash_join;
+ if (P::SomeHashJoin::MatchesWithConditionalCast(path.cdr()->node, &hash_join) &&
+ (hash_join->join_type() == P::HashJoin::JoinType::kInnerJoin ||
+ hash_join->join_type() == P::HashJoin::JoinType::kLeftSemiJoin)) {
+ const P::PhysicalPtr &build_side_child = hash_join->right();
+ std::unordered_map<E::ExprId, E::AttributeReferencePtr> join_attribute_pairs;
+ for (std::size_t i = 0; i < hash_join->left_join_attributes().size(); ++i) {
+ const E::AttributeReferencePtr probe_side_join_attribute =
+ hash_join->left_join_attributes()[i];
+ const E::AttributeReferencePtr build_side_join_attribute =
+ hash_join->right_join_attributes()[i];
+ join_attribute_pairs.emplace(build_side_join_attribute->id(),
+ probe_side_join_attribute);
+ }
+ for (const auto &info : getBuildSideInfo(path.cdr()->cons(build_side_child))) {
+ const auto pair_it = join_attribute_pairs.find(info->attribute->id());
+ if (pair_it != join_attribute_pairs.end()) {
+ lip_filters.emplace_back(
+ std::make_shared<LIPFilterInfo>(pair_it->second,
+ info->source,
+ info->depth,
+ info->estimated_selectivity,
+ info->estimated_cardinality,
+ info->attribute));
+ }
+ }
+ }
+ }
+ probe_side_info_.emplace(node, std::move(lip_filters));
+ }
+ return probe_side_info_.at(node);
+}
+
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/rules/AttachLIPFilters.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachLIPFilters.hpp b/query_optimizer/rules/AttachLIPFilters.hpp
new file mode 100644
index 0000000..b8cfc39
--- /dev/null
+++ b/query_optimizer/rules/AttachLIPFilters.hpp
@@ -0,0 +1,151 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_LIP_FILTERS_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_LIP_FILTERS_HPP_
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ * @{
+ */
+
+/**
+ * @brief Rule that applies to a physical plan to attach LIPFilters.
+ */
+class AttachLIPFilters : public Rule<physical::Physical> {
+ public:
+ /**
+ * @brief Constructor.
+ */
+ AttachLIPFilters() {}
+
+ ~AttachLIPFilters() override {}
+
+ std::string getName() const override {
+ return "AttachLIPFilters";
+ }
+
+ physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+ /**
+ * @brief Internal data structure for passing around LIPFilter information.
+ */
+ struct LIPFilterInfo {
+ LIPFilterInfo(const expressions::AttributeReferencePtr &attribute_in,
+ const physical::PhysicalPtr &source_in,
+ const int depth_in,
+ const double estimated_selectivity_in,
+ const std::size_t estimated_cardinality_in,
+ const expressions::AttributeReferencePtr &source_attribute_in = nullptr)
+ : attribute(attribute_in),
+ source(source_in),
+ depth(depth_in),
+ estimated_selectivity(estimated_selectivity_in),
+ estimated_cardinality(estimated_cardinality_in),
+ source_attribute(
+ source_attribute_in == nullptr
+ ? attribute_in
+ : source_attribute_in) {}
+
+ static bool isBetterThan(const LIPFilterInfo &a, const LIPFilterInfo &b) {
+ if (a.estimated_selectivity == b.estimated_selectivity) {
+ return a.depth > b.depth;
+ } else {
+ return a.estimated_selectivity < b.estimated_selectivity;
+ }
+ }
+
+ expressions::AttributeReferencePtr attribute;
+ physical::PhysicalPtr source;
+ int depth;
+ double estimated_selectivity;
+ std::size_t estimated_cardinality;
+ expressions::AttributeReferencePtr source_attribute;
+ };
+
+ typedef std::shared_ptr<const LIPFilterInfo> LIPFilterInfoPtr;
+
+ /**
+ * @brief Functional list data structure for internal use.
+ */
+ struct NodeList {
+ explicit NodeList(const physical::PhysicalPtr &node_in)
+ : node(node_in),
+ next(nullptr),
+ depth(0) {}
+
+ NodeList(const physical::PhysicalPtr &node_in,
+ const NodeList *next_in,
+ const int depth_in)
+ : node(node_in),
+ next(next_in),
+ depth(depth_in) {}
+
+ inline const NodeList *cdr() const {
+ return next;
+ }
+
+ inline const NodeList cons(const physical::PhysicalPtr &new_node) const {
+ return NodeList(new_node, this, depth+1);
+ }
+
+ const physical::PhysicalPtr node;
+ const NodeList *next;
+ const int depth;
+ };
+
+ void attachLIPFilters(const NodeList &path,
+ std::set<expressions::ExprId> *already_filtered_attributes);
+
+ const std::vector<LIPFilterInfoPtr>& getBuildSideInfo(const NodeList &path);
+
+ const std::vector<LIPFilterInfoPtr>& getProbeSideInfo(const NodeList &path);
+
+ std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
+ std::map<physical::PhysicalPtr, std::vector<LIPFilterInfoPtr>> build_side_info_;
+ std::map<physical::PhysicalPtr, std::vector<LIPFilterInfoPtr>> probe_side_info_;
+ std::unique_ptr<physical::LIPFilterConfiguration> lip_filter_configuration_;
+
+ DISALLOW_COPY_AND_ASSIGN(AttachLIPFilters);
+};
+
+/** @} */
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif /* QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_LIP_FILTERS_HPP_ */
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index d9709ce..29875f6 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -18,6 +18,7 @@
add_subdirectory(tests)
# Declare micro-libs:
+add_library(quickstep_queryoptimizer_rules_AttachLIPFilters AttachLIPFilters.cpp AttachLIPFilters.hpp)
add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp)
add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp)
add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
@@ -36,6 +37,21 @@ add_library(quickstep_queryoptimizer_rules_UnnestSubqueries UnnestSubqueries.cpp
# Link dependencies:
+target_link_libraries(quickstep_queryoptimizer_rules_AttachLIPFilters
+ quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExprId
+ quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_LIPFilterConfiguration
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_queryoptimizer_physical_Selection
+ quickstep_queryoptimizer_physical_TopLevelPlan
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_Macros
+ quickstep_utility_lipfilter_LIPFilter)
target_link_libraries(quickstep_queryoptimizer_rules_BottomUpRule
glog
quickstep_queryoptimizer_rules_Rule
@@ -121,12 +137,14 @@ target_link_libraries(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOpti
quickstep_queryoptimizer_expressions_NamedExpression
quickstep_queryoptimizer_expressions_PatternMatcher
quickstep_queryoptimizer_expressions_Predicate
+ quickstep_queryoptimizer_physical_Aggregate
quickstep_queryoptimizer_physical_HashJoin
quickstep_queryoptimizer_physical_PatternMatcher
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_physical_PhysicalType
quickstep_queryoptimizer_physical_TopLevelPlan
quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_DisjointTreeForest
quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_rules_SwapProbeBuild
quickstep_queryoptimizer_costmodel_SimpleCostModel
@@ -187,6 +205,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_UpdateExpression
# Module all-in-one library:
add_library(quickstep_queryoptimizer_rules ../../empty_src.cpp OptimizerRulesModule.hpp)
target_link_libraries(quickstep_queryoptimizer_rules
+ quickstep_queryoptimizer_rules_AttachLIPFilters
quickstep_queryoptimizer_rules_BottomUpRule
quickstep_queryoptimizer_rules_CollapseProject
quickstep_queryoptimizer_rules_GenerateJoins
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
index 946d316..5906b98 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
@@ -19,6 +19,8 @@
#include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
+#include <algorithm>
+#include <map>
#include <memory>
#include <set>
#include <unordered_map>
@@ -28,11 +30,13 @@
#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/NamedExpression.hpp"
#include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/PatternMatcher.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/PhysicalType.hpp"
#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "utility/DisjointTreeForest.hpp"
#include "glog/logging.h"
@@ -74,6 +78,9 @@ P::PhysicalPtr StarSchemaHashJoinOrderOptimization::applyInternal(const P::Physi
JoinGroupInfo *join_group = nullptr;
if (parent_join_group == nullptr || !is_valid_cascading_hash_join) {
new_join_group.reset(new JoinGroupInfo());
+ for (const auto &attr : input->getOutputAttributes()) {
+ new_join_group->referenced_attributes.emplace(attr->id());
+ }
join_group = new_join_group.get();
} else {
join_group = parent_join_group;
@@ -146,7 +153,10 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
i,
tables[i],
cost_model_->estimateCardinality(tables[i]),
- cost_model_->estimateSelectivity(tables[i]));
+ cost_model_->estimateSelectivity(tables[i]),
+ CountSharedAttributes(join_group.referenced_attributes,
+ tables[i]->getOutputAttributes()),
+ tables[i]->getPhysicalType() == physical::PhysicalType::kAggregate);
}
// Auxiliary mapping info.
@@ -163,9 +173,25 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
}
}
- // Create a join graph where tables are vertices, and add an edge between vertices
- // t1 and t2 for each join predicate t1.x = t2.y
- std::vector<std::unordered_set<std::size_t>> join_graph(table_info_storage.size());
+ // The pool of tables.
+ std::set<TableInfo*> remaining_tables;
+ for (auto &table_info : table_info_storage) {
+ remaining_tables.emplace(&table_info);
+ }
+
+ // The equal-join (e.g. =) operator defines an equivalence relation on the
+ // set of all the attributes. The disjoint set data structure is used to keep
+ // track of the equivalence classes that each attribute belongs to.
+ DisjointTreeForest<E::ExprId> join_attribute_forest;
+ for (const auto &attr_id_pair : join_group.join_attribute_pairs) {
+ join_attribute_forest.makeSet(attr_id_pair.first);
+ join_attribute_forest.makeSet(attr_id_pair.second);
+ join_attribute_forest.merge(attr_id_pair.first, attr_id_pair.second);
+ }
+
+ // Map each equivalence class id to the members (e.g. <table id, attribute id>
+ // pairs) in that equivalence class.
+ std::map<std::size_t, std::map<std::size_t, E::ExprId>> join_attribute_groups;
for (const auto &attr_id_pair : join_group.join_attribute_pairs) {
DCHECK(attribute_id_to_table_info_index_map.find(attr_id_pair.first)
!= attribute_id_to_table_info_index_map.end());
@@ -178,128 +204,148 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
attribute_id_to_table_info_index_map[attr_id_pair.second];
DCHECK_NE(first_table_idx, second_table_idx);
- table_info_storage[first_table_idx].join_attribute_pairs.emplace(
- attr_id_pair.first, attr_id_pair.second);
- table_info_storage[second_table_idx].join_attribute_pairs.emplace(
- attr_id_pair.second, attr_id_pair.first);
-
- join_graph[first_table_idx].emplace(second_table_idx);
- join_graph[second_table_idx].emplace(first_table_idx);
- }
-
- std::set<TableInfo*, TableInfoPtrLessComparator> table_info_ordered_by_priority;
- for (std::size_t i = 0; i < table_info_storage.size(); ++i) {
- table_info_ordered_by_priority.emplace(&table_info_storage[i]);
+ DCHECK_EQ(join_attribute_forest.find(attr_id_pair.first),
+ join_attribute_forest.find(attr_id_pair.second));
+ const std::size_t attr_group_id = join_attribute_forest.find(attr_id_pair.first);
+ auto &attr_group = join_attribute_groups[attr_group_id];
+ attr_group.emplace(first_table_idx, attr_id_pair.first);
+ attr_group.emplace(second_table_idx, attr_id_pair.second);
}
- // Contruct hash join tree.
while (true) {
- TableInfo *first_table_info = *table_info_ordered_by_priority.begin();
- table_info_ordered_by_priority.erase(
- table_info_ordered_by_priority.begin());
- const std::size_t first_table_info_id = first_table_info->table_info_id;
-
- TableInfo *second_table_info = nullptr;
- std::set<TableInfo*, TableInfoPtrLessComparator>::iterator second_table_info_it;
- for (auto candidate_table_info_it = table_info_ordered_by_priority.begin();
- candidate_table_info_it != table_info_ordered_by_priority.end();
- ++candidate_table_info_it) {
- TableInfo *candidate_table_info = *candidate_table_info_it;
- const std::size_t candidate_table_info_id = candidate_table_info->table_info_id;
-
- if (join_graph[first_table_info_id].find(candidate_table_info_id)
- == join_graph[first_table_info_id].end() &&
- join_graph[candidate_table_info_id].find(first_table_info_id)
- == join_graph[candidate_table_info_id].end()) {
- continue;
- } else if (second_table_info == nullptr) {
- second_table_info = candidate_table_info;
- second_table_info_it = candidate_table_info_it;
- }
-
- bool is_likely_many_to_many_join = false;
- for (const auto join_attr_pair : first_table_info->join_attribute_pairs) {
- if (candidate_table_info->joined_attribute_set.find(join_attr_pair.second)
- != candidate_table_info->joined_attribute_set.end()) {
- is_likely_many_to_many_join = true;
- break;
+ // Find the best probe/build pair out of the remaining tables.
+ // TODO(jianqiao): design better data structure to improve efficiency here.
+ std::unique_ptr<JoinPair> best_join = nullptr;
+ for (TableInfo *probe_table_info : remaining_tables) {
+ for (TableInfo *build_table_info : remaining_tables) {
+ if (probe_table_info != build_table_info) {
+ const std::size_t probe_table_id = probe_table_info->table_info_id;
+ const std::size_t build_table_id = build_table_info->table_info_id;
+ std::size_t num_join_attributes = 0;
+ double build_side_uniqueness = 1.0;
+ for (const auto &attr_group_pair : join_attribute_groups) {
+ const auto &attr_group = attr_group_pair.second;
+ auto probe_it = attr_group.find(probe_table_id);
+ auto build_it = attr_group.find(build_table_id);
+ if (probe_it != attr_group.end() && build_it != attr_group.end()) {
+ ++num_join_attributes;
+ build_side_uniqueness *= std::max(
+ 1uL,
+ cost_model_->estimateNumDistinctValues(
+ build_it->second, build_table_info->table));
+ }
+ }
+ build_side_uniqueness /= build_table_info->estimated_cardinality;
+
+ if (num_join_attributes > 0) {
+ std::unique_ptr<JoinPair> new_join(
+ new JoinPair(probe_table_info,
+ build_table_info,
+ build_side_uniqueness >= 0.9,
+ num_join_attributes));
+ if (best_join == nullptr || new_join->isBetterThan(*best_join)) {
+ best_join.reset(new_join.release());
+ }
+ }
}
}
- for (const auto join_attr_pair : candidate_table_info->join_attribute_pairs) {
- if (first_table_info->joined_attribute_set.find(join_attr_pair.second)
- != first_table_info->joined_attribute_set.end()) {
- is_likely_many_to_many_join = true;
- break;
- }
- }
- if (!is_likely_many_to_many_join) {
- second_table_info = candidate_table_info;
- second_table_info_it = candidate_table_info_it;
- break;
+ }
+
+ CHECK(best_join != nullptr);
+
+ TableInfo *selected_probe_table_info = best_join->probe;
+ TableInfo *selected_build_table_info = best_join->build;
+
+ // Swap probe/build sides if:
+ // (1) Build side is an aggregation with large number of groups, so that
+ // there is a change to push LIPFilters down the aggregation.
+ // (2) Build side's join attributes are not unique, and it has larger
+ // cardinality than the probe side.
+ const std::size_t probe_num_groups_as_agg =
+ getEstimatedNumGroups(selected_probe_table_info->table);
+ const std::size_t build_num_groups_as_agg =
+ getEstimatedNumGroups(selected_build_table_info->table);
+ if (build_num_groups_as_agg > 1000000 || probe_num_groups_as_agg > 1000000) {
+ if (build_num_groups_as_agg > probe_num_groups_as_agg) {
+ std::swap(selected_probe_table_info, selected_build_table_info);
}
+ } else if ((!best_join->build_side_unique || best_join->num_join_attributes > 1) &&
+ selected_probe_table_info->estimated_cardinality < selected_build_table_info->estimated_cardinality) {
+ std::swap(selected_probe_table_info, selected_build_table_info);
}
- DCHECK(second_table_info != nullptr);
- table_info_ordered_by_priority.erase(second_table_info_it);
- const P::PhysicalPtr &left_child = first_table_info->table;
- const P::PhysicalPtr &right_child = second_table_info->table;
+ remaining_tables.erase(selected_probe_table_info);
+ remaining_tables.erase(selected_build_table_info);
+
+ // Figure out the output attributes.
+ const P::PhysicalPtr &probe_child = selected_probe_table_info->table;
+ const P::PhysicalPtr &build_child = selected_build_table_info->table;
std::vector<E::NamedExpressionPtr> output_attributes;
- for (const E::AttributeReferencePtr &left_attr : left_child->getOutputAttributes()) {
- output_attributes.emplace_back(left_attr);
+ for (const E::AttributeReferencePtr &probe_attr : probe_child->getOutputAttributes()) {
+ output_attributes.emplace_back(probe_attr);
}
- for (const E::AttributeReferencePtr &right_attr : right_child->getOutputAttributes()) {
- output_attributes.emplace_back(right_attr);
+ for (const E::AttributeReferencePtr &build_attr : build_child->getOutputAttributes()) {
+ output_attributes.emplace_back(build_attr);
}
- std::vector<E::AttributeReferencePtr> left_join_attributes;
- std::vector<E::AttributeReferencePtr> right_join_attributes;
- std::unordered_set<expressions::ExprId> new_joined_attribute_set;
- for (const auto &join_attr_pair : first_table_info->join_attribute_pairs) {
- if (second_table_info->join_attribute_pairs.find(join_attr_pair.second)
- != second_table_info->join_attribute_pairs.end()) {
- left_join_attributes.emplace_back(
- attribute_id_to_reference_map[join_attr_pair.first]);
- right_join_attributes.emplace_back(
- attribute_id_to_reference_map[join_attr_pair.second]);
-
- new_joined_attribute_set.emplace(join_attr_pair.first);
- new_joined_attribute_set.emplace(join_attr_pair.second);
+ // Figure out the join attributes.
+ std::vector<E::AttributeReferencePtr> probe_attributes;
+ std::vector<E::AttributeReferencePtr> build_attributes;
+ const std::size_t probe_table_id = selected_probe_table_info->table_info_id;
+ const std::size_t build_table_id = selected_build_table_info->table_info_id;
+ for (const auto &attr_group_pair : join_attribute_groups) {
+ const auto &attr_group = attr_group_pair.second;
+ auto probe_it = attr_group.find(probe_table_id);
+ auto build_it = attr_group.find(build_table_id);
+ if (probe_it != attr_group.end() && build_it != attr_group.end()) {
+ probe_attributes.emplace_back(
+ attribute_id_to_reference_map.at(probe_it->second));
+ build_attributes.emplace_back(
+ attribute_id_to_reference_map.at(build_it->second));
}
}
- DCHECK_GE(left_join_attributes.size(), static_cast<std::size_t>(1));
- if (table_info_ordered_by_priority.size() > 0) {
+ // Create a hash join from the choosen probe/build pair and put it back to
+ // the table pool. Return the last table in the table pool if there is only
+ // one table left.
+ if (remaining_tables.size() > 0) {
P::PhysicalPtr output =
- P::HashJoin::Create(left_child,
- right_child,
- left_join_attributes,
- right_join_attributes,
+ P::HashJoin::Create(probe_child,
+ build_child,
+ probe_attributes,
+ build_attributes,
nullptr,
output_attributes,
P::HashJoin::JoinType::kInnerJoin);
- second_table_info->table = output;
+ selected_probe_table_info->table = output;
// TODO(jianqiao): Cache the estimated cardinality for each plan in cost
// model to avoid duplicated estimation.
- second_table_info->estimated_cardinality = cost_model_->estimateCardinality(output);
-
- second_table_info->join_attribute_pairs.insert(first_table_info->join_attribute_pairs.begin(),
- first_table_info->join_attribute_pairs.end());
- second_table_info->joined_attribute_set.insert(first_table_info->joined_attribute_set.begin(),
- first_table_info->joined_attribute_set.end());
- second_table_info->joined_attribute_set.insert(new_joined_attribute_set.begin(),
- new_joined_attribute_set.end());
- table_info_ordered_by_priority.emplace(second_table_info);
-
- join_graph[second_table_info->table_info_id].insert(join_graph[first_table_info_id].begin(),
- join_graph[first_table_info_id].end());
-
+ selected_probe_table_info->estimated_cardinality = cost_model_->estimateCardinality(output);
+ selected_probe_table_info->estimated_selectivity = cost_model_->estimateSelectivity(output);
+
+ selected_probe_table_info->estimated_num_output_attributes =
+ CountSharedAttributes(join_group.referenced_attributes,
+ output->getOutputAttributes());
+
+ remaining_tables.emplace(selected_probe_table_info);
+
+ // Update join attribute groups.
+ for (auto &attr_group_pair : join_attribute_groups) {
+ auto &attr_group = attr_group_pair.second;
+ auto build_it = attr_group.find(build_table_id);
+ if (build_it != attr_group.end()) {
+ const E::ExprId attr_id = build_it->second;
+ attr_group.erase(build_it);
+ attr_group.emplace(probe_table_id, attr_id);
+ }
+ }
} else {
- return P::HashJoin::Create(left_child,
- right_child,
- left_join_attributes,
- right_join_attributes,
+ return P::HashJoin::Create(probe_child,
+ build_child,
+ probe_attributes,
+ build_attributes,
residual_predicate,
project_expressions,
P::HashJoin::JoinType::kInnerJoin);
@@ -307,5 +353,28 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
}
}
+std::size_t StarSchemaHashJoinOrderOptimization::CountSharedAttributes(
+ const std::unordered_set<expressions::ExprId> &attr_set1,
+ const std::vector<expressions::AttributeReferencePtr> &attr_set2) {
+ std::size_t cnt = 0;
+ for (const auto &attr : attr_set2) {
+ if (attr_set1.find(attr->id()) != attr_set1.end()) {
+ ++cnt;
+ }
+ }
+ return cnt;
+}
+
+std::size_t StarSchemaHashJoinOrderOptimization::getEstimatedNumGroups(
+ const physical::PhysicalPtr &input) {
+ P::AggregatePtr aggregate;
+ if (P::SomeAggregate::MatchesWithConditionalCast(input, &aggregate)) {
+ return cost_model_->estimateNumGroupsForAggregate(aggregate);
+ } else {
+ return 0;
+ }
+}
+
+
} // namespace optimizer
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
index c1a7bae..64e2478 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
@@ -20,16 +20,15 @@
#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_STAR_SCHEMA_HASH_JOIN_ORDER_OPTIMIZATION_HPP_
#define QUICKSTEP_QUERY_OPTIMIZER_RULES_STAR_SCHEMA_HASH_JOIN_ORDER_OPTIMIZATION_HPP_
-#include <algorithm>
#include <cstddef>
#include <memory>
#include <string>
-#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ExprId.hpp"
#include "query_optimizer/expressions/NamedExpression.hpp"
#include "query_optimizer/expressions/Predicate.hpp"
@@ -45,7 +44,11 @@ namespace optimizer {
*/
/**
- * @brief TODO
+ * @brief Rule that applies to a physical plan to optimize hash join orders.
+ *
+ * This optimization applies a greedy algorithm to favor smaller cardinality
+ * and selective tables to be joined first, which is suitable for queries on
+ * star-schema or snowflake-schema tables.
*/
class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
public:
@@ -64,6 +67,7 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
* @brief A group of tables to form a hash join tree.
*/
struct JoinGroupInfo {
+ std::unordered_set<expressions::ExprId> referenced_attributes;
std::vector<physical::PhysicalPtr> tables;
std::vector<std::pair<expressions::ExprId, expressions::ExprId>> join_attribute_pairs;
};
@@ -72,49 +76,91 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
* @brief Auxiliary information of a table for the optimizer.
*/
struct TableInfo {
- TableInfo(const std::size_t in_table_info_id,
- const physical::PhysicalPtr &in_table,
- const std::size_t in_estimated_cardinality,
- const double in_estimated_selectivity)
- : table_info_id(in_table_info_id),
- table(in_table),
- estimated_cardinality(in_estimated_cardinality),
- estimated_selectivity(in_estimated_selectivity) {
+ TableInfo(const std::size_t table_info_id_in,
+ const physical::PhysicalPtr &table_in,
+ const std::size_t estimated_cardinality_in,
+ const double estimated_selectivity_in,
+ const std::size_t estimated_num_output_attributes_in,
+ const bool is_aggregation_in)
+ : table_info_id(table_info_id_in),
+ table(table_in),
+ estimated_cardinality(estimated_cardinality_in),
+ estimated_selectivity(estimated_selectivity_in),
+ estimated_num_output_attributes(estimated_num_output_attributes_in) {
}
const std::size_t table_info_id;
physical::PhysicalPtr table;
std::size_t estimated_cardinality;
double estimated_selectivity;
- std::unordered_multimap<expressions::ExprId, expressions::ExprId> join_attribute_pairs;
- std::unordered_set<expressions::ExprId> joined_attribute_set;
+ std::size_t estimated_num_output_attributes;
};
- /**
- * @brief Comparator that compares the join priorities between two tables.
- */
- struct TableInfoPtrLessComparator {
- inline bool operator() (const TableInfo *lhs, const TableInfo *rhs) {
- bool swapped = false;
- if (lhs->estimated_cardinality > rhs->estimated_cardinality) {
- std::swap(lhs, rhs);
- swapped = true;
+ struct JoinPair {
+ JoinPair(TableInfo *probe_in,
+ TableInfo *build_in,
+ const bool build_side_unique_in,
+ const std::size_t num_join_attributes_in)
+ : probe(probe_in),
+ build(build_in),
+ build_side_unique(build_side_unique_in),
+ num_join_attributes(num_join_attributes_in) {
+ }
+
+ inline bool isBetterThan(const JoinPair &rhs) const {
+ const auto &lhs = *this;
+
+ // Avoid carrying too many output attributes all the way through a long
+ // chain of hash joins.
+ const bool lhs_has_large_output =
+ lhs.build->estimated_num_output_attributes
+ + lhs.probe->estimated_num_output_attributes > 5;
+ const bool rhs_has_large_output =
+ rhs.build->estimated_num_output_attributes
+ + rhs.probe->estimated_num_output_attributes > 5;
+ if (lhs_has_large_output != rhs_has_large_output) {
+ return rhs_has_large_output;
+ }
+
+ // Prefer foreign-key primary-key style hash joins.
+ if (lhs.build_side_unique != rhs.build_side_unique) {
+ return lhs.build_side_unique;
+ }
+
+ // Prefer hash joins where the build side table is small.
+ const bool lhs_has_small_build = lhs.build->estimated_cardinality < 0x100;
+ const bool rhs_has_small_build = rhs.build->estimated_cardinality < 0x100;
+ if (lhs_has_small_build != rhs_has_small_build) {
+ return lhs_has_small_build;
}
- if (lhs->estimated_selectivity < rhs->estimated_selectivity) {
- return !swapped;
- } else if (lhs->estimated_cardinality < 100u &&
- rhs->estimated_cardinality > 10000u &&
- lhs->estimated_selectivity < rhs->estimated_selectivity * 1.5) {
- return !swapped;
- } else if (lhs->estimated_selectivity > rhs->estimated_selectivity) {
- return swapped;
- } else if (lhs->estimated_cardinality != rhs->estimated_cardinality) {
- return !swapped;
+ // Prefer hash joins where the probe side table is small. This is effective
+ // for TPCH style (snowflake schema) queries, with the help of LIPFilters.
+ if (lhs.probe->estimated_cardinality != rhs.probe->estimated_cardinality) {
+ return lhs.probe->estimated_cardinality < rhs.probe->estimated_cardinality;
+ }
+
+ // Prefer build side tables with better selectivity. This is effective
+ // for SSB style queries.
+ if (lhs.build->estimated_selectivity != rhs.build->estimated_selectivity) {
+ return lhs.build->estimated_selectivity < rhs.build->estimated_selectivity;
+ }
+
+ // Residual rules that help provide a total order.
+ if (lhs.build->estimated_cardinality != rhs.build->estimated_cardinality) {
+ return lhs.build->estimated_cardinality < rhs.build->estimated_cardinality;
+ }
+ if (lhs.probe->table != rhs.probe->table) {
+ return lhs.probe->table < rhs.probe->table;
} else {
- return swapped ^ (lhs->table < rhs->table);
+ return lhs.build->table < rhs.build->table;
}
}
+
+ TableInfo *probe;
+ TableInfo *build;
+ const bool build_side_unique;
+ const std::size_t num_join_attributes;
};
physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input,
@@ -125,6 +171,12 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
const expressions::PredicatePtr &residual_predicate,
const std::vector<expressions::NamedExpressionPtr> &project_expressions);
+ std::size_t getEstimatedNumGroups(const physical::PhysicalPtr &input);
+
+ static std::size_t CountSharedAttributes(
+ const std::unordered_set<expressions::ExprId> &attr_set1,
+ const std::vector<expressions::AttributeReferencePtr> &attr_set2);
+
std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
DISALLOW_COPY_AND_ASSIGN(StarSchemaHashJoinOrderOptimization);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8740f50a/query_optimizer/tests/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/CMakeLists.txt b/query_optimizer/tests/CMakeLists.txt
index 597dbe0..ac4548a 100644
--- a/query_optimizer/tests/CMakeLists.txt
+++ b/query_optimizer/tests/CMakeLists.txt
@@ -94,22 +94,6 @@ add_executable(quickstep_queryoptimizer_tests_ExecutionGeneratorTest
ExecutionGeneratorTestRunner.hpp
"${PROJECT_SOURCE_DIR}/utility/textbased_test/TextBasedTest.cpp"
"${PROJECT_SOURCE_DIR}/utility/textbased_test/TextBasedTest.hpp")
-add_executable(ExecutionHeuristics_unittest ExecutionHeuristics_unittest.cpp)
-target_link_libraries(ExecutionHeuristics_unittest
- gtest
- gtest_main
- quickstep_catalog_Catalog
- quickstep_catalog_CatalogDatabase
- quickstep_catalog_CatalogTypedefs
- quickstep_queryexecution_QueryContext
- quickstep_queryexecution_QueryContext_proto
- quickstep_queryoptimizer_ExecutionHeuristics
- quickstep_queryoptimizer_QueryPlan
- quickstep_relationaloperators_BuildHashOperator
- quickstep_relationaloperators_HashJoinOperator
- quickstep_utility_Macros)
-add_test(ExecutionHeuristics_unittest ExecutionHeuristics_unittest)
-
add_executable(quickstep_queryoptimizer_tests_OptimizerTextTest
OptimizerTextTest.cpp
OptimizerTextTestRunner.cpp