You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2017/02/02 18:09:44 UTC
[01/10] incubator-quickstep git commit: Add unit test for
CatalogRelationStatistics [Forced Update!]
Repository: incubator-quickstep
Updated Branches:
refs/heads/reduce-group-by-attrs 46ae7487f -> 0f5b0fbb4 (forced update)
Add unit test for CatalogRelationStatistics
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/0f4938ca
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/0f4938ca
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/0f4938ca
Branch: refs/heads/reduce-group-by-attrs
Commit: 0f4938caa29096f18bb699c8f746a733f2262698
Parents: 0780b84
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Mon Jan 23 20:54:51 2017 -0600
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Sun Jan 29 23:13:45 2017 -0800
----------------------------------------------------------------------
catalog/CMakeLists.txt | 27 +++
.../CatalogRelationStatistics_unittest.cpp | 219 +++++++++++++++++++
2 files changed, 246 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f4938ca/catalog/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/catalog/CMakeLists.txt b/catalog/CMakeLists.txt
index 7de9a67..3c64e97 100644
--- a/catalog/CMakeLists.txt
+++ b/catalog/CMakeLists.txt
@@ -225,6 +225,31 @@ target_link_libraries(Catalog_unittest
quickstep_utility_PtrVector)
add_test(Catalog_unittest Catalog_unittest)
+add_executable(CatalogRelationStatistics_unittest
+ "${CMAKE_CURRENT_SOURCE_DIR}/tests/CatalogRelationStatistics_unittest.cpp")
+target_link_libraries(CatalogRelationStatistics_unittest
+ gtest
+ gtest_main
+ quickstep_catalog_Catalog
+ quickstep_catalog_CatalogDatabase
+ quickstep_catalog_CatalogRelation
+ quickstep_catalog_CatalogRelationStatistics
+ quickstep_cli_CommandExecutor
+ quickstep_cli_DropRelation
+ quickstep_parser_ParseStatement
+ quickstep_parser_SqlParserWrapper
+ quickstep_queryexecution_ForemanSingleNode
+ quickstep_queryexecution_QueryExecutionTypedefs
+ quickstep_queryexecution_QueryExecutionUtil
+ quickstep_queryexecution_Worker
+ quickstep_queryexecution_WorkerDirectory
+ quickstep_queryoptimizer_QueryHandle
+ quickstep_queryoptimizer_QueryProcessor
+ quickstep_storage_StorageConstants
+ quickstep_storage_StorageManager
+ tmb)
+add_test(CatalogRelationStatistics_unittest CatalogRelationStatistics_unittest)
+
if(QUICKSTEP_HAVE_LIBNUMA)
add_executable(NUMAPlacementScheme_unittest
"${CMAKE_CURRENT_SOURCE_DIR}/tests/NUMAPlacementScheme_unittest.cpp")
@@ -253,3 +278,5 @@ target_link_libraries(PartitionScheme_unittest
quickstep_types_operations_comparisons_Comparison
quickstep_types_operations_comparisons_EqualComparison)
add_test(PartitionScheme_unittest PartitionScheme_unittest)
+
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/catalog_relation_statistics_test_data)
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f4938ca/catalog/tests/CatalogRelationStatistics_unittest.cpp
----------------------------------------------------------------------
diff --git a/catalog/tests/CatalogRelationStatistics_unittest.cpp b/catalog/tests/CatalogRelationStatistics_unittest.cpp
new file mode 100644
index 0000000..294a6c7
--- /dev/null
+++ b/catalog/tests/CatalogRelationStatistics_unittest.cpp
@@ -0,0 +1,219 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include <cstdio>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "catalog/Catalog.hpp"
+#include "catalog/CatalogDatabase.hpp"
+#include "catalog/CatalogRelation.hpp"
+#include "catalog/CatalogRelationStatistics.hpp"
+#include "cli/CommandExecutor.hpp"
+#include "cli/DropRelation.hpp"
+#include "parser/ParseStatement.hpp"
+#include "parser/SqlParserWrapper.hpp"
+#include "query_execution/ForemanSingleNode.hpp"
+#include "query_execution/QueryExecutionTypedefs.hpp"
+#include "query_execution/QueryExecutionUtil.hpp"
+#include "query_execution/Worker.hpp"
+#include "query_execution/WorkerDirectory.hpp"
+#include "query_optimizer/QueryHandle.hpp"
+#include "query_optimizer/QueryProcessor.hpp"
+#include "storage/StorageConstants.hpp"
+#include "storage/StorageManager.hpp"
+
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+
+#include "tmb/id_typedefs.h"
+
+namespace quickstep {
+
+namespace {
+
+constexpr char kStoragePath[] = "./catalog_relation_statistics_test_data/";
+
+constexpr attribute_id kFirstAttributeId = 0;
+constexpr attribute_id kSecondAttributeId = 1;
+
+} // namespace
+
+class CatalogRelationStatisticsTest : public ::testing::Test {
+ protected:
+ virtual void SetUp() {
+ // Set up the environment for running end-to-end queries.
+ quickstep::ClientIDMap::Instance();
+
+ bus_.Initialize();
+
+ main_thread_client_id_ = bus_.Connect();
+ bus_.RegisterClientAsSender(main_thread_client_id_, kAdmitRequestMessage);
+ bus_.RegisterClientAsSender(main_thread_client_id_, kPoisonMessage);
+ bus_.RegisterClientAsReceiver(main_thread_client_id_, kWorkloadCompletionMessage);
+
+ std::string catalog_path(kStoragePath);
+ catalog_path.append(kCatalogFilename);
+
+ std::ofstream catalog_file(catalog_path.c_str());
+ Catalog catalog;
+ catalog.addDatabase(new CatalogDatabase(nullptr, "default"));
+ catalog.getProto().SerializeToOstream(&catalog_file);
+ catalog_file.close();
+
+ storage_manager_.reset(new StorageManager(kStoragePath));
+ query_processor_.reset(new QueryProcessor(std::move(catalog_path)));
+
+ worker_.reset(new Worker(0, &bus_));
+ worker_directory_.reset(
+ new WorkerDirectory(1, {worker_->getBusClientID()}, {-1}));
+
+ foreman_.reset(
+ new ForemanSingleNode(main_thread_client_id_,
+ worker_directory_.get(),
+ &bus_,
+ query_processor_->getDefaultDatabase(),
+ storage_manager_.get()));
+
+ worker_->start();
+ foreman_->start();
+ }
+
+ virtual void TearDown() {
+ for (const auto &relation : *query_processor_->getDefaultDatabase()) {
+ DropRelation::Drop(relation,
+ query_processor_->getDefaultDatabase(),
+ storage_manager_.get());
+ }
+
+ QueryExecutionUtil::BroadcastPoisonMessage(main_thread_client_id_, &bus_);
+ worker_->join();
+ foreman_->join();
+ }
+
+ void executeQuery(const std::string &query_string) {
+ SqlParserWrapper parser_wrapper;
+ parser_wrapper.feedNextBuffer(new std::string(query_string));
+
+ ParseResult result = parser_wrapper.getNextStatement();
+ DCHECK(result.condition == ParseResult::kSuccess);
+
+ const ParseStatement &statement = *result.parsed_statement;
+ std::unique_ptr<QueryHandle> query_handle =
+ std::make_unique<QueryHandle>(query_processor_->query_id(),
+ main_thread_client_id_,
+ statement.getPriority());
+ query_processor_->generateQueryHandle(statement, query_handle.get());
+
+ QueryExecutionUtil::ConstructAndSendAdmitRequestMessage(
+ main_thread_client_id_,
+ foreman_->getBusClientID(),
+ query_handle.release(),
+ &bus_);
+
+ QueryExecutionUtil::ReceiveQueryCompletionMessage(main_thread_client_id_, &bus_);
+ }
+
+ void executeAnalyze(const std::string &rel_name) {
+ SqlParserWrapper parser_wrapper;
+ parser_wrapper.feedNextBuffer(new std::string("\\analyze " + rel_name));
+
+ ParseResult result = parser_wrapper.getNextStatement();
+ DCHECK(result.condition == ParseResult::kSuccess);
+
+ const ParseStatement &statement = *result.parsed_statement;
+ DCHECK(statement.getStatementType() == ParseStatement::kCommand);
+ quickstep::cli::executeCommand(statement,
+ *(query_processor_->getDefaultDatabase()),
+ main_thread_client_id_,
+ foreman_->getBusClientID(),
+ &bus_,
+ storage_manager_.get(),
+ query_processor_.get(),
+ stdout);
+ }
+
+ const CatalogRelation *getRelationByName(const std::string &rel_name) const {
+ const CatalogRelation *relation =
+ query_processor_->getDefaultDatabase()->getRelationByName(rel_name);
+ DCHECK(relation != nullptr);
+ return relation;
+ }
+
+ private:
+ MessageBusImpl bus_;
+ tmb::client_id main_thread_client_id_;
+
+ std::unique_ptr<StorageManager> storage_manager_;
+ std::unique_ptr<QueryProcessor> query_processor_;
+
+ std::unique_ptr<Worker> worker_;
+ std::unique_ptr<WorkerDirectory> worker_directory_;
+ std::unique_ptr<ForemanSingleNode> foreman_;
+};
+
+TEST_F(CatalogRelationStatisticsTest, AnalyzeTest) {
+ executeQuery("CREATE TABLE analyzetest(x INT, y DOUBLE);");
+ executeQuery("INSERT INTO analyzetest VALUES(0, -0.5);");
+ executeQuery("INSERT INTO analyzetest VALUES(1, 0);");
+ executeQuery("INSERT INTO analyzetest VALUES(0, 0.5);");
+ executeAnalyze("analyzetest");
+
+ const CatalogRelation *relation = getRelationByName("analyzetest");
+ const CatalogRelationStatistics &stat = relation->getStatistics();
+
+ EXPECT_EQ(3u, stat.getNumTuples());
+
+ EXPECT_EQ(2u, stat.getNumDistinctValues(kFirstAttributeId));
+ EXPECT_EQ(0, stat.getMinValue(kFirstAttributeId).getLiteral<int>());
+ EXPECT_EQ(1, stat.getMaxValue(kFirstAttributeId).getLiteral<int>());
+
+ EXPECT_EQ(3u, stat.getNumDistinctValues(kSecondAttributeId));
+ EXPECT_EQ(-0.5, stat.getMinValue(kSecondAttributeId).getLiteral<double>());
+ EXPECT_EQ(0.5, stat.getMaxValue(kSecondAttributeId).getLiteral<double>());
+}
+
+TEST_F(CatalogRelationStatisticsTest, ExactnessTest) {
+ executeQuery("CREATE TABLE exactnesstest(x INT);");
+
+ const CatalogRelationStatistics &stat =
+ getRelationByName("exactnesstest")->getStatistics();
+
+ EXPECT_FALSE(stat.isExact());
+
+ const std::vector<std::string> queries = {
+ "INSERT INTO exactnesstest VALUES(1);",
+ "INSERT INTO exactnesstest SELECT i FROM generate_series(2, 10) AS gs(i);",
+ "DELETE FROM exactnesstest WHERE x = 5;",
+ "UPDATE exactnesstest SET x = 100 WHERE x = 10;"
+ };
+
+ for (const std::string &query : queries) {
+ executeQuery(query);
+ EXPECT_FALSE(stat.isExact());
+
+ executeAnalyze("exactnesstest");
+ EXPECT_TRUE(stat.isExact());
+ }
+}
+
+} // namespace quickstep
[06/10] incubator-quickstep git commit: Fixed the linking issue for
the distributed cli.
Posted by ji...@apache.org.
Fixed the linking issue for the distributed cli.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/dff4a145
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/dff4a145
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/dff4a145
Branch: refs/heads/reduce-group-by-attrs
Commit: dff4a145e2c2d3d7b84fb259e48e425310a52a8a
Parents: 259cd5e
Author: Zuyu Zhang <zu...@apache.org>
Authored: Tue Jan 31 12:19:00 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Tue Jan 31 12:19:00 2017 -0800
----------------------------------------------------------------------
cli/distributed/CMakeLists.txt | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/dff4a145/cli/distributed/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cli/distributed/CMakeLists.txt b/cli/distributed/CMakeLists.txt
index a00ffda..1069abd 100644
--- a/cli/distributed/CMakeLists.txt
+++ b/cli/distributed/CMakeLists.txt
@@ -28,6 +28,7 @@ target_link_libraries(quickstep_cli_distributed_Cli
glog
quickstep_catalog_CatalogRelation
quickstep_cli_Flags
+ quickstep_cli_LineReader
quickstep_cli_PrintToScreen
quickstep_cli_distributed_Role
quickstep_parser_ParseStatement
[10/10] incubator-quickstep git commit: Reduce the number of group-by
attributes by pulling tables up aggregations
Posted by ji...@apache.org.
Reduce the number of group-by attributes by pulling tables up aggregations
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/0f5b0fbb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/0f5b0fbb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/0f5b0fbb
Branch: refs/heads/reduce-group-by-attrs
Commit: 0f5b0fbb456642d89c57a5d0215ebc0fdca3b6ce
Parents: 4ba819c
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Sun Jan 29 18:36:14 2017 -0600
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Thu Feb 2 12:09:10 2017 -0600
----------------------------------------------------------------------
query_optimizer/CMakeLists.txt | 1 +
query_optimizer/Optimizer.cpp | 3 +-
query_optimizer/Optimizer.hpp | 2 -
query_optimizer/PhysicalGenerator.cpp | 6 +
query_optimizer/PhysicalGenerator.hpp | 11 +-
query_optimizer/rules/CMakeLists.txt | 22 ++
.../rules/ReduceGroupByAttributes.cpp | 211 +++++++++++++++++++
.../rules/ReduceGroupByAttributes.hpp | 143 +++++++++++++
query_optimizer/tests/OptimizerTest.cpp | 2 +-
.../tests/OptimizerTextTestRunner.cpp | 7 +-
.../tests/OptimizerTextTestRunner.hpp | 3 +-
11 files changed, 401 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f5b0fbb/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 7f90e11..bc9a52f 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -212,6 +212,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_rules_InjectJoinFilters
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
+ quickstep_queryoptimizer_rules_ReduceGroupByAttributes
quickstep_queryoptimizer_rules_ReorderColumns
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
quickstep_queryoptimizer_rules_SwapProbeBuild
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f5b0fbb/query_optimizer/Optimizer.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/Optimizer.cpp b/query_optimizer/Optimizer.cpp
index b14c938..1b91574 100644
--- a/query_optimizer/Optimizer.cpp
+++ b/query_optimizer/Optimizer.cpp
@@ -30,10 +30,11 @@ void Optimizer::generateQueryHandle(const ParseStatement &parse_statement,
OptimizerContext *optimizer_context,
QueryHandle *query_handle) {
LogicalGenerator logical_generator(optimizer_context);
+ PhysicalGenerator physical_generator(optimizer_context);
ExecutionGenerator execution_generator(catalog_database, query_handle);
execution_generator.generatePlan(
- physical_generator_.generatePlan(
+ physical_generator.generatePlan(
logical_generator.generatePlan(*catalog_database, parse_statement)));
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f5b0fbb/query_optimizer/Optimizer.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/Optimizer.hpp b/query_optimizer/Optimizer.hpp
index 36f956a..227dd04 100644
--- a/query_optimizer/Optimizer.hpp
+++ b/query_optimizer/Optimizer.hpp
@@ -70,8 +70,6 @@ class Optimizer {
QueryHandle *query_handle);
private:
- PhysicalGenerator physical_generator_;
-
DISALLOW_COPY_AND_ASSIGN(Optimizer);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f5b0fbb/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 5dc0ffb..5c6651b 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -29,8 +29,12 @@
#include "query_optimizer/rules/AttachLIPFilters.hpp"
#include "query_optimizer/rules/InjectJoinFilters.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
+<<<<<<< 4ba819c5b82af1d9284525bd7a16784e0254be3f
#include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp"
#include "query_optimizer/rules/ReorderColumns.hpp"
+=======
+#include "query_optimizer/rules/ReduceGroupByAttributes.hpp"
+>>>>>>> Reduce the number of group-by attributes by pulling tables up aggregations
#include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
#include "query_optimizer/rules/SwapProbeBuild.hpp"
#include "query_optimizer/strategy/Aggregate.hpp"
@@ -127,6 +131,8 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
// general FusePhysical optimization) in the future.
rules.emplace_back(new PushDownLowCostDisjunctivePredicate());
+ rules.emplace_back(new ReduceGroupByAttributes(optimizer_context_));
+
if (FLAGS_reorder_hash_joins) {
rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
rules.emplace_back(new PruneColumns());
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f5b0fbb/query_optimizer/PhysicalGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.hpp b/query_optimizer/PhysicalGenerator.hpp
index 886a173..42fea86 100644
--- a/query_optimizer/PhysicalGenerator.hpp
+++ b/query_optimizer/PhysicalGenerator.hpp
@@ -33,6 +33,8 @@
namespace quickstep {
namespace optimizer {
+class OptimizerContext;
+
/** \addtogroup QueryOptimizer
* @{
*/
@@ -43,9 +45,12 @@ namespace optimizer {
class PhysicalGenerator : public LogicalToPhysicalMapper {
public:
/**
- * @brief Constructor
+ * @brief Constructor.
+ *
+ * @param optimizer_context The optimizer context.
*/
- PhysicalGenerator() {
+ explicit PhysicalGenerator(OptimizerContext *optimizer_context)
+ : optimizer_context_(optimizer_context) {
createStrategies();
}
@@ -125,6 +130,8 @@ class PhysicalGenerator : public LogicalToPhysicalMapper {
*/
std::unordered_map<logical::LogicalPtr, physical::PhysicalPtr> logical_to_physical_map_;
+ OptimizerContext *optimizer_context_;
+
/**
* @brief The complete physical plan.
*/
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f5b0fbb/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 223c78c..0586f00 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -29,6 +29,9 @@ add_library(quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
PushDownLowCostDisjunctivePredicate.cpp
PushDownLowCostDisjunctivePredicate.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin PushDownSemiAntiJoin.cpp PushDownSemiAntiJoin.hpp)
+add_library(quickstep_queryoptimizer_rules_ReduceGroupByAttributes
+ ReduceGroupByAttributes.cpp
+ ReduceGroupByAttributes.hpp)
add_library(quickstep_queryoptimizer_rules_ReorderColumns ReorderColumns.cpp ReorderColumns.hpp)
add_library(quickstep_queryoptimizer_rules_Rule ../../empty_src.cpp Rule.hpp)
add_library(quickstep_queryoptimizer_rules_RuleHelper RuleHelper.cpp RuleHelper.hpp)
@@ -143,6 +146,25 @@ target_link_libraries(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
quickstep_queryoptimizer_logical_PatternMatcher
quickstep_queryoptimizer_rules_TopDownRule
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_ReduceGroupByAttributes
+ quickstep_catalog_CatalogRelation
+ quickstep_queryoptimizer_OptimizerContext
+ quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExprId
+ quickstep_queryoptimizer_expressions_ExpressionUtil
+ quickstep_queryoptimizer_expressions_NamedExpression
+ quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_queryoptimizer_physical_TableReference
+ quickstep_queryoptimizer_physical_TopLevelPlan
+ quickstep_queryoptimizer_rules_PruneColumns
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_types_TypeID
+ quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_rules_ReorderColumns
quickstep_queryoptimizer_expressions_AttributeReference
quickstep_queryoptimizer_expressions_ExprId
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f5b0fbb/query_optimizer/rules/ReduceGroupByAttributes.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ReduceGroupByAttributes.cpp b/query_optimizer/rules/ReduceGroupByAttributes.cpp
new file mode 100644
index 0000000..99e17b3
--- /dev/null
+++ b/query_optimizer/rules/ReduceGroupByAttributes.cpp
@@ -0,0 +1,211 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/ReduceGroupByAttributes.hpp"
+
+#include <algorithm>
+#include <map>
+#include <vector>
+#include <unordered_set>
+#include <utility>
+
+#include "catalog/CatalogRelation.hpp"
+#include "query_optimizer/OptimizerContext.hpp"
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/TableReference.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/rules/PruneColumns.hpp"
+#include "types/TypeID.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr ReduceGroupByAttributes::apply(const P::PhysicalPtr &input) {
+ DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+ cost_model_.reset(new cost::StarSchemaSimpleCostModel(
+ std::static_pointer_cast<const P::TopLevelPlan>(input)->shared_subplans()));
+
+ P::PhysicalPtr output = applyInternal(input);
+ if (output != input) {
+ output = PruneColumns().apply(output);
+ }
+ return output;
+}
+
+P::PhysicalPtr ReduceGroupByAttributes::applyInternal(const P::PhysicalPtr &input) {
+ std::vector<P::PhysicalPtr> new_children;
+ for (const P::PhysicalPtr &child : input->children()) {
+ new_children.push_back(applyInternal(child));
+ }
+
+ if (new_children != input->children()) {
+ return applyToNode(input->copyWithNewChildren(new_children));
+ } else {
+ return applyToNode(input);
+ }
+}
+
+P::PhysicalPtr ReduceGroupByAttributes::applyToNode(const P::PhysicalPtr &input) {
+ P::TableReferencePtr table_reference;
+ if (P::SomeTableReference::MatchesWithConditionalCast(input, &table_reference)) {
+ // Collect the attributes-to-TableReference mapping info.
+ for (const auto &attr : table_reference->attribute_list()) {
+ source_.emplace(attr->id(), std::make_pair(table_reference, attr));
+ }
+ return input;
+ }
+
+ P::AggregatePtr aggregate;
+ if (!P::SomeAggregate::MatchesWithConditionalCast(input, &aggregate) ||
+ aggregate->grouping_expressions().size() <= 1u) {
+ return input;
+ }
+
+ // Divide the group-by attributes into groups based on their source table.
+ std::map<P::TableReferencePtr, std::vector<E::AttributeReferencePtr>> table_attributes;
+ for (const auto &expr : aggregate->grouping_expressions()) {
+ const auto source_it = source_.find(expr->id());
+ if (source_it != source_.end()) {
+ table_attributes[source_it->second.first].emplace_back(source_it->second.second);
+ }
+ }
+
+ std::unordered_set<E::ExprId> erased_grouping_attr_ids;
+ std::vector<std::pair<P::TableReferencePtr, E::AttributeReferencePtr>> hoisted_tables;
+
+ // For each group (i.e. each source table), if it is profitable then we pull
+ // the table up the aggregation.
+ for (const auto &pair : table_attributes) {
+ const P::TableReferencePtr table = pair.first;
+ const std::vector<E::AttributeReferencePtr> &attributes = pair.second;
+ // TODO(jianqiao): find a cost-based metic instead of hard-coding the threshold
+ // number of group-by attributes.
+ if (attributes.size() <= 3u) {
+ continue;
+ }
+
+ std::vector<AttributeInfo> attr_infos;
+ for (const auto &attr : attributes) {
+ attr_infos.emplace_back(attr,
+ cost_model_->impliesUniqueAttributes(table, {attr}),
+ !attr->getValueType().isVariableLength(),
+ attr->getValueType().maximumByteLength());
+ }
+
+ std::vector<const AttributeInfo *> attr_info_refs;
+ for (const auto &info : attr_infos) {
+ attr_info_refs.emplace_back(&info);
+ }
+ std::sort(attr_info_refs.begin(),
+ attr_info_refs.end(),
+ AttributeInfo::IsBetterThan);
+
+ const AttributeInfo &best_candidate = *attr_info_refs.front();
+ if (!best_candidate.is_unique) {
+ // Cannot find a key attribute. Give up pulling this table up.
+ continue;
+ }
+
+ const E::AttributeReferencePtr key_attribute = best_candidate.attribute;
+ hoisted_tables.emplace_back(table, key_attribute);
+
+ for (const auto &attr : attributes) {
+ if (attr->id() != key_attribute->id()) {
+ erased_grouping_attr_ids.emplace(attr->id());
+ }
+ }
+ }
+
+ if (erased_grouping_attr_ids.empty()) {
+ return input;
+ }
+
+ // Reconstuct the Aggregate node with reduced group-by attributes and then
+ // construct HashJoin nodes on top of the Aggregate.
+ std::vector<E::NamedExpressionPtr> reduced_grouping_expressions;
+ for (const auto &expr : aggregate->grouping_expressions()) {
+ if (erased_grouping_attr_ids.find(expr->id()) == erased_grouping_attr_ids.end()) {
+ reduced_grouping_expressions.emplace_back(expr);
+ }
+ }
+
+ const P::AggregatePtr new_aggregate =
+ P::Aggregate::Create(aggregate->input(),
+ reduced_grouping_expressions,
+ aggregate->aggregate_expressions(),
+ aggregate->filter_predicate());
+
+ P::PhysicalPtr output = new_aggregate;
+ std::vector<E::NamedExpressionPtr> project_expressions =
+ E::ToNamedExpressions(output->getOutputAttributes());
+ for (const auto &pair : hoisted_tables) {
+ const P::TableReferencePtr &source_table = pair.first;
+ const E::AttributeReferencePtr &probe_attribute = pair.second;
+
+ E::AttributeReferencePtr build_attribute;
+ std::vector<E::AttributeReferencePtr> new_attribute_list;
+ for (const auto &attr : source_table->attribute_list()) {
+ if (attr->id() == probe_attribute->id()) {
+ build_attribute =
+ E::AttributeReference::Create(optimizer_context_->nextExprId(),
+ attr->attribute_name(),
+ attr->attribute_alias(),
+ attr->relation_name(),
+ attr->getValueType(),
+ E::AttributeReferenceScope::kLocal);
+ new_attribute_list.emplace_back(build_attribute);
+ } else {
+ new_attribute_list.emplace_back(attr);
+ project_expressions.emplace_back(attr);
+ }
+ }
+
+ DCHECK(build_attribute != nullptr);
+ const P::TableReferencePtr build_side_table =
+ P::TableReference::Create(source_table->relation(),
+ source_table->relation()->getName(),
+ new_attribute_list);
+ output = P::HashJoin::Create(output,
+ build_side_table,
+ {probe_attribute},
+ {build_attribute},
+ nullptr,
+ project_expressions,
+ P::HashJoin::JoinType::kInnerJoin);
+ }
+
+ return output;
+}
+
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f5b0fbb/query_optimizer/rules/ReduceGroupByAttributes.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ReduceGroupByAttributes.hpp b/query_optimizer/rules/ReduceGroupByAttributes.hpp
new file mode 100644
index 0000000..5a1f295
--- /dev/null
+++ b/query_optimizer/rules/ReduceGroupByAttributes.hpp
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_REDUCE_GROUP_BY_ATTRIBUTES_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_REDUCE_GROUP_BY_ATTRIBUTES_HPP_
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/TableReference.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+class OptimizerContext;
+
+/**
+ * @brief Rule that applies to a physical plan to reduce the number of group-by
+ * attributes for Aggregate nodes (to improve performance) by pulling
+ * joins up the aggregations.
+ *
+ * For example, let R be a relation with PRIMARY KEY x and attributes y, z. Let
+ * S be a relation with FOREIGN KEY u refering to R(x) and attribute v. Then the
+ * optimization rule will transform the physical plan:
+ * Aggregate(
+ * [input relation]: HashJoin(
+ * [probe relation]: S
+ * [build relation]: R
+ * [join expression]: S.u = R.x
+ * [project attributes]: v, x, y, z
+ * )
+ * [aggregate expression]: SUM(v) AS sum_v
+ * [group-by attributes]: x, y, z
+ * )
+ *
+ * into:
+ * HashJoin(
+ * [probe relation]: Aggregate(
+ * [input relation]: S
+ * [aggregate expression]: SUM(v) AS sum_v
+ * [group-by attribute]: u
+ * ) AS T
+ * [build relation]: R
+ * [join expression]: T.u = R.x
+ * [project attributes]: sum_v, x, y, z
+ * )
+ */
+class ReduceGroupByAttributes : public Rule<physical::Physical> {
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * @param optimizer_context The optimizer context.
+ */
+ explicit ReduceGroupByAttributes(OptimizerContext *optimizer_context)
+ : optimizer_context_(optimizer_context) {}
+
+ ~ReduceGroupByAttributes() override {}
+
+ std::string getName() const override {
+ return "ReduceGroupByAttributes";
+ }
+
+ physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+ struct AttributeInfo {
+ AttributeInfo(const expressions::AttributeReferencePtr &attribute_in,
+ const bool is_unique_in,
+ const bool is_fixed_length_in,
+ const std::size_t maximum_size_in)
+ : attribute(attribute_in),
+ is_unique(is_unique_in),
+ is_fixed_length(is_fixed_length_in),
+ maximum_size(maximum_size_in) {}
+
+ // In the situation that there are multiple attributes that can serve as the
+ // group-by key, we define an ordering based on aggregation performance (e.g.
+ // it is faster to do aggregation with a fix-length attribute as the group-by
+ // key than with a variable-length attribute).
+ inline static bool IsBetterThan(const AttributeInfo *lhs,
+ const AttributeInfo *rhs) {
+ if (lhs->is_unique != rhs->is_unique) {
+ return lhs->is_unique;
+ }
+ if (lhs->is_fixed_length != rhs->is_fixed_length) {
+ return lhs->is_fixed_length;
+ }
+ if (lhs->maximum_size != rhs->maximum_size) {
+ return lhs->maximum_size < rhs->maximum_size;
+ }
+ return lhs->attribute->id() < rhs->attribute->id();
+ }
+
+ const expressions::AttributeReferencePtr attribute;
+ const bool is_unique;
+ const bool is_fixed_length;
+ const std::size_t maximum_size;
+ };
+
+ physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input);
+ physical::PhysicalPtr applyToNode(const physical::PhysicalPtr &input);
+
+ OptimizerContext *optimizer_context_;
+ std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
+
+ // Maps an attribute's id to the TableReference that generates the attribute.
+ std::unordered_map<expressions::ExprId,
+ std::pair<physical::TableReferencePtr,
+ expressions::AttributeReferencePtr>> source_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReduceGroupByAttributes);
+};
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_REDUCE_GROUP_BY_ATTRIBUTES_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f5b0fbb/query_optimizer/tests/OptimizerTest.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/OptimizerTest.cpp b/query_optimizer/tests/OptimizerTest.cpp
index 3838638..7eb7a11 100644
--- a/query_optimizer/tests/OptimizerTest.cpp
+++ b/query_optimizer/tests/OptimizerTest.cpp
@@ -62,7 +62,7 @@ OptimizerTest::OptimizerTest()
catalog_database_(
new CatalogDatabase(catalog_.get(), "TestDatabase" /* name */, 0)),
optimizer_context_(new OptimizerContext),
- physical_generator_(new PhysicalGenerator()) {}
+ physical_generator_(new PhysicalGenerator(optimizer_context_.get())) {}
E::AliasPtr OptimizerTest::createAlias(const E::ExpressionPtr &expression,
const std::string &attribute_name,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f5b0fbb/query_optimizer/tests/OptimizerTextTestRunner.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/OptimizerTextTestRunner.cpp b/query_optimizer/tests/OptimizerTextTestRunner.cpp
index b9238c9..cb8f153 100644
--- a/query_optimizer/tests/OptimizerTextTestRunner.cpp
+++ b/query_optimizer/tests/OptimizerTextTestRunner.cpp
@@ -80,7 +80,7 @@ void OptimizerTextTestRunner::runTestCase(const std::string &input,
}
if (output_physical_plan) {
physical_plan =
- generatePhysicalPlan(optimized_logical_plan);
+ generatePhysicalPlan(optimized_logical_plan, &optimizer_context);
++num_options;
}
@@ -126,8 +126,9 @@ logical::LogicalPtr OptimizerTextTestRunner::generateLogicalPlan(
}
physical::PhysicalPtr OptimizerTextTestRunner::generatePhysicalPlan(
- const logical::LogicalPtr &logical_plan) {
- PhysicalGenerator physical_generator;
+ const logical::LogicalPtr &logical_plan,
+ OptimizerContext *optimizer_context) {
+ PhysicalGenerator physical_generator(optimizer_context);
return physical_generator.generatePlan(logical_plan);
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f5b0fbb/query_optimizer/tests/OptimizerTextTestRunner.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/OptimizerTextTestRunner.hpp b/query_optimizer/tests/OptimizerTextTestRunner.hpp
index 27fa14f..d8f604b 100644
--- a/query_optimizer/tests/OptimizerTextTestRunner.hpp
+++ b/query_optimizer/tests/OptimizerTextTestRunner.hpp
@@ -73,7 +73,8 @@ class OptimizerTextTestRunner : public TextBasedTestRunner {
OptimizerContext *optimizer_context);
physical::PhysicalPtr generatePhysicalPlan(
- const logical::LogicalPtr &logical_plan);
+ const logical::LogicalPtr &logical_plan,
+ OptimizerContext *optimizer_context);
SqlParserWrapper sql_parser_;
TestDatabaseLoader test_database_loader_;
[05/10] incubator-quickstep git commit: Push down low cost
disjunctive predicates to filter the stored relations early
Posted by ji...@apache.org.
Push down low cost disjunctive predicates to filter the stored relations early
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/259cd5e7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/259cd5e7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/259cd5e7
Branch: refs/heads/reduce-group-by-attrs
Commit: 259cd5e731ead6e38f546c66211aceb3c20f6f4d
Parents: 6d83b46
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Mon Jan 30 01:02:19 2017 -0600
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Tue Jan 31 10:59:08 2017 -0600
----------------------------------------------------------------------
query_optimizer/CMakeLists.txt | 1 +
query_optimizer/PhysicalGenerator.cpp | 15 ++
query_optimizer/rules/CMakeLists.txt | 24 ++
.../PushDownLowCostDisjunctivePredicate.cpp | 225 +++++++++++++++++++
.../PushDownLowCostDisjunctivePredicate.hpp | 116 ++++++++++
5 files changed, 381 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index e8bc21c..0ca971d 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -207,6 +207,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_rules_AttachLIPFilters
quickstep_queryoptimizer_rules_PruneColumns
+ quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
quickstep_queryoptimizer_rules_ReorderColumns
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
quickstep_queryoptimizer_rules_SwapProbeBuild
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index e12f8be..bd05267 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -28,6 +28,7 @@
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/rules/AttachLIPFilters.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
+#include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp"
#include "query_optimizer/rules/ReorderColumns.hpp"
#include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
#include "query_optimizer/rules/SwapProbeBuild.hpp"
@@ -108,12 +109,22 @@ P::PhysicalPtr PhysicalGenerator::generateInitialPlan(
P::PhysicalPtr PhysicalGenerator::optimizePlan() {
std::vector<std::unique_ptr<Rule<P::Physical>>> rules;
rules.emplace_back(new PruneColumns());
+
+ // TODO(jianqiao): It is possible for PushDownLowCostDisjunctivePredicate to
+ // generate two chaining Selection nodes that can actually be fused into one.
+ // Note that currently it is okay to have the two Selections because they are
+ // applied to a small cardinality stored relation, which is very light-weight.
+ // However it is better to have a FuseSelection optimization (or even a more
+ // general FusePhysical optimization) in the future.
+ rules.emplace_back(new PushDownLowCostDisjunctivePredicate());
+
if (FLAGS_reorder_hash_joins) {
rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
rules.emplace_back(new PruneColumns());
} else {
rules.emplace_back(new SwapProbeBuild());
}
+
if (FLAGS_reorder_columns) {
// NOTE(jianqiao): This optimization relies on the fact that the intermediate
// relations all have SPLIT_ROW_STORE layouts. If this fact gets changed, the
@@ -121,6 +132,10 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
// should be re-evaluated.
rules.emplace_back(new ReorderColumns());
}
+
+ // NOTE(jianqiao): Adding rules after AttachLIPFilters requires extra handling
+ // of LIPFilterConfiguration for transformed nodes. So currently it is suggested
+ // that all the new rules be placed before this point.
if (FLAGS_use_lip_filters) {
rules.emplace_back(new AttachLIPFilters());
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index fe2fd17..86d1ef7 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -24,6 +24,9 @@ add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp C
add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
add_library(quickstep_queryoptimizer_rules_PruneColumns PruneColumns.cpp PruneColumns.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownFilter PushDownFilter.cpp PushDownFilter.hpp)
+add_library(quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
+ PushDownLowCostDisjunctivePredicate.cpp
+ PushDownLowCostDisjunctivePredicate.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin PushDownSemiAntiJoin.cpp PushDownSemiAntiJoin.hpp)
add_library(quickstep_queryoptimizer_rules_ReorderColumns ReorderColumns.cpp ReorderColumns.hpp)
add_library(quickstep_queryoptimizer_rules_Rule ../../empty_src.cpp Rule.hpp)
@@ -111,6 +114,26 @@ target_link_libraries(quickstep_queryoptimizer_rules_PushDownFilter
quickstep_queryoptimizer_rules_RuleHelper
quickstep_queryoptimizer_rules_TopDownRule
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
+ ${GFLAGS_LIB_NAME}
+ quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExpressionUtil
+ quickstep_queryoptimizer_expressions_LogicalAnd
+ quickstep_queryoptimizer_expressions_LogicalOr
+ quickstep_queryoptimizer_expressions_PatternMatcher
+ quickstep_queryoptimizer_expressions_Predicate
+ quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_NestedLoopsJoin
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_queryoptimizer_physical_Selection
+ quickstep_queryoptimizer_physical_TableReference
+ quickstep_queryoptimizer_physical_TopLevelPlan
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
quickstep_queryoptimizer_expressions_AttributeReference
quickstep_queryoptimizer_expressions_ExpressionUtil
@@ -225,6 +248,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
quickstep_queryoptimizer_rules_GenerateJoins
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_PushDownFilter
+ quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
quickstep_queryoptimizer_rules_ReorderColumns
quickstep_queryoptimizer_rules_Rule
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp
new file mode 100644
index 0000000..e39f155
--- /dev/null
+++ b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp
@@ -0,0 +1,225 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp"
+
+#include <cstddef>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/LogicalAnd.hpp"
+#include "query_optimizer/expressions/LogicalOr.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/NestedLoopsJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/TableReference.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+
+#include "gflags/gflags.h"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+DEFINE_uint64(push_down_disjunctive_predicate_cardinality_threshold, 100u,
+ "The cardinality threshold for a stored relation for the "
+ "PushDownLowCostDisjunctivePredicate optimization rule to push "
+ "down a disjunctive predicate to pre-filter that relation.");
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr PushDownLowCostDisjunctivePredicate::apply(const P::PhysicalPtr &input) {
+ DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+
+ const P::TopLevelPlanPtr top_level_plan =
+ std::static_pointer_cast<const P::TopLevelPlan>(input);
+ cost_model_.reset(
+ new cost::StarSchemaSimpleCostModel(
+ top_level_plan->shared_subplans()));
+
+ collectApplicablePredicates(input);
+
+ if (!applicable_predicates_.empty()) {
+ // Apply the selected predicates to stored relations.
+ return attachPredicates(input);
+ } else {
+ return input;
+ }
+}
+
+void PushDownLowCostDisjunctivePredicate::collectApplicablePredicates(
+ const physical::PhysicalPtr &input) {
+ P::TableReferencePtr table_reference;
+ if (P::SomeTableReference::MatchesWithConditionalCast(input, &table_reference)) {
+ // Consider only stored relations with small cardinality as targets.
+ if (cost_model_->estimateCardinality(input) <=
+ FLAGS_push_down_disjunctive_predicate_cardinality_threshold) {
+ applicable_nodes_.emplace_back(input, &table_reference->attribute_list());
+ }
+ return;
+ }
+
+ for (const auto &child : input->children()) {
+ collectApplicablePredicates(child);
+ }
+
+ E::PredicatePtr filter_predicate = nullptr;
+ switch (input->getPhysicalType()) {
+ case P::PhysicalType::kAggregate: {
+ filter_predicate =
+ std::static_pointer_cast<const P::Aggregate>(input)->filter_predicate();
+ break;
+ }
+ case P::PhysicalType::kHashJoin: {
+ const P::HashJoinPtr hash_join =
+ std::static_pointer_cast<const P::HashJoin>(input);
+ if (hash_join->join_type() == P::HashJoin::JoinType::kInnerJoin) {
+ filter_predicate = hash_join->residual_predicate();
+ }
+ break;
+ }
+ case P::PhysicalType::kNestedLoopsJoin: {
+ filter_predicate =
+ std::static_pointer_cast<const P::NestedLoopsJoin>(input)->join_predicate();
+ break;
+ }
+ case P::PhysicalType::kSelection: {
+ filter_predicate =
+ std::static_pointer_cast<const P::Selection>(input)->filter_predicate();
+ break;
+ }
+ default:
+ break;
+ }
+
+ E::LogicalOrPtr disjunctive_predicate;
+ if (filter_predicate == nullptr ||
+ !E::SomeLogicalOr::MatchesWithConditionalCast(filter_predicate, &disjunctive_predicate)) {
+ return;
+ }
+
+ // Consider only disjunctive normal form, i.e. disjunction of conjunctions.
+ // Divide the disjunctive components into groups.
+ std::vector<std::vector<E::PredicatePtr>> candidate_predicates;
+ std::vector<std::vector<std::vector<E::AttributeReferencePtr>>> candidate_attributes;
+ for (const auto &conjunctive_predicate : disjunctive_predicate->operands()) {
+ candidate_predicates.emplace_back();
+ candidate_attributes.emplace_back();
+ E::LogicalAndPtr logical_and;
+ if (E::SomeLogicalAnd::MatchesWithConditionalCast(conjunctive_predicate, &logical_and)) {
+ for (const auto &predicate : logical_and->operands()) {
+ candidate_predicates.back().emplace_back(predicate);
+ candidate_attributes.back().emplace_back(
+ predicate->getReferencedAttributes());
+ }
+ } else {
+ candidate_predicates.back().emplace_back(conjunctive_predicate);
+ candidate_attributes.back().emplace_back(
+ conjunctive_predicate->getReferencedAttributes());
+ }
+ }
+
+ // Check whether the conditions are met for pushing down part of the predicates
+ // to each small-cardinality stored relation.
+ for (const auto &node_pair : applicable_nodes_) {
+ const std::vector<E::AttributeReferencePtr> &target_attributes = *node_pair.second;
+ std::vector<E::PredicatePtr> selected_disj_preds;
+ for (std::size_t i = 0; i < candidate_predicates.size(); ++i) {
+ const auto &cand_preds = candidate_predicates[i];
+ const auto &cand_attrs = candidate_attributes[i];
+
+ std::vector<E::PredicatePtr> selected_conj_preds;
+ for (std::size_t j = 0; j < cand_preds.size(); ++j) {
+ if (E::SubsetOfExpressions(cand_attrs[j], target_attributes)) {
+ selected_conj_preds.emplace_back(cand_preds[j]);
+ }
+ }
+ if (selected_conj_preds.empty()) {
+ // Not every disjunctive component contains a predicate that can be applied
+ // to the table reference node -- condition failed, exit.
+ selected_disj_preds.clear();
+ break;
+ } else {
+ selected_disj_preds.emplace_back(
+ CreateConjunctive(selected_conj_preds));
+ }
+ }
+ if (!selected_disj_preds.empty()) {
+ applicable_predicates_[node_pair.first].add(
+ CreateDisjunctive(selected_disj_preds));
+ }
+ }
+}
+
+P::PhysicalPtr PushDownLowCostDisjunctivePredicate::attachPredicates(
+ const P::PhysicalPtr &input) const {
+ std::vector<P::PhysicalPtr> new_children;
+ for (const P::PhysicalPtr &child : input->children()) {
+ const P::PhysicalPtr new_child = attachPredicates(child);
+ new_children.push_back(new_child);
+ }
+
+ const P::PhysicalPtr output =
+ new_children == input->children() ? input
+ : input->copyWithNewChildren(new_children);
+
+ const auto &node_it = applicable_predicates_.find(input);
+ if (node_it != applicable_predicates_.end()) {
+ const E::PredicatePtr filter_predicate =
+ CreateConjunctive(node_it->second.predicates);
+ return P::Selection::Create(output,
+ E::ToNamedExpressions(output->getOutputAttributes()),
+ filter_predicate);
+ }
+
+ return output;
+}
+
+E::PredicatePtr PushDownLowCostDisjunctivePredicate::CreateConjunctive(
+ const std::vector<E::PredicatePtr> predicates) {
+ DCHECK_GE(predicates.size(), 1u);
+ if (predicates.size() == 1) {
+ return predicates.front();
+ } else {
+ return E::LogicalAnd::Create(predicates);
+ }
+}
+
+E::PredicatePtr PushDownLowCostDisjunctivePredicate::CreateDisjunctive(
+ const std::vector<E::PredicatePtr> predicates) {
+ DCHECK_GE(predicates.size(), 1u);
+ if (predicates.size() == 1) {
+ return predicates.front();
+ } else {
+ return E::LogicalOr::Create(predicates);
+ }
+}
+
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/259cd5e7/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp
new file mode 100644
index 0000000..3e4b602
--- /dev/null
+++ b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_PUSH_DOWN_LOW_COST_DISJUNCTIVE_PREDICATE_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_PUSH_DOWN_LOW_COST_DISJUNCTIVE_PREDICATE_HPP_
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ * @{
+ */
+
+/**
+ * @brief Rule that applies to a physical plan to push down low-cost disjunctive
+ * predicate when proper conditions are met.
+ *
+ * Here we elaborate the conditions.
+ *
+ * Let
+ * P = p_{1,1} AND ... AND p_{1, m_1} OR ... OR p_{n,1} AND ... AND p_{n, m_n}
+ * be a predicate in disjunctive normal form.
+ *
+ * Now consider each small-cardinality relation R, if for each i in 1..n, there
+ * exists at least one predicate p_{i, k_i} that is applicable to R. Then we can
+ * construct a new predicate
+ * P' = p_{1, k_1} OR ... OR p_{n, k_n}
+ * and push down P' to be applied to R.
+ *
+ * Also, if any conjunctive component in P contains more than one predicate that
+ * is applicable to R, then we can combine all these applicable predicates as a
+ * conjunctive component in P'.
+ *
+ * Finally, note that if there exists a conjunctive component that contains no
+ * predicate applicable to R. Then the condition fails and we cannot do a push
+ * down for R.
+ */
+class PushDownLowCostDisjunctivePredicate : public Rule<physical::Physical> {
+ public:
+ /**
+ * @brief Constructor.
+ */
+ PushDownLowCostDisjunctivePredicate() {}
+
+ ~PushDownLowCostDisjunctivePredicate() override {}
+
+ std::string getName() const override {
+ return "PushDownLowCostDisjunctivePredicate";
+ }
+
+ physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+ struct PredicateInfo {
+ PredicateInfo() {}
+ inline void add(expressions::PredicatePtr predicate) {
+ predicates.emplace_back(predicate);
+ }
+ std::vector<expressions::PredicatePtr> predicates;
+ };
+
+ void collectApplicablePredicates(const physical::PhysicalPtr &input);
+
+ physical::PhysicalPtr attachPredicates(const physical::PhysicalPtr &input) const;
+
+ static expressions::PredicatePtr CreateConjunctive(
+ const std::vector<expressions::PredicatePtr> predicates);
+
+ static expressions::PredicatePtr CreateDisjunctive(
+ const std::vector<expressions::PredicatePtr> predicates);
+
+ std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
+
+ std::vector<std::pair<physical::PhysicalPtr,
+ const std::vector<expressions::AttributeReferencePtr> *>> applicable_nodes_;
+ std::map<physical::PhysicalPtr, PredicateInfo> applicable_predicates_;
+
+ DISALLOW_COPY_AND_ASSIGN(PushDownLowCostDisjunctivePredicate);
+};
+
+/** @} */
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_PUSH_DOWN_LOW_COST_DISJUNCTIVE_PREDICATE_HPP_
[02/10] incubator-quickstep git commit: Minor refactor for
InsertDestinations.
Posted by ji...@apache.org.
Minor refactor for InsertDestinations.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/f2e77266
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/f2e77266
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/f2e77266
Branch: refs/heads/reduce-group-by-attrs
Commit: f2e77266edeaff38a60650b48836ff6ddb3b84ca
Parents: 0f4938c
Author: Zuyu Zhang <zu...@apache.org>
Authored: Mon Jan 30 15:24:03 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Mon Jan 30 15:24:03 2017 -0800
----------------------------------------------------------------------
storage/InsertDestination.cpp | 17 ++++-------------
storage/InsertDestination.hpp | 4 +++-
storage/InsertDestinationInterface.hpp | 2 +-
storage/StorageBlock.hpp | 2 +-
4 files changed, 9 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f2e77266/storage/InsertDestination.cpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.cpp b/storage/InsertDestination.cpp
index 944998f..714e6e5 100644
--- a/storage/InsertDestination.cpp
+++ b/storage/InsertDestination.cpp
@@ -290,7 +290,6 @@ void InsertDestination::bulkInsertTuplesFromValueAccessors(
ValueAccessor *accessor = p.first;
std::vector<attribute_id> attribute_map = p.second;
-
InvokeOnAnyValueAccessor(
accessor,
[&](auto *accessor) -> void { // NOLINT(build/c++11)
@@ -621,11 +620,10 @@ void PartitionAwareInsertDestination::bulkInsertTuples(ValueAccessor *accessor,
&always_mark_full,
&num_partitions](auto *accessor) -> void { // NOLINT(build/c++11)
std::vector<std::unique_ptr<TupleIdSequence>> partition_membership;
- partition_membership.resize(num_partitions);
// Create a tuple-id sequence for each partition.
for (std::size_t partition = 0; partition < num_partitions; ++partition) {
- partition_membership[partition].reset(new TupleIdSequence(accessor->getEndPosition()));
+ partition_membership.emplace_back(std::make_unique<TupleIdSequence>(accessor->getEndPosition()));
}
// Iterate over ValueAccessor for each tuple,
@@ -641,9 +639,8 @@ void PartitionAwareInsertDestination::bulkInsertTuples(ValueAccessor *accessor,
// TupleIdSequence.
std::vector<std::unique_ptr<typename std::remove_pointer<
decltype(accessor->createSharedTupleIdSequenceAdapter(*partition_membership.front()))>::type>> adapter;
- adapter.resize(num_partitions);
for (std::size_t partition = 0; partition < num_partitions; ++partition) {
- adapter[partition].reset(accessor->createSharedTupleIdSequenceAdapter(*partition_membership[partition]));
+ adapter.emplace_back(accessor->createSharedTupleIdSequenceAdapter(*partition_membership[partition]));
}
// Bulk-insert into a block belonging to the partition.
@@ -678,11 +675,10 @@ void PartitionAwareInsertDestination::bulkInsertTuplesWithRemappedAttributes(
&always_mark_full,
&num_partitions](auto *accessor) -> void { // NOLINT(build/c++11)
std::vector<std::unique_ptr<TupleIdSequence>> partition_membership;
- partition_membership.resize(num_partitions);
// Create a tuple-id sequence for each partition.
for (std::size_t partition = 0; partition < num_partitions; ++partition) {
- partition_membership[partition].reset(new TupleIdSequence(accessor->getEndPosition()));
+ partition_membership.emplace_back(std::make_unique<TupleIdSequence>(accessor->getEndPosition()));
}
// Iterate over ValueAccessor for each tuple,
@@ -698,9 +694,8 @@ void PartitionAwareInsertDestination::bulkInsertTuplesWithRemappedAttributes(
// TupleIdSequence.
std::vector<std::unique_ptr<typename std::remove_pointer<
decltype(accessor->createSharedTupleIdSequenceAdapter(*partition_membership.front()))>::type>> adapter;
- adapter.resize(num_partitions);
for (std::size_t partition = 0; partition < num_partitions; ++partition) {
- adapter[partition].reset(accessor->createSharedTupleIdSequenceAdapter(*partition_membership[partition]));
+ adapter.emplace_back(accessor->createSharedTupleIdSequenceAdapter(*partition_membership[partition]));
}
// Bulk-insert into a block belonging to the partition.
@@ -742,10 +737,6 @@ void PartitionAwareInsertDestination::insertTuplesFromVector(std::vector<Tuple>:
}
}
-MutableBlockReference PartitionAwareInsertDestination::getBlockForInsertion() {
- FATAL_ERROR("PartitionAwareInsertDestination::getBlockForInsertion needs a partition id as an argument.");
-}
-
MutableBlockReference PartitionAwareInsertDestination::getBlockForInsertionInPartition(const partition_id part_id) {
DCHECK_LT(part_id, partition_scheme_header_->getNumPartitions());
SpinMutexLock lock(mutexes_for_partition_[part_id]);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f2e77266/storage/InsertDestination.hpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.hpp b/storage/InsertDestination.hpp
index c3c40bd..6707192 100644
--- a/storage/InsertDestination.hpp
+++ b/storage/InsertDestination.hpp
@@ -539,7 +539,9 @@ class PartitionAwareInsertDestination : public InsertDestination {
std::vector<Tuple>::const_iterator end) override;
protected:
- MutableBlockReference getBlockForInsertion() override;
+ MutableBlockReference getBlockForInsertion() override {
+ LOG(FATAL) << "PartitionAwareInsertDestination::getBlockForInsertion needs a partition id as an argument.";
+ }
/**
* @brief Get a block to use for insertion from a partition.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f2e77266/storage/InsertDestinationInterface.hpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestinationInterface.hpp b/storage/InsertDestinationInterface.hpp
index b62d3e5..be6b0c2 100644
--- a/storage/InsertDestinationInterface.hpp
+++ b/storage/InsertDestinationInterface.hpp
@@ -131,7 +131,7 @@ class InsertDestinationInterface {
*
* @param accessor_attribute_map A vector of pairs of ValueAccessor and
* corresponding attribute map
- * The i-th attribute ID in the attr map for a value accessor is "n"
+ * The i-th attribute ID in the attr map for a value accessor is "n"
* if the attribute_id "i" in the output relation
* is the attribute_id "n" in corresponding input value accessor.
* Set the i-th element to kInvalidCatalogId if it doesn't come from
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f2e77266/storage/StorageBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.hpp b/storage/StorageBlock.hpp
index ed252c5..16ea50f 100644
--- a/storage/StorageBlock.hpp
+++ b/storage/StorageBlock.hpp
@@ -325,7 +325,7 @@ class StorageBlock : public StorageBlockBase {
* function with the appropriate attribute_map for each value
* accessor (InsertDestination::bulkInsertTuplesFromValueAccessors
* handles all the details) to insert tuples without an extra temp copy.
- *
+ *
* @warning Must call bulkInsertPartialTuplesFinalize() to update the header,
* until which point, the insertion is not visible to others.
* @warning The inserted tuples may be placed in sub-optimal locations in this
[03/10] incubator-quickstep git commit: Minor refactor for
HashJoinInnerJoin.
Posted by ji...@apache.org.
Minor refactor for HashJoinInnerJoin.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/23e14b8e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/23e14b8e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/23e14b8e
Branch: refs/heads/reduce-group-by-attrs
Commit: 23e14b8e078f42a8d3e5f6c0c4885dee271d99aa
Parents: f2e7726
Author: Zuyu Zhang <zu...@apache.org>
Authored: Mon Jan 30 15:28:49 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Mon Jan 30 20:21:23 2017 -0800
----------------------------------------------------------------------
relational_operators/CMakeLists.txt | 1 +
relational_operators/HashJoinOperator.cpp | 42 ++++++++++++++------------
2 files changed, 23 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/23e14b8e/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index c1caaa3..b2e08cf 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -199,6 +199,7 @@ target_link_libraries(quickstep_relationaloperators_FinalizeAggregationOperator
target_link_libraries(quickstep_relationaloperators_HashJoinOperator
${GFLAGS_LIB_NAME}
glog
+ quickstep_catalog_CatalogAttribute
quickstep_catalog_CatalogRelation
quickstep_catalog_CatalogRelationSchema
quickstep_catalog_CatalogTypedefs
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/23e14b8e/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index fd3841f..7394554 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -25,6 +25,7 @@
#include <utility>
#include <vector>
+#include "catalog/CatalogAttribute.hpp"
#include "catalog/CatalogRelation.hpp"
#include "catalog/CatalogRelationSchema.hpp"
#include "catalog/CatalogTypedefs.hpp"
@@ -165,6 +166,12 @@ class OuterJoinTupleCollector {
TupleIdSequence *filter_;
};
+// For InnerJoin.
+constexpr std::size_t kNumValueAccessors = 3u;
+constexpr std::size_t kBuildValueAccessorIndex = 0,
+ kProbeValueAccessorIndex = 1u,
+ kTempResultValueAccessorIndex = 2u;
+
} // namespace
bool HashJoinOperator::getAllWorkOrders(
@@ -565,31 +572,27 @@ void HashInnerJoinWorkOrder::execute() {
});
}
-
// We also need a temp value accessor to store results of any scalar expressions.
ColumnVectorsValueAccessor temp_result;
// Create a map of ValueAccessors and what attributes we want to pick from them
- std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map;
- const std::vector<ValueAccessor *> accessors{
- ordered_build_accessor.get(), ordered_probe_accessor.get(), &temp_result};
- const unsigned int build_index = 0, probe_index = 1, temp_index = 2;
- for (auto &accessor : accessors) {
- accessor_attribute_map.push_back(std::make_pair(
- accessor,
- std::vector<attribute_id>(selection_.size(), kInvalidCatalogId)));
- }
+ std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map(
+ kNumValueAccessors, std::make_pair(nullptr, // A late binding ValueAccessor.
+ vector<attribute_id>(selection_.size(), kInvalidCatalogId)));
- attribute_id dest_attr = 0;
- std::vector<std::pair<tuple_id, tuple_id>> zipped_joined_tuple_ids;
+ accessor_attribute_map[kBuildValueAccessorIndex].first = ordered_build_accessor.get();
+ accessor_attribute_map[kProbeValueAccessorIndex].first = ordered_probe_accessor.get();
+ accessor_attribute_map[kTempResultValueAccessorIndex].first = &temp_result;
+ attribute_id dest_attr = 0;
for (auto &selection_cit : selection_) {
// If the Scalar (column) is not an attribute in build/probe blocks, then
// insert it into a ColumnVectorsValueAccessor.
if (selection_cit->getDataSource() != Scalar::ScalarDataSource::kAttribute) {
// Current destination attribute maps to the column we'll create now.
- accessor_attribute_map[temp_index].second[dest_attr] = temp_result.getNumColumns();
+ accessor_attribute_map[kTempResultValueAccessorIndex].second[dest_attr] = temp_result.getNumColumns();
+ std::vector<std::pair<tuple_id, tuple_id>> zipped_joined_tuple_ids;
if (temp_result.getNumColumns() == 0) {
// The getAllValuesForJoin function below needs joined tuple IDs as
// a vector of pair of (build-tuple-ID, probe-tuple-ID), and we have
@@ -599,9 +602,8 @@ void HashInnerJoinWorkOrder::execute() {
// they don't have scalar expressions with attributes from both
// build and probe relations (other expressions would have been
// pushed down to before the join).
- zipped_joined_tuple_ids.reserve(build_tids.size());
for (std::size_t i = 0; i < build_tids.size(); ++i) {
- zipped_joined_tuple_ids.push_back(std::make_pair(build_tids[i], probe_tids[i]));
+ zipped_joined_tuple_ids.emplace_back(build_tids[i], probe_tids[i]);
}
}
temp_result.addColumn(
@@ -610,12 +612,12 @@ void HashInnerJoinWorkOrder::execute() {
probe_relation_id, probe_accessor.get(),
zipped_joined_tuple_ids));
} else {
- auto scalar_attr = static_cast<const ScalarAttribute *>(selection_cit.get());
- const attribute_id attr_id = scalar_attr->getAttribute().getID();
- if (scalar_attr->getAttribute().getParent().getID() == build_relation_id) {
- accessor_attribute_map[build_index].second[dest_attr] = attr_id;
+ const CatalogAttribute &attr = static_cast<const ScalarAttribute *>(selection_cit.get())->getAttribute();
+ const attribute_id attr_id = attr.getID();
+ if (attr.getParent().getID() == build_relation_id) {
+ accessor_attribute_map[kBuildValueAccessorIndex].second[dest_attr] = attr_id;
} else {
- accessor_attribute_map[probe_index].second[dest_attr] = attr_id;
+ accessor_attribute_map[kProbeValueAccessorIndex].second[dest_attr] = attr_id;
}
}
++dest_attr;
[07/10] incubator-quickstep git commit: Enabled some checks for the
distributed version in the release build.
Posted by ji...@apache.org.
Enabled some checks for the distributed version in the release build.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/5ffdaaf9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/5ffdaaf9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/5ffdaaf9
Branch: refs/heads/reduce-group-by-attrs
Commit: 5ffdaaf9f9d42cb25ffcbaf59cfafc049dcaca27
Parents: dff4a14
Author: Zuyu Zhang <zu...@apache.org>
Authored: Tue Jan 31 14:45:27 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Tue Jan 31 14:45:27 2017 -0800
----------------------------------------------------------------------
cli/distributed/Cli.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5ffdaaf9/cli/distributed/Cli.cpp
----------------------------------------------------------------------
diff --git a/cli/distributed/Cli.cpp b/cli/distributed/Cli.cpp
index 01f824d..5af70e6 100644
--- a/cli/distributed/Cli.cpp
+++ b/cli/distributed/Cli.cpp
@@ -95,13 +95,13 @@ void Cli::init() {
tmb::MessageStyle style;
TaggedMessage cli_reg_message(kDistributedCliRegistrationMessage);
- DCHECK(tmb::MessageBus::SendStatus::kOK ==
+ CHECK(tmb::MessageBus::SendStatus::kOK ==
bus_.Send(cli_id_, all_addresses, style, move(cli_reg_message)));
// Wait for Conductor to response.
const AnnotatedMessage cli_reg_response_message(bus_.Receive(cli_id_, 0, true));
- DCHECK_EQ(kDistributedCliRegistrationResponseMessage,
- cli_reg_response_message.tagged_message.message_type());
+ CHECK_EQ(kDistributedCliRegistrationResponseMessage,
+ cli_reg_response_message.tagged_message.message_type());
conductor_client_id_ = cli_reg_response_message.sender;
DLOG(INFO) << "DistributedCli received typed '" << kDistributedCliRegistrationResponseMessage
[04/10] incubator-quickstep git commit: Reorder output attribute
order to improve copy performance.
Posted by ji...@apache.org.
Reorder output attribute order to improve copy performance.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/6d83b46a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/6d83b46a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/6d83b46a
Branch: refs/heads/reduce-group-by-attrs
Commit: 6d83b46af25b35fb0b3a23452b6fbd2842b33793
Parents: 23e14b8
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Thu Jan 12 18:41:17 2017 -0600
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Tue Jan 31 00:10:45 2017 -0800
----------------------------------------------------------------------
query_optimizer/CMakeLists.txt | 1 +
query_optimizer/PhysicalGenerator.cpp | 12 +
query_optimizer/rules/CMakeLists.txt | 14 +
query_optimizer/rules/ReorderColumns.cpp | 214 ++++++++++++++++
query_optimizer/rules/ReorderColumns.hpp | 75 ++++++
query_optimizer/tests/OptimizerTextTest.cpp | 6 +-
relational_operators/CMakeLists.txt | 1 +
relational_operators/HashJoinOperator.cpp | 254 +++++++++++--------
relational_operators/HashJoinOperator.hpp | 4 +
storage/SplitRowStoreValueAccessor.hpp | 5 +
storage/ValueAccessor.hpp | 30 +++
types/containers/ColumnVectorsValueAccessor.hpp | 5 +
12 files changed, 515 insertions(+), 106 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index b6c794d..e8bc21c 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -207,6 +207,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_rules_AttachLIPFilters
quickstep_queryoptimizer_rules_PruneColumns
+ quickstep_queryoptimizer_rules_ReorderColumns
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
quickstep_queryoptimizer_rules_SwapProbeBuild
quickstep_queryoptimizer_strategy_Aggregate
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 7cb97dc..e12f8be 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -28,6 +28,7 @@
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/rules/AttachLIPFilters.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
+#include "query_optimizer/rules/ReorderColumns.hpp"
#include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
#include "query_optimizer/rules/SwapProbeBuild.hpp"
#include "query_optimizer/strategy/Aggregate.hpp"
@@ -44,6 +45,10 @@
namespace quickstep {
namespace optimizer {
+DEFINE_bool(reorder_columns, true,
+ "Adjust the ordering of intermediate relations' columns to improve "
+ "copy performance.");
+
DEFINE_bool(reorder_hash_joins, true,
"If true, apply hash join order optimization to each group of hash "
"joins. The optimization applies a greedy algorithm to favor smaller "
@@ -109,6 +114,13 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
} else {
rules.emplace_back(new SwapProbeBuild());
}
+ if (FLAGS_reorder_columns) {
+ // NOTE(jianqiao): This optimization relies on the fact that the intermediate
+ // relations all have SPLIT_ROW_STORE layouts. If this fact gets changed, the
+ // optimization algorithm may need to be updated and the performance impact
+ // should be re-evaluated.
+ rules.emplace_back(new ReorderColumns());
+ }
if (FLAGS_use_lip_filters) {
rules.emplace_back(new AttachLIPFilters());
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 7fffadc..fe2fd17 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -25,6 +25,7 @@ add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp Gener
add_library(quickstep_queryoptimizer_rules_PruneColumns PruneColumns.cpp PruneColumns.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownFilter PushDownFilter.cpp PushDownFilter.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin PushDownSemiAntiJoin.cpp PushDownSemiAntiJoin.hpp)
+add_library(quickstep_queryoptimizer_rules_ReorderColumns ReorderColumns.cpp ReorderColumns.hpp)
add_library(quickstep_queryoptimizer_rules_Rule ../../empty_src.cpp Rule.hpp)
add_library(quickstep_queryoptimizer_rules_RuleHelper RuleHelper.cpp RuleHelper.hpp)
add_library(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
@@ -118,6 +119,18 @@ target_link_libraries(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
quickstep_queryoptimizer_logical_PatternMatcher
quickstep_queryoptimizer_rules_TopDownRule
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_ReorderColumns
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExprId
+ quickstep_queryoptimizer_expressions_NamedExpression
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_queryoptimizer_physical_Selection
+ quickstep_queryoptimizer_physical_TableReference
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_rules_Rule
glog
quickstep_utility_Macros)
@@ -213,6 +226,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_PushDownFilter
quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
+ quickstep_queryoptimizer_rules_ReorderColumns
quickstep_queryoptimizer_rules_Rule
quickstep_queryoptimizer_rules_RuleHelper
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/query_optimizer/rules/ReorderColumns.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ReorderColumns.cpp b/query_optimizer/rules/ReorderColumns.cpp
new file mode 100644
index 0000000..f7e58d5
--- /dev/null
+++ b/query_optimizer/rules/ReorderColumns.cpp
@@ -0,0 +1,214 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/ReorderColumns.hpp"
+
+#include <algorithm>
+#include <cstddef>
+#include <limits>
+#include <unordered_map>
+#include <vector>
+
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/TableReference.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr ReorderColumns::apply(const P::PhysicalPtr &input) {
+ DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+
+ return applyInternal(input, true);
+}
+
+P::PhysicalPtr ReorderColumns::applyInternal(const P::PhysicalPtr &input,
+ const bool lock_ordering) {
+ // We have to guarantee that the top level ordering of the columns remain
+ // unchanged so that the output columns are ordered as specified by the user.
+ // So here we use the flag "lock_ordering" to skip the first transformable
+ // node (i.e. the first Selection or HashJoin).
+ const bool is_not_transformable = !IsTransformable(input);
+ const bool skip_transform = lock_ordering || is_not_transformable;
+
+ if (skip_transform) {
+ std::vector<P::PhysicalPtr> new_children;
+ for (const P::PhysicalPtr &child : input->children()) {
+ new_children.emplace_back(applyInternal(child, lock_ordering && is_not_transformable));
+ }
+
+ if (new_children != input->children()) {
+ return input->copyWithNewChildren(new_children);
+ } else {
+ return input;
+ }
+ }
+
+ // Collect the maximal chain of transformable nodes.
+ std::vector<P::PhysicalPtr> nodes;
+ for (P::PhysicalPtr node = input; IsTransformable(node); node = node->children().front()) {
+ nodes.emplace_back(node);
+ }
+ // Arrange the nodes with bottom-up order.
+ std::reverse(nodes.begin(), nodes.end());
+
+ // A greedy algorithm that reorders the output attributes based on the GEN/KILL
+ // intervals. This algorithm works well with SSB/TPCH queries and is not likely
+ // to make the plans worse for whatever queries.
+ //
+ // Here is a brief explanation of the three data structure base/gen/kill.
+ // (1) base: maps each attribute's id to its position in the BASE relation's
+ // output attributes. Note that the base relation is the child
+ // relation of nodes[0].
+ // (2) gen: maps each attribute's id to the MINIMUM index i such that the
+ // attribute is among nodes[i]'s output attributes. I.e. node i
+ // GENERATEs the attribute.
+ // (3) kill: maps each attribute's id to the MAXIMUM index i such that the
+ // attribute is among nodes[i]'s output attributes. I.e. node i+1
+ // KILLs the attribute.
+ std::unordered_map<E::ExprId, std::size_t> base, gen, kill;
+
+ const P::PhysicalPtr base_node =
+ applyInternal(nodes.front()->children().front(), false);
+ const std::vector<E::AttributeReferencePtr> base_attrs =
+ base_node->getOutputAttributes();
+ for (std::size_t i = 0; i < base_attrs.size(); ++i) {
+ base.emplace(base_attrs[i]->id(), i);
+ }
+
+ for (std::size_t i = 0; i < nodes.size(); ++i) {
+ for (const auto &attr : nodes[i]->getOutputAttributes()) {
+ const E::ExprId attr_id = attr->id();
+ if (gen.find(attr_id) == gen.end()) {
+ gen.emplace(attr_id, i);
+ }
+ kill[attr_id] = i;
+ }
+ }
+
+ // TODO(jianqiao): implement this comparator as a standalone and well-documented
+ // struct.
+ const auto comparator = [&gen, &kill, &base](const E::NamedExpressionPtr &lhs,
+ const E::NamedExpressionPtr &rhs) -> bool {
+ const E::ExprId lhs_id = lhs->id();
+ const E::ExprId rhs_id = rhs->id();
+
+ // Sort the attributes first by GEN location.
+ const std::size_t lhs_gen = gen.at(lhs_id);
+ const std::size_t rhs_gen = gen.at(rhs_id);
+ if (lhs_gen != rhs_gen) {
+ return lhs_gen < rhs_gen;
+ }
+
+ // Then by KILL location.
+ const std::size_t lhs_kill = kill.at(lhs_id);
+ const std::size_t rhs_kill = kill.at(rhs_id);
+ if (lhs_kill != rhs_kill) {
+ return lhs_kill < rhs_kill;
+ }
+
+ // Finally by the ordering in the base relaton.
+ const auto lhs_base_it = base.find(lhs_id);
+ const auto rhs_base_it = base.find(rhs_id);
+ const std::size_t lhs_base =
+ lhs_base_it == base.end() ? std::numeric_limits<std::size_t>::max()
+ : lhs_base_it->second;
+ const std::size_t rhs_base =
+ rhs_base_it == base.end() ? std::numeric_limits<std::size_t>::max()
+ : rhs_base_it->second;
+ if (lhs_base != rhs_base) {
+ return lhs_base < rhs_base;
+ }
+
+ return lhs_id < rhs_id;
+ };
+
+ P::PhysicalPtr output = base_node;
+
+ for (const auto &node : nodes) {
+ std::vector<E::NamedExpressionPtr> project_expressions;
+ switch (node->getPhysicalType()) {
+ case P::PhysicalType::kHashJoin: {
+ project_expressions =
+ std::static_pointer_cast<const P::HashJoin>(node)->project_expressions();
+ break;
+ }
+ case P::PhysicalType::kSelection: {
+ project_expressions =
+ std::static_pointer_cast<const P::Selection>(node)->project_expressions();
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported physical type";
+ }
+
+ std::sort(project_expressions.begin(), project_expressions.end(), comparator);
+
+ switch (node->getPhysicalType()) {
+ case P::PhysicalType::kHashJoin: {
+ const P::HashJoinPtr old_node =
+ std::static_pointer_cast<const P::HashJoin>(node);
+ output = P::HashJoin::Create(output,
+ applyInternal(old_node->right(), false),
+ old_node->left_join_attributes(),
+ old_node->right_join_attributes(),
+ old_node->residual_predicate(),
+ project_expressions,
+ old_node->join_type());
+ break;
+ }
+ case P::PhysicalType::kSelection: {
+ const P::SelectionPtr old_node =
+ std::static_pointer_cast<const P::Selection>(node);
+ output = P::Selection::Create(output,
+ project_expressions,
+ old_node->filter_predicate());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported physical type";
+ }
+ }
+
+ return output;
+}
+
+bool ReorderColumns::IsTransformable(const physical::PhysicalPtr &input) {
+ switch (input->getPhysicalType()) {
+ case P::PhysicalType::kHashJoin: // Fall through
+ case P::PhysicalType::kSelection:
+ return true;
+ default:
+ return false;
+ }
+}
+
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/query_optimizer/rules/ReorderColumns.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ReorderColumns.hpp b/query_optimizer/rules/ReorderColumns.hpp
new file mode 100644
index 0000000..36fa183
--- /dev/null
+++ b/query_optimizer/rules/ReorderColumns.hpp
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_REORDER_COLUMNS_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_REORDER_COLUMNS_HPP_
+
+#include <string>
+
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ * @{
+ */
+
+/**
+ * @brief Rule that applies to a physical plan to adjust the orderings of some
+ * intermediate nodes' output attributes to improve copy performance.
+ *
+ * @note This optimization is based on the fact that the intermediate relations
+ * all have SPLIT_ROW_STORE layouts. If this fact gets changed, the rule's
+ * algorithm may need to be updated and the performance impact should be
+ * re-evaluated.
+ */
+class ReorderColumns : public Rule<physical::Physical> {
+ public:
+ /**
+ * @brief Constructor.
+ */
+ ReorderColumns() {}
+
+ ~ReorderColumns() override {}
+
+ std::string getName() const override {
+ return "ReorderColumns";
+ }
+
+ physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+ physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input,
+ const bool lock_ordering);
+
+ // Whether the physical node can
+ inline static bool IsTransformable(const physical::PhysicalPtr &input);
+
+ DISALLOW_COPY_AND_ASSIGN(ReorderColumns);
+};
+
+/** @} */
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_REORDER_COLUMNS_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/query_optimizer/tests/OptimizerTextTest.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/OptimizerTextTest.cpp b/query_optimizer/tests/OptimizerTextTest.cpp
index 759c173..e17f5c4 100644
--- a/query_optimizer/tests/OptimizerTextTest.cpp
+++ b/query_optimizer/tests/OptimizerTextTest.cpp
@@ -31,6 +31,7 @@
namespace quickstep {
namespace optimizer {
+DECLARE_bool(reorder_columns);
DECLARE_bool(reorder_hash_joins);
DECLARE_bool(use_lip_filters);
@@ -58,8 +59,9 @@ int main(int argc, char** argv) {
test_driver->registerOptions(
quickstep::optimizer::OptimizerTextTestRunner::kTestOptions);
- // Turn off join order optimization and LIPFilter for optimizer test since
- // it is up to change and affects a large number of test cases.
+ // Turn off some optimization rules for optimizer test since they are up to
+ // change and affects a large number of test cases.
+ quickstep::optimizer::FLAGS_reorder_columns = false;
quickstep::optimizer::FLAGS_reorder_hash_joins = false;
quickstep::optimizer::FLAGS_use_lip_filters = false;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index b2e08cf..c8447f3 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -207,6 +207,7 @@ target_link_libraries(quickstep_relationaloperators_HashJoinOperator
quickstep_catalog_PartitionSchemeHeader
quickstep_expressions_predicate_Predicate
quickstep_expressions_scalar_Scalar
+ quickstep_expressions_scalar_ScalarAttribute
quickstep_queryexecution_QueryContext
quickstep_queryexecution_WorkOrderProtosContainer
quickstep_queryexecution_WorkOrdersContainer
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index 7394554..0e75411 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -31,6 +31,7 @@
#include "catalog/CatalogTypedefs.hpp"
#include "expressions/predicate/Predicate.hpp"
#include "expressions/scalar/Scalar.hpp"
+#include "expressions/scalar/ScalarAttribute.hpp"
#include "query_execution/QueryContext.hpp"
#include "query_execution/WorkOrderProtosContainer.hpp"
#include "query_execution/WorkOrdersContainer.hpp"
@@ -64,6 +65,9 @@ namespace quickstep {
namespace {
+typedef std::vector<std::pair<tuple_id, tuple_id>> VectorOfTupleIdPair;
+typedef std::pair<std::vector<tuple_id>, std::vector<tuple_id>> PairOfTupleIdVector;
+
// Functor passed to HashTable::getAllFromValueAccessor() to collect matching
// tuples from the inner relation. It stores matching tuple ID pairs
// in an unordered_map keyed by inner block ID and a vector of
@@ -83,8 +87,7 @@ class VectorsOfPairsJoinedTuplesCollector {
// key is inner block_id, values are vectors of joined tuple ID pairs with
// tuple ID from the inner block on the left and the outer block on the
// right.
- inline std::unordered_map<block_id, std::vector<std::pair<tuple_id, tuple_id>>>*
- getJoinedTuples() {
+ inline std::unordered_map<block_id, VectorOfTupleIdPair>* getJoinedTuples() {
return &joined_tuples_;
}
@@ -94,7 +97,7 @@ class VectorsOfPairsJoinedTuplesCollector {
// cross-product of all tuples from both blocks, but simply using pairs of
// tuple-IDs is expected to be more space efficient if the result set is less
// than 1/64 the cardinality of the cross-product.
- std::unordered_map<block_id, std::vector<std::pair<tuple_id, tuple_id>>> joined_tuples_;
+ std::unordered_map<block_id, VectorOfTupleIdPair> joined_tuples_;
};
// Another collector using an unordered_map keyed on inner block just like above,
@@ -107,15 +110,15 @@ class PairsOfVectorsJoinedTuplesCollector {
template <typename ValueAccessorT>
inline void operator()(const ValueAccessorT &accessor,
const TupleReference &tref) {
- joined_tuples_[tref.block].first.push_back(tref.tuple);
- joined_tuples_[tref.block].second.push_back(accessor.getCurrentPosition());
+ auto &entry = joined_tuples_[tref.block];
+ entry.first.emplace_back(tref.tuple);
+ entry.second.emplace_back(accessor.getCurrentPosition());
}
// Get a mutable pointer to the collected map of joined tuple ID pairs. The
// key is inner block_id, value is a pair consisting of
// inner block tuple IDs (first) and outer block tuple IDs (second).
- inline std::unordered_map< block_id, std::pair<std::vector<tuple_id>, std::vector<tuple_id>>>*
- getJoinedTuples() {
+ inline std::unordered_map<block_id, PairOfTupleIdVector>* getJoinedTuples() {
return &joined_tuples_;
}
@@ -166,12 +169,6 @@ class OuterJoinTupleCollector {
TupleIdSequence *filter_;
};
-// For InnerJoin.
-constexpr std::size_t kNumValueAccessors = 3u;
-constexpr std::size_t kBuildValueAccessorIndex = 0,
- kProbeValueAccessorIndex = 1u,
- kTempResultValueAccessorIndex = 2u;
-
} // namespace
bool HashJoinOperator::getAllWorkOrders(
@@ -473,16 +470,93 @@ void HashInnerJoinWorkOrder::execute() {
base_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
}
+ if (probe_accessor->getImplementationType() == ValueAccessor::Implementation::kSplitRowStore) {
+ executeWithCopyElision(probe_accessor.get());
+ } else {
+ executeWithoutCopyElision(probe_accessor.get());
+ }
+}
+
+void HashInnerJoinWorkOrder::executeWithoutCopyElision(ValueAccessor *probe_accessor) {
+ VectorsOfPairsJoinedTuplesCollector collector;
+ if (join_key_attributes_.size() == 1) {
+ hash_table_.getAllFromValueAccessor(
+ probe_accessor,
+ join_key_attributes_.front(),
+ any_join_key_attributes_nullable_,
+ &collector);
+ } else {
+ hash_table_.getAllFromValueAccessorCompositeKey(
+ probe_accessor,
+ join_key_attributes_,
+ any_join_key_attributes_nullable_,
+ &collector);
+ }
+
+ const relation_id build_relation_id = build_relation_.getID();
+ const relation_id probe_relation_id = probe_relation_.getID();
+
+ for (std::pair<const block_id, VectorOfTupleIdPair>
+ &build_block_entry : *collector.getJoinedTuples()) {
+ BlockReference build_block =
+ storage_manager_->getBlock(build_block_entry.first, build_relation_);
+ const TupleStorageSubBlock &build_store = build_block->getTupleStorageSubBlock();
+ std::unique_ptr<ValueAccessor> build_accessor(build_store.createValueAccessor());
+
+ // Evaluate '*residual_predicate_', if any.
+ //
+ // TODO(chasseur): We might consider implementing true vectorized
+ // evaluation for join predicates that are not equijoins (although in
+ // general that would require evaluating and materializing some expressions
+ // over the cross-product of all tuples in a pair of blocks in order to
+ // evaluate the predicate). We could use a heuristic where we only do the
+ // vectorized materialization and evaluation if the set of matches from the
+ // hash join is below a reasonable threshold so that we don't blow up
+ // temporary memory requirements to an unreasonable degree.
+ if (residual_predicate_ != nullptr) {
+ VectorOfTupleIdPair filtered_matches;
+
+ for (const std::pair<tuple_id, tuple_id> &hash_match
+ : build_block_entry.second) {
+ if (residual_predicate_->matchesForJoinedTuples(*build_accessor,
+ build_relation_id,
+ hash_match.first,
+ *probe_accessor,
+ probe_relation_id,
+ hash_match.second)) {
+ filtered_matches.emplace_back(hash_match);
+ }
+ }
+
+ build_block_entry.second = std::move(filtered_matches);
+ }
+
+ ColumnVectorsValueAccessor temp_result;
+ for (auto selection_cit = selection_.begin();
+ selection_cit != selection_.end();
+ ++selection_cit) {
+ temp_result.addColumn((*selection_cit)->getAllValuesForJoin(build_relation_id,
+ build_accessor.get(),
+ probe_relation_id,
+ probe_accessor,
+ build_block_entry.second));
+ }
+
+ output_destination_->bulkInsertTuples(&temp_result);
+ }
+}
+
+void HashInnerJoinWorkOrder::executeWithCopyElision(ValueAccessor *probe_accessor) {
PairsOfVectorsJoinedTuplesCollector collector;
if (join_key_attributes_.size() == 1) {
hash_table_.getAllFromValueAccessor(
- probe_accessor.get(),
+ probe_accessor,
join_key_attributes_.front(),
any_join_key_attributes_nullable_,
&collector);
} else {
hash_table_.getAllFromValueAccessorCompositeKey(
- probe_accessor.get(),
+ probe_accessor,
join_key_attributes_,
any_join_key_attributes_nullable_,
&collector);
@@ -491,7 +565,37 @@ void HashInnerJoinWorkOrder::execute() {
const relation_id build_relation_id = build_relation_.getID();
const relation_id probe_relation_id = probe_relation_.getID();
- for (std::pair<const block_id, std::pair<std::vector<tuple_id>, std::vector<tuple_id>>>
+ constexpr std::size_t kNumIndexes = 3u;
+ constexpr std::size_t kBuildIndex = 0, kProbeIndex = 1u, kTempIndex = 2u;
+
+ // Create a map of ValueAccessors and what attributes we want to pick from them.
+ std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map(
+ kNumIndexes, std::make_pair(nullptr /* late binding ValueAccessor */,
+ vector<attribute_id>(selection_.size(), kInvalidCatalogId)));
+
+ std::vector<const Scalar *> non_trivial_expressions;
+ attribute_id dest_attr = 0;
+
+ for (const auto &scalar : selection_) {
+ // If the Scalar (column) is not an attribute in build/probe blocks, we will
+ // insert it into a ColumnVectorsValueAccessor.
+ if (scalar->getDataSource() != Scalar::ScalarDataSource::kAttribute) {
+ // Current destination attribute maps to the column we'll create now.
+ accessor_attribute_map[kTempIndex].second[dest_attr] = non_trivial_expressions.size();
+ non_trivial_expressions.emplace_back(scalar.get());
+ } else {
+ const CatalogAttribute &attr = static_cast<const ScalarAttribute *>(scalar.get())->getAttribute();
+ const attribute_id attr_id = attr.getID();
+ if (attr.getParent().getID() == build_relation_id) {
+ accessor_attribute_map[kBuildIndex].second[dest_attr] = attr_id;
+ } else {
+ accessor_attribute_map[kProbeIndex].second[dest_attr] = attr_id;
+ }
+ }
+ ++dest_attr;
+ }
+
+ for (std::pair<const block_id, PairOfTupleIdVector>
&build_block_entry : *collector.getJoinedTuples()) {
BlockReference build_block =
storage_manager_->getBlock(build_block_entry.first, build_relation_);
@@ -511,7 +615,8 @@ void HashInnerJoinWorkOrder::execute() {
// hash join is below a reasonable threshold so that we don't blow up
// temporary memory requirements to an unreasonable degree.
if (residual_predicate_ != nullptr) {
- std::pair<std::vector<tuple_id>, std::vector<tuple_id>> filtered_matches;
+ PairOfTupleIdVector filtered_matches;
+
for (std::size_t i = 0; i < build_tids.size(); ++i) {
if (residual_predicate_->matchesForJoinedTuples(*build_accessor,
build_relation_id,
@@ -519,110 +624,51 @@ void HashInnerJoinWorkOrder::execute() {
*probe_accessor,
probe_relation_id,
probe_tids[i])) {
- filtered_matches.first.push_back(build_tids[i]);
- filtered_matches.second.push_back(probe_tids[i]);
+ filtered_matches.first.emplace_back(build_tids[i]);
+ filtered_matches.second.emplace_back(probe_tids[i]);
}
}
build_block_entry.second = std::move(filtered_matches);
}
- // TODO(chasseur): If all the output expressions are ScalarAttributes,
- // we could implement a similar fast-path to StorageBlock::selectSimple()
- // that avoids a copy.
- //
// TODO(chasseur): See TODO in NestedLoopsJoinOperator.cpp about limiting
// the size of materialized temporary results. In common usage, this
// probably won't be an issue for hash-joins, but in the worst case a hash
// join can still devolve into a cross-product.
- //
- // NOTE(chasseur): We could also create one big ColumnVectorsValueAccessor
- // and accumulate all the results across multiple block pairs into it
- // before inserting anything into output blocks, but this would require
- // some significant API extensions to the expressions system for a dubious
- // benefit (probably only a real performance win when there are very few
- // matching tuples in each individual inner block but very many inner
- // blocks with at least one match).
-
- // We now create ordered value accessors for both build and probe side,
- // using the joined tuple TIDs. Note that we have to use this Lambda-based
- // invocation method here because the accessors don't have a virtual
- // function that creates such an OrderedTupleIdSequenceAdapterValueAccessor.
- std::unique_ptr<ValueAccessor> ordered_build_accessor, ordered_probe_accessor;
- InvokeOnValueAccessorNotAdapter(
- build_accessor.get(),
- [&](auto *accessor) -> void { // NOLINT(build/c++11)
- ordered_build_accessor.reset(
- accessor->createSharedOrderedTupleIdSequenceAdapter(build_tids));
- });
-
- if (probe_accessor->isTupleIdSequenceAdapter()) {
- InvokeOnTupleIdSequenceAdapterValueAccessor(
- probe_accessor.get(),
- [&](auto *accessor) -> void { // NOLINT(build/c++11)
- ordered_probe_accessor.reset(
- accessor->createSharedOrderedTupleIdSequenceAdapter(probe_tids));
- });
- } else {
- InvokeOnValueAccessorNotAdapter(
- probe_accessor.get(),
- [&](auto *accessor) -> void { // NOLINT(build/c++11)
- ordered_probe_accessor.reset(
- accessor->createSharedOrderedTupleIdSequenceAdapter(probe_tids));
- });
- }
// We also need a temp value accessor to store results of any scalar expressions.
ColumnVectorsValueAccessor temp_result;
+ if (!non_trivial_expressions.empty()) {
+ // The getAllValuesForJoin function below needs joined tuple IDs as a
+ // vector of pair of (build-tuple-ID, probe-tuple-ID), and we have a pair
+ // of (build-tuple-IDs-vector, probe-tuple-IDs-vector). So we'll have to
+ // zip our two vectors together.
+ VectorOfTupleIdPair zipped_joined_tuple_ids;
+ for (std::size_t i = 0; i < build_tids.size(); ++i) {
+ zipped_joined_tuple_ids.emplace_back(build_tids[i], probe_tids[i]);
+ }
- // Create a map of ValueAccessors and what attributes we want to pick from them
- std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map(
- kNumValueAccessors, std::make_pair(nullptr, // A late binding ValueAccessor.
- vector<attribute_id>(selection_.size(), kInvalidCatalogId)));
-
- accessor_attribute_map[kBuildValueAccessorIndex].first = ordered_build_accessor.get();
- accessor_attribute_map[kProbeValueAccessorIndex].first = ordered_probe_accessor.get();
- accessor_attribute_map[kTempResultValueAccessorIndex].first = &temp_result;
-
- attribute_id dest_attr = 0;
- for (auto &selection_cit : selection_) {
- // If the Scalar (column) is not an attribute in build/probe blocks, then
- // insert it into a ColumnVectorsValueAccessor.
- if (selection_cit->getDataSource() != Scalar::ScalarDataSource::kAttribute) {
- // Current destination attribute maps to the column we'll create now.
- accessor_attribute_map[kTempResultValueAccessorIndex].second[dest_attr] = temp_result.getNumColumns();
-
- std::vector<std::pair<tuple_id, tuple_id>> zipped_joined_tuple_ids;
- if (temp_result.getNumColumns() == 0) {
- // The getAllValuesForJoin function below needs joined tuple IDs as
- // a vector of pair of (build-tuple-ID, probe-tuple-ID), and we have
- // a pair of (build-tuple-IDs-vector, probe-tuple-IDs-vector). So
- // we'll have to zip our two vectors together. We do this inside
- // the loop because most queries don't exercise this code since
- // they don't have scalar expressions with attributes from both
- // build and probe relations (other expressions would have been
- // pushed down to before the join).
- for (std::size_t i = 0; i < build_tids.size(); ++i) {
- zipped_joined_tuple_ids.emplace_back(build_tids[i], probe_tids[i]);
- }
- }
- temp_result.addColumn(
- selection_cit
- ->getAllValuesForJoin(build_relation_id, build_accessor.get(),
- probe_relation_id, probe_accessor.get(),
- zipped_joined_tuple_ids));
- } else {
- const CatalogAttribute &attr = static_cast<const ScalarAttribute *>(selection_cit.get())->getAttribute();
- const attribute_id attr_id = attr.getID();
- if (attr.getParent().getID() == build_relation_id) {
- accessor_attribute_map[kBuildValueAccessorIndex].second[dest_attr] = attr_id;
- } else {
- accessor_attribute_map[kProbeValueAccessorIndex].second[dest_attr] = attr_id;
- }
+ for (const Scalar *scalar : non_trivial_expressions) {
+ temp_result.addColumn(scalar->getAllValuesForJoin(build_relation_id,
+ build_accessor.get(),
+ probe_relation_id,
+ probe_accessor,
+ zipped_joined_tuple_ids));
}
- ++dest_attr;
}
+ // We now create ordered value accessors for both build and probe side,
+ // using the joined tuple IDs.
+ std::unique_ptr<ValueAccessor> ordered_build_accessor(
+ build_accessor->createSharedOrderedTupleIdSequenceAdapterVirtual(build_tids));
+ std::unique_ptr<ValueAccessor> ordered_probe_accessor(
+ probe_accessor->createSharedOrderedTupleIdSequenceAdapterVirtual(probe_tids));
+
+ accessor_attribute_map[kBuildIndex].first = ordered_build_accessor.get();
+ accessor_attribute_map[kProbeIndex].first = ordered_probe_accessor.get();
+ accessor_attribute_map[kTempIndex].first = &temp_result;
+
// NOTE(chasseur): calling the bulk-insert method of InsertDestination once
// for each pair of joined blocks incurs some extra overhead that could be
// avoided by keeping checked-out MutableBlockReferences across iterations
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/relational_operators/HashJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index acfe3d2..5e9c5d8 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -423,6 +423,10 @@ class HashInnerJoinWorkOrder : public WorkOrder {
}
private:
+ void executeWithoutCopyElision(ValueAccessor *probe_accesor);
+
+ void executeWithCopyElision(ValueAccessor *probe_accessor);
+
const CatalogRelationSchema &build_relation_;
const CatalogRelationSchema &probe_relation_;
const std::vector<attribute_id> join_key_attributes_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/storage/SplitRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/SplitRowStoreValueAccessor.hpp b/storage/SplitRowStoreValueAccessor.hpp
index 951a20a..46367b3 100644
--- a/storage/SplitRowStoreValueAccessor.hpp
+++ b/storage/SplitRowStoreValueAccessor.hpp
@@ -318,6 +318,11 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
return createSharedTupleIdSequenceAdapter(id_sequence);
}
+ ValueAccessor* createSharedOrderedTupleIdSequenceAdapterVirtual(
+ const OrderedTupleIdSequence &id_sequence) override {
+ return createSharedOrderedTupleIdSequenceAdapter(id_sequence);
+ }
+
const TupleIdSequence* getTupleIdSequenceVirtual() const override {
return getTupleIdSequence();
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/storage/ValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/ValueAccessor.hpp b/storage/ValueAccessor.hpp
index 654bbf9..f183efe 100644
--- a/storage/ValueAccessor.hpp
+++ b/storage/ValueAccessor.hpp
@@ -305,6 +305,21 @@ class ValueAccessor {
const TupleIdSequence &id_sequence) = 0;
/**
+ * @brief Create a new OrderedTupleIdSequenceAdapterValueAccessor that wraps
+ * this ValueAccessor.
+ * @warning The newly-created adapter does NOT take ownership of this
+ * ValueAccessor nor the provided OrderedTupleIdSequence. Both must
+ * remain valid so long as the adapter will be used.
+ *
+ * @param id_sequence An OrderedTupleIdSequence specifying some subset of the
+ * tuples for this ValueAccessor that the adapter will iterate over.
+ * @return A new OrderedTupleIdSequenceAdapterValueAccessor that will iterate
+ * over only the tuples specified in id_sequence.
+ **/
+ virtual ValueAccessor* createSharedOrderedTupleIdSequenceAdapterVirtual(
+ const OrderedTupleIdSequence &id_sequence) = 0;
+
+ /**
* @brief Get a TupleIdSequence indicating which positions this ValueAccessor
* is iterating over.
*
@@ -512,6 +527,11 @@ class TupleIdSequenceAdapterValueAccessor : public ValueAccessor {
return createSharedTupleIdSequenceAdapter(id_sequence);
}
+ ValueAccessor* createSharedOrderedTupleIdSequenceAdapterVirtual(
+ const OrderedTupleIdSequence &id_sequence) override {
+ return createSharedOrderedTupleIdSequenceAdapter(id_sequence);
+ }
+
const TupleIdSequence* getTupleIdSequenceVirtual() const override {
return getTupleIdSequence();
}
@@ -718,6 +738,11 @@ class OrderedTupleIdSequenceAdapterValueAccessor : public ValueAccessor {
return createSharedTupleIdSequenceAdapter(id_sequence);
}
+ ValueAccessor* createSharedOrderedTupleIdSequenceAdapterVirtual(
+ const OrderedTupleIdSequence &id_sequence) override {
+ return createSharedOrderedTupleIdSequenceAdapter(id_sequence);
+ }
+
const TupleIdSequence* getTupleIdSequenceVirtual() const override {
return getTupleIdSequence();
}
@@ -944,6 +969,11 @@ class PackedTupleStorageSubBlockValueAccessor : public ValueAccessor {
return createSharedTupleIdSequenceAdapter(id_sequence);
}
+ ValueAccessor* createSharedOrderedTupleIdSequenceAdapterVirtual(
+ const OrderedTupleIdSequence &id_sequence) override {
+ return createSharedOrderedTupleIdSequenceAdapter(id_sequence);
+ }
+
const TupleIdSequence* getTupleIdSequenceVirtual() const override {
return getTupleIdSequence();
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/types/containers/ColumnVectorsValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVectorsValueAccessor.hpp b/types/containers/ColumnVectorsValueAccessor.hpp
index fbbdc1b..6dc1124 100644
--- a/types/containers/ColumnVectorsValueAccessor.hpp
+++ b/types/containers/ColumnVectorsValueAccessor.hpp
@@ -290,6 +290,11 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
return createSharedTupleIdSequenceAdapter(id_sequence);
}
+ ValueAccessor* createSharedOrderedTupleIdSequenceAdapterVirtual(
+ const OrderedTupleIdSequence &id_sequence) override {
+ return createSharedOrderedTupleIdSequenceAdapter(id_sequence);
+ }
+
const TupleIdSequence* getTupleIdSequenceVirtual() const override {
return getTupleIdSequence();
}
[08/10] incubator-quickstep git commit: Add BitVectorExactFilter as a
LIP filter and supports Join-to-Semijoin transformation.
Posted by ji...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/rules/InjectJoinFilters.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/InjectJoinFilters.cpp b/query_optimizer/rules/InjectJoinFilters.cpp
new file mode 100644
index 0000000..0fcd06b
--- /dev/null
+++ b/query_optimizer/rules/InjectJoinFilters.cpp
@@ -0,0 +1,438 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/InjectJoinFilters.hpp"
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/rules/PruneColumns.hpp"
+#include "types/TypeID.hpp"
+#include "types/TypedValue.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr InjectJoinFilters::apply(const P::PhysicalPtr &input) {
+ DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+
+ const P::TopLevelPlanPtr top_level_plan =
+ std::static_pointer_cast<const P::TopLevelPlan>(input);
+ cost_model_.reset(
+ new cost::StarSchemaSimpleCostModel(
+ top_level_plan->shared_subplans()));
+ lip_filter_configuration_.reset(new P::LIPFilterConfiguration());
+
+ // Step 1. Transform applicable HashJoin nodes to FilterJoin nodes.
+ P::PhysicalPtr output = transformHashJoinToFilters(input);
+
+ // Step 2. Push down FilterJoin nodes to be evaluated early.
+ output = pushDownFilters(output);
+
+ // Step 3. Add Selection nodes for attaching the LIPFilters, if necessary.
+ output = addFilterAnchors(output, false);
+
+ // Step 4. Because of the pushdown of FilterJoin nodes, there are optimization
+ // opportunities for projecting columns early.
+ output = PruneColumns().apply(output);
+
+ // Step 5. For each FilterJoin node, attach its corresponding LIPFilter to
+ // proper nodes.
+ concretizeAsLIPFilters(output, nullptr);
+
+ if (!lip_filter_configuration_->getBuildInfoMap().empty() ||
+ !lip_filter_configuration_->getProbeInfoMap().empty()) {
+ output = std::static_pointer_cast<const P::TopLevelPlan>(output)
+ ->copyWithLIPFilterConfiguration(
+ P::LIPFilterConfigurationPtr(lip_filter_configuration_.release()));
+ }
+
+ return output;
+}
+
+bool InjectJoinFilters::isTransformable(
+ const physical::HashJoinPtr &hash_join) const {
+ // Conditions for replacing a HashJoin with a FilterJoin:
+
+ // No residual predicate.
+ if (hash_join->residual_predicate() != nullptr) {
+ return false;
+ }
+ // Single attribute equi-join.
+ if (hash_join->right_join_attributes().size() > 1) {
+ return false;
+ }
+ // All the output attributes must be from the probe side.
+ if (!E::SubsetOfExpressions(hash_join->getOutputAttributes(),
+ hash_join->left()->getOutputAttributes())) {
+ return false;
+ }
+ switch (hash_join->join_type()) {
+ case P::HashJoin::JoinType::kInnerJoin: {
+ // In the case of inner join, the build side join attributes must be unique.
+ if (!cost_model_->impliesUniqueAttributes(hash_join->right(),
+ hash_join->right_join_attributes())) {
+ return false;
+ }
+ break;
+ }
+ case P::HashJoin::JoinType::kLeftSemiJoin: // Fall through
+ case P::HashJoin::JoinType::kLeftAntiJoin:
+ break;
+ default:
+ return false;
+ }
+
+ // The build side join attribute has integer type and its values are exactly
+ // within a reasonable range.
+ std::int64_t min_cpp_value;
+ std::int64_t max_cpp_value;
+ const bool has_exact_min_max_stats =
+ findExactMinMaxValuesForAttributeHelper(hash_join->right(),
+ hash_join->right_join_attributes().front(),
+ &min_cpp_value,
+ &max_cpp_value);
+ if (!has_exact_min_max_stats) {
+ return false;
+ }
+
+ // TODO(jianqiao): implement SimpleHashSetExactFilter to relax this requirement.
+ // Note that 1G bits = 128MB.
+ const std::int64_t value_range = max_cpp_value - min_cpp_value;
+ DCHECK_GE(value_range, 0);
+ if (value_range > kMaxFilterSize) {
+ return false;
+ }
+
+ return true;
+}
+
+P::PhysicalPtr InjectJoinFilters::transformHashJoinToFilters(
+ const P::PhysicalPtr &input) const {
+ std::vector<P::PhysicalPtr> new_children;
+ bool has_changed_children = false;
+ for (const P::PhysicalPtr &child : input->children()) {
+ const P::PhysicalPtr new_child = transformHashJoinToFilters(child);
+ if (child != new_child && !has_changed_children) {
+ has_changed_children = true;
+ }
+ new_children.push_back(new_child);
+ }
+
+ P::HashJoinPtr hash_join;
+ if (P::SomeHashJoin::MatchesWithConditionalCast(input, &hash_join) &&
+ isTransformable(hash_join)) {
+ const bool is_anti_join =
+ hash_join->join_type() == P::HashJoin::JoinType::kLeftAntiJoin;
+
+ DCHECK_EQ(2u, new_children.size());
+ P::PhysicalPtr build_child = new_children[1];
+ E::PredicatePtr build_side_filter_predicate = nullptr;
+ P::SelectionPtr selection;
+ if (P::SomeSelection::MatchesWithConditionalCast(build_child, &selection) &&
+ E::SubsetOfExpressions(hash_join->right_join_attributes(),
+ selection->input()->getOutputAttributes())) {
+ build_child = selection->input();
+ build_side_filter_predicate = selection->filter_predicate();
+ }
+
+ return P::FilterJoin::Create(new_children[0],
+ build_child,
+ hash_join->left_join_attributes(),
+ hash_join->right_join_attributes(),
+ hash_join->project_expressions(),
+ build_side_filter_predicate,
+ is_anti_join);
+ }
+
+ if (has_changed_children) {
+ return input->copyWithNewChildren(new_children);
+ } else {
+ return input;
+ }
+}
+
+physical::PhysicalPtr InjectJoinFilters::pushDownFilters(
+ const physical::PhysicalPtr &input) const {
+ std::vector<P::PhysicalPtr> new_children;
+ bool has_changed_children = false;
+ for (const P::PhysicalPtr &child : input->children()) {
+ const P::PhysicalPtr new_child = pushDownFilters(child);
+ if (child != new_child && !has_changed_children) {
+ has_changed_children = true;
+ }
+ new_children.push_back(new_child);
+ }
+
+ P::FilterJoinPtr filter_join;
+ if (P::SomeFilterJoin::MatchesWithConditionalCast(input, &filter_join)) {
+ DCHECK_EQ(2u, new_children.size());
+ return pushDownFiltersInternal(
+ new_children[0], new_children[1], filter_join);
+ }
+
+ if (has_changed_children) {
+ return input->copyWithNewChildren(new_children);
+ } else {
+ return input;
+ }
+}
+
+physical::PhysicalPtr InjectJoinFilters::pushDownFiltersInternal(
+ const physical::PhysicalPtr &probe_child,
+ const physical::PhysicalPtr &build_child,
+ const physical::FilterJoinPtr &filter_join) const {
+ switch (probe_child->getPhysicalType()) {
+ case P::PhysicalType::kAggregate: // Fall through
+ case P::PhysicalType::kHashJoin:
+ case P::PhysicalType::kSample:
+ case P::PhysicalType::kSelection:
+ case P::PhysicalType::kSort:
+ case P::PhysicalType::kWindowAggregate: {
+ DCHECK_GE(probe_child->getNumChildren(), 1u);
+ const P::PhysicalPtr child = probe_child->children()[0];
+ if (E::SubsetOfExpressions(filter_join->probe_attributes(),
+ child->getOutputAttributes())) {
+ const P::PhysicalPtr new_child =
+ pushDownFiltersInternal(child, build_child, filter_join);
+ if (new_child != child) {
+ std::vector<P::PhysicalPtr> new_children = probe_child->children();
+ new_children[0] = new_child;
+ return probe_child->copyWithNewChildren(new_children);
+ }
+ }
+ }
+ default:
+ break;
+ }
+
+ if (probe_child != filter_join->left()) {
+ // TODO(jianqiao): may need to update probe_attributes.
+ return P::FilterJoin::Create(probe_child,
+ build_child,
+ filter_join->probe_attributes(),
+ filter_join->build_attributes(),
+ E::ToNamedExpressions(probe_child->getOutputAttributes()),
+ filter_join->build_side_filter_predicate(),
+ filter_join->is_anti_join());
+ } else {
+ return filter_join;
+ }
+}
+
+
+physical::PhysicalPtr InjectJoinFilters::addFilterAnchors(
+ const physical::PhysicalPtr &input,
+ const bool ancestor_can_anchor_filter) const {
+ std::vector<P::PhysicalPtr> new_children;
+
+ switch (input->getPhysicalType()) {
+ case P::PhysicalType::kAggregate: {
+ const P::AggregatePtr &aggregate =
+ std::static_pointer_cast<const P::Aggregate>(input);
+ new_children.emplace_back(
+ addFilterAnchors(aggregate->input(), true));
+ break;
+ }
+ case P::PhysicalType::kSelection: {
+ const P::SelectionPtr &selection =
+ std::static_pointer_cast<const P::Selection>(input);
+ new_children.emplace_back(
+ addFilterAnchors(selection->input(), true));
+ break;
+ }
+ // NOTE(jianqiao): Some of the SSB/TPCH queries slow down significantly if
+ // we attach converted filters to parent HashJoin nodes. E.g. one HashJoin +
+ // one attached LIPFilter is slower than the original two HashJoins. This is
+ // due to some implementation issues with the current HashJoinOperator. So
+ // currently we disable the anchoring of filters to HashJoin nodes. That is,
+ // in the case that a FilterJoin's parent node (or ancestor node, if there
+ // is a chain of FilterJoins) is a HashJoin, we create an extra Selection
+ // before the parent HashJoin as anchoring node to attach the filters. This
+ // guarantees non-degrading performance.
+ /*
+ case P::PhysicalType::kHashJoin: {
+ const P::HashJoinPtr &hash_join =
+ std::static_pointer_cast<const P::HashJoin>(input);
+ new_children.emplace_back(
+ addFilterAnchors(hash_join->left(), true));
+ new_children.emplace_back(
+ addFilterAnchors(hash_join->right(), false));
+ break;
+ }
+ */
+ case P::PhysicalType::kFilterJoin: {
+ const P::FilterJoinPtr &filter_join =
+ std::static_pointer_cast<const P::FilterJoin>(input);
+ new_children.emplace_back(
+ addFilterAnchors(filter_join->left(), true));
+ new_children.emplace_back(
+ addFilterAnchors(filter_join->right(), true));
+ break;
+ }
+ default: {
+ for (const P::PhysicalPtr &child : input->children()) {
+ new_children.emplace_back(addFilterAnchors(child, false));
+ }
+ }
+ }
+
+ DCHECK_EQ(new_children.size(), input->children().size());
+ const P::PhysicalPtr output_with_new_children =
+ new_children == input->children()
+ ? input
+ : input->copyWithNewChildren(new_children);
+
+ if (input->getPhysicalType() == P::PhysicalType::kFilterJoin &&
+ !ancestor_can_anchor_filter) {
+ const P::FilterJoinPtr &filter_join =
+ std::static_pointer_cast<const P::FilterJoin>(output_with_new_children);
+ return P::Selection::Create(filter_join,
+ filter_join->project_expressions(),
+ nullptr);
+ } else {
+ return output_with_new_children;
+ }
+}
+
+void InjectJoinFilters::concretizeAsLIPFilters(
+ const P::PhysicalPtr &input,
+ const P::PhysicalPtr &anchor_node) const {
+ switch (input->getPhysicalType()) {
+ case P::PhysicalType::kAggregate: {
+ const P::AggregatePtr &aggregate =
+ std::static_pointer_cast<const P::Aggregate>(input);
+ concretizeAsLIPFilters(aggregate->input(), aggregate);
+ break;
+ }
+ case P::PhysicalType::kSelection: {
+ const P::SelectionPtr &selection =
+ std::static_pointer_cast<const P::Selection>(input);
+ concretizeAsLIPFilters(selection->input(), selection);
+ break;
+ }
+ // Currently we disable the attachment of filters to HashJoin nodes. See the
+ // comments in InjectJoinFilters::addFilterAnchors().
+ /*
+ case P::PhysicalType::kHashJoin: {
+ const P::HashJoinPtr &hash_join =
+ std::static_pointer_cast<const P::HashJoin>(input);
+ concretizeAsLIPFilters(hash_join->left(), hash_join);
+ concretizeAsLIPFilters(hash_join->right(), nullptr);
+ break;
+ }
+ */
+ case P::PhysicalType::kFilterJoin: {
+ const P::FilterJoinPtr &filter_join =
+ std::static_pointer_cast<const P::FilterJoin>(input);
+ DCHECK_EQ(1u, filter_join->build_attributes().size());
+ const E::AttributeReferencePtr &build_attr =
+ filter_join->build_attributes().front();
+
+ std::int64_t min_cpp_value;
+ std::int64_t max_cpp_value;
+ const bool has_exact_min_max_stats =
+ findExactMinMaxValuesForAttributeHelper(filter_join,
+ build_attr,
+ &min_cpp_value,
+ &max_cpp_value);
+ DCHECK(has_exact_min_max_stats);
+ DCHECK_GE(max_cpp_value, min_cpp_value);
+ DCHECK_LE(max_cpp_value - min_cpp_value, kMaxFilterSize);
+ CHECK(anchor_node != nullptr);
+
+ lip_filter_configuration_->addBuildInfo(
+ P::BitVectorExactFilterBuildInfo::Create(build_attr,
+ min_cpp_value,
+ max_cpp_value,
+ filter_join->is_anti_join()),
+ filter_join);
+ lip_filter_configuration_->addProbeInfo(
+ P::LIPFilterProbeInfo::Create(filter_join->probe_attributes().front(),
+ build_attr,
+ filter_join),
+ anchor_node);
+
+ concretizeAsLIPFilters(filter_join->left(), anchor_node);
+ concretizeAsLIPFilters(filter_join->right(), filter_join);
+ break;
+ }
+ default: {
+ for (const P::PhysicalPtr &child : input->children()) {
+ concretizeAsLIPFilters(child, nullptr);
+ }
+ }
+ }
+}
+
+bool InjectJoinFilters::findExactMinMaxValuesForAttributeHelper(
+ const physical::PhysicalPtr &physical_plan,
+ const expressions::AttributeReferencePtr &attribute,
+ std::int64_t *min_cpp_value,
+ std::int64_t *max_cpp_value) const {
+ bool min_value_is_exact;
+ bool max_value_is_exact;
+
+ const TypedValue min_value =
+ cost_model_->findMinValueStat(physical_plan, attribute, &min_value_is_exact);
+ const TypedValue max_value =
+ cost_model_->findMaxValueStat(physical_plan, attribute, &max_value_is_exact);
+ if (min_value.isNull() || max_value.isNull() ||
+ (!min_value_is_exact) || (!max_value_is_exact)) {
+ return false;
+ }
+
+ switch (attribute->getValueType().getTypeID()) {
+ case TypeID::kInt: {
+ *min_cpp_value = min_value.getLiteral<int>();
+ *max_cpp_value = max_value.getLiteral<int>();
+ return true;
+ }
+ case TypeID::kLong: {
+ *min_cpp_value = min_value.getLiteral<std::int64_t>();
+ *max_cpp_value = max_value.getLiteral<std::int64_t>();
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/rules/InjectJoinFilters.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/InjectJoinFilters.hpp b/query_optimizer/rules/InjectJoinFilters.hpp
new file mode 100644
index 0000000..c5250b3
--- /dev/null
+++ b/query_optimizer/rules/InjectJoinFilters.hpp
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_INJECT_JOIN_FILTERS_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_INJECT_JOIN_FILTERS_HPP_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ * @{
+ */
+
+/**
+ * @brief Rule that applies to a physical plan to transform HashJoin nodes into
+ * FilterJoin nodes.
+ *
+ * This is an optimization that strength-reduces HashJoins to FilterJoins
+ * (implemented as LIPFilters attached to some anchoring operators where the
+ * filters get applied). Briefly speaking, the idea is that in the case that
+ * (1) the join attribute has consecutive integer values bounded in a reasonably
+ * small range AND (2) the output attributes are all from the probe-side table,
+ * we can eliminate the HashJoin by building a BitVector on the build-side
+ * attribute and using the BitVector to filter the probe-side table.
+ */
+class InjectJoinFilters : public Rule<physical::Physical> {
+ public:
+ /**
+ * @brief Constructor.
+ */
+ InjectJoinFilters() {}
+
+ ~InjectJoinFilters() override {}
+
+ std::string getName() const override {
+ return "TransformFilterJoins";
+ }
+
+ physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+ // Check whether a HashJoin can be transformed into a FilterJoin.
+ bool isTransformable(const physical::HashJoinPtr &hash_join) const;
+
+ // Transform applicable HashJoin nodes into FilterJoin nodes.
+ physical::PhysicalPtr transformHashJoinToFilters(
+ const physical::PhysicalPtr &input) const;
+
+ // Push down FilterJoin nodes to be evaluated early.
+ physical::PhysicalPtr pushDownFilters(const physical::PhysicalPtr &input) const;
+
+ // Add Selection node, if necessary, for anchoring the LIP filters built by
+ // FilterJoin nodes.
+ physical::PhysicalPtr addFilterAnchors(const physical::PhysicalPtr &input,
+ const bool ancestor_can_anchor_filter) const;
+
+ // Setup lip_filter_configuration_ with the transformed plan tree.
+ void concretizeAsLIPFilters(const physical::PhysicalPtr &input,
+ const physical::PhysicalPtr &anchor_node) const;
+
+ physical::PhysicalPtr pushDownFiltersInternal(
+ const physical::PhysicalPtr &probe_child,
+ const physical::PhysicalPtr &build_child,
+ const physical::FilterJoinPtr &filter_join) const;
+
+ bool findExactMinMaxValuesForAttributeHelper(
+ const physical::PhysicalPtr &physical_plan,
+ const expressions::AttributeReferencePtr &attribute,
+ std::int64_t *min_cpp_value,
+ std::int64_t *max_cpp_value) const;
+
+ std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
+ std::unique_ptr<physical::LIPFilterConfiguration> lip_filter_configuration_;
+
+ // TODO(jianqiao): Add this threshold as a gflag.
+ // Note that 1G bits = 128MB
+ static constexpr std::int64_t kMaxFilterSize = 1000000000L;
+
+ DISALLOW_COPY_AND_ASSIGN(InjectJoinFilters);
+};
+
+/** @} */
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_INJECT_JOIN_FILTERS_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/tests/OptimizerTextTest.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/OptimizerTextTest.cpp b/query_optimizer/tests/OptimizerTextTest.cpp
index e17f5c4..07accf6 100644
--- a/query_optimizer/tests/OptimizerTextTest.cpp
+++ b/query_optimizer/tests/OptimizerTextTest.cpp
@@ -34,6 +34,7 @@ namespace optimizer {
DECLARE_bool(reorder_columns);
DECLARE_bool(reorder_hash_joins);
DECLARE_bool(use_lip_filters);
+DECLARE_bool(use_filter_joins);
}
}
@@ -64,6 +65,7 @@ int main(int argc, char** argv) {
quickstep::optimizer::FLAGS_reorder_columns = false;
quickstep::optimizer::FLAGS_reorder_hash_joins = false;
quickstep::optimizer::FLAGS_use_lip_filters = false;
+ quickstep::optimizer::FLAGS_use_filter_joins = false;
::testing::InitGoogleTest(&argc, argv);
int success = RUN_ALL_TESTS();
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/relational_operators/BuildLIPFilterOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/BuildLIPFilterOperator.cpp b/relational_operators/BuildLIPFilterOperator.cpp
new file mode 100644
index 0000000..f7c09cd
--- /dev/null
+++ b/relational_operators/BuildLIPFilterOperator.cpp
@@ -0,0 +1,154 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "relational_operators/BuildLIPFilterOperator.hpp"
+
+#include <memory>
+#include <vector>
+
+#include "catalog/CatalogRelation.hpp"
+#include "query_execution/QueryContext.hpp"
+#include "query_execution/WorkOrderProtosContainer.hpp"
+#include "query_execution/WorkOrdersContainer.hpp"
+#include "relational_operators/WorkOrder.pb.h"
+#include "storage/StorageBlock.hpp"
+#include "storage/StorageBlockInfo.hpp"
+#include "storage/StorageManager.hpp"
+#include "storage/TupleIdSequence.hpp"
+#include "storage/TupleStorageSubBlock.hpp"
+#include "storage/ValueAccessor.hpp"
+#include "utility/lip_filter/LIPFilterAdaptiveProber.hpp"
+#include "utility/lip_filter/LIPFilterBuilder.hpp"
+#include "utility/lip_filter/LIPFilterUtil.hpp"
+
+#include "glog/logging.h"
+
+#include "tmb/id_typedefs.h"
+
+namespace quickstep {
+
+bool BuildLIPFilterOperator::getAllWorkOrders(
+ WorkOrdersContainer *container,
+ QueryContext *query_context,
+ StorageManager *storage_manager,
+ const tmb::client_id scheduler_client_id,
+ tmb::MessageBus *bus) {
+ DCHECK(query_context != nullptr);
+
+ const Predicate *build_side_predicate =
+ query_context->getPredicate(build_side_predicate_index_);
+
+ if (input_relation_is_stored_) {
+ if (!started_) {
+ for (const block_id input_block_id : input_relation_block_ids_) {
+ container->addNormalWorkOrder(
+ new BuildLIPFilterWorkOrder(
+ query_id_,
+ input_relation_,
+ input_block_id,
+ build_side_predicate,
+ storage_manager,
+ CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context),
+ CreateLIPFilterBuilderHelper(lip_deployment_index_, query_context)),
+ op_index_);
+ }
+ started_ = true;
+ }
+ return true;
+ } else {
+ while (num_workorders_generated_ < input_relation_block_ids_.size()) {
+ container->addNormalWorkOrder(
+ new BuildLIPFilterWorkOrder(
+ query_id_,
+ input_relation_,
+ input_relation_block_ids_[num_workorders_generated_],
+ build_side_predicate,
+ storage_manager,
+ CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context),
+ CreateLIPFilterBuilderHelper(lip_deployment_index_, query_context)),
+ op_index_);
+ ++num_workorders_generated_;
+ }
+ return done_feeding_input_relation_;
+ }
+}
+
+bool BuildLIPFilterOperator::getAllWorkOrderProtos(WorkOrderProtosContainer *container) {
+ if (input_relation_is_stored_) {
+ if (!started_) {
+ for (const block_id block : input_relation_block_ids_) {
+ container->addWorkOrderProto(createWorkOrderProto(block), op_index_);
+ }
+ started_ = true;
+ }
+ return true;
+ } else {
+ while (num_workorders_generated_ < input_relation_block_ids_.size()) {
+ container->addWorkOrderProto(
+ createWorkOrderProto(input_relation_block_ids_[num_workorders_generated_]),
+ op_index_);
+ ++num_workorders_generated_;
+ }
+ return done_feeding_input_relation_;
+ }
+}
+
+serialization::WorkOrder* BuildLIPFilterOperator::createWorkOrderProto(const block_id block) {
+ serialization::WorkOrder *proto = new serialization::WorkOrder;
+ proto->set_work_order_type(serialization::BUILD_LIP_FILTER);
+ proto->set_query_id(query_id_);
+
+ proto->SetExtension(serialization::BuildLIPFilterWorkOrder::relation_id, input_relation_.getID());
+ proto->SetExtension(serialization::BuildLIPFilterWorkOrder::build_block_id, block);
+ proto->SetExtension(serialization::BuildLIPFilterWorkOrder::build_side_predicate_index,
+ build_side_predicate_index_);
+ proto->SetExtension(serialization::BuildLIPFilterWorkOrder::lip_deployment_index, lip_deployment_index_);
+
+ return proto;
+}
+
+void BuildLIPFilterWorkOrder::execute() {
+ BlockReference block(
+ storage_manager_->getBlock(build_block_id_, input_relation_));
+
+ // Apply the predicate first.
+ std::unique_ptr<TupleIdSequence> predicate_matches;
+ if (build_side_predicate_ != nullptr) {
+ predicate_matches.reset(block->getMatchesForPredicate(build_side_predicate_));
+ }
+
+ std::unique_ptr<ValueAccessor> accessor(
+ block->getTupleStorageSubBlock().createValueAccessor(predicate_matches.get()));
+
+ if (lip_filter_adaptive_prober_ != nullptr) {
+ // Probe the LIP filters if there are any. Note that the LIP filters to be
+ // probed are for filtering the input relation. They are distinct from the
+ // target LIP filters we are building.
+ std::unique_ptr<TupleIdSequence> matches(
+ lip_filter_adaptive_prober_->filterValueAccessor(accessor.get()));
+ std::unique_ptr<ValueAccessor> filtered_accessor(
+ accessor->createSharedTupleIdSequenceAdapterVirtual(*matches));
+
+ lip_filter_builder_->insertValueAccessor(filtered_accessor.get());
+ } else {
+ lip_filter_builder_->insertValueAccessor(accessor.get());
+ }
+}
+
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/relational_operators/BuildLIPFilterOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/BuildLIPFilterOperator.hpp b/relational_operators/BuildLIPFilterOperator.hpp
new file mode 100644
index 0000000..5192b40
--- /dev/null
+++ b/relational_operators/BuildLIPFilterOperator.hpp
@@ -0,0 +1,200 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_RELATIONAL_OPERATORS_BUILD_LIP_FILTER_OPERATOR_HPP_
+#define QUICKSTEP_RELATIONAL_OPERATORS_BUILD_LIP_FILTER_OPERATOR_HPP_
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "catalog/CatalogRelation.hpp"
+#include "catalog/CatalogTypedefs.hpp"
+#include "query_execution/QueryContext.hpp"
+#include "relational_operators/RelationalOperator.hpp"
+#include "relational_operators/WorkOrder.hpp"
+#include "storage/StorageBlockInfo.hpp"
+#include "utility/Macros.hpp"
+#include "utility/lip_filter/LIPFilterAdaptiveProber.hpp"
+#include "utility/lip_filter/LIPFilterBuilder.hpp"
+
+#include "glog/logging.h"
+
+#include "tmb/id_typedefs.h"
+
+namespace tmb { class MessageBus; }
+
+namespace quickstep {
+
+class CatalogRelationSchema;
+class Predicate;
+class StorageManager;
+class WorkOrderProtosContainer;
+class WorkOrdersContainer;
+
+namespace serialization { class WorkOrder; }
+
+/** \addtogroup RelationalOperators
+ * @{
+ */
+
+/**
+ * @brief An operator which builds LIPFilters on one relation.
+ **/
+class BuildLIPFilterOperator : public RelationalOperator {
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * @note The LIPFilters' information are not passed explicitly as parameters
+ * to this constructor, but attached later via RelationalOperator::deployLIPFilters().
+ *
+ * @param query_id The ID of the query to which this operator belongs.
+ * @param input_relation The relation to build LIP filters on.
+ * @param build_side_predicate_index The index of the predicate in QueryContext
+ * where the predicate is to be applied to the input relation before
+ * building the LIP filters (or kInvalidPredicateId if no predicate is
+ * to be applied).
+ * @param input_relation_is_stored If input_relation is a stored relation and
+ * is fully available to the operator before it can start generating
+ * workorders.
+ **/
+ BuildLIPFilterOperator(const std::size_t query_id,
+ const CatalogRelation &input_relation,
+ const QueryContext::predicate_id build_side_predicate_index,
+ const bool input_relation_is_stored)
+ : RelationalOperator(query_id),
+ input_relation_(input_relation),
+ build_side_predicate_index_(build_side_predicate_index),
+ input_relation_is_stored_(input_relation_is_stored),
+ input_relation_block_ids_(input_relation_is_stored ? input_relation.getBlocksSnapshot()
+ : std::vector<block_id>()),
+ num_workorders_generated_(0),
+ started_(false) {}
+
+ ~BuildLIPFilterOperator() override {}
+
+ /**
+ * @return The input relation to build LIP filters on.
+ */
+ const CatalogRelation& input_relation() const {
+ return input_relation_;
+ }
+
+ std::string getName() const override {
+ return "BuildLIPFilterOperator";
+ }
+
+ bool getAllWorkOrders(WorkOrdersContainer *container,
+ QueryContext *query_context,
+ StorageManager *storage_manager,
+ const tmb::client_id scheduler_client_id,
+ tmb::MessageBus *bus) override;
+
+ bool getAllWorkOrderProtos(WorkOrderProtosContainer *container) override;
+
+ void feedInputBlock(const block_id input_block_id,
+ const relation_id input_relation_id,
+ const partition_id part_id) override {
+ input_relation_block_ids_.push_back(input_block_id);
+ }
+
+ private:
+ /**
+ * @brief Create Work Order proto.
+ *
+ * @param block The block id used in the Work Order.
+ **/
+ serialization::WorkOrder* createWorkOrderProto(const block_id block);
+
+ const CatalogRelation &input_relation_;
+ const QueryContext::predicate_id build_side_predicate_index_;
+ const bool input_relation_is_stored_;
+
+ std::vector<block_id> input_relation_block_ids_;
+ std::vector<block_id>::size_type num_workorders_generated_;
+
+ bool started_;
+
+ DISALLOW_COPY_AND_ASSIGN(BuildLIPFilterOperator);
+};
+
+/**
+ * @brief A WorkOrder produced by BuildLIPFilterOperator.
+ **/
+class BuildLIPFilterWorkOrder : public WorkOrder {
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * @param query_id The ID of the query to which this WorkOrder belongs.
+ * @param input_relation The relation to build LIP filters on.
+ * @param build_block_id The block id.
+ * @param build_side_predicate The predicate to be applied to filter the input
+ * relation before building the LIP filters (or nullptr if no predicate
+ * is to be applied).
+ * @param storage_manager The StorageManager to use.
+ * @param lip_filter_adaptive_prober The attached LIP filter prober.
+ * @param lip_filter_builder The attached LIP filter builder.
+ **/
+ BuildLIPFilterWorkOrder(const std::size_t query_id,
+ const CatalogRelationSchema &input_relation,
+ const block_id build_block_id,
+ const Predicate *build_side_predicate,
+ StorageManager *storage_manager,
+ LIPFilterAdaptiveProber *lip_filter_adaptive_prober,
+ LIPFilterBuilder *lip_filter_builder)
+ : WorkOrder(query_id),
+ input_relation_(input_relation),
+ build_block_id_(build_block_id),
+ build_side_predicate_(build_side_predicate),
+ storage_manager_(DCHECK_NOTNULL(storage_manager)),
+ lip_filter_adaptive_prober_(lip_filter_adaptive_prober),
+ lip_filter_builder_(DCHECK_NOTNULL(lip_filter_builder)) {}
+
+ ~BuildLIPFilterWorkOrder() override {}
+
+ /**
+ * @return The input relation to build LIP filters on.
+ */
+ const CatalogRelationSchema& input_relation() const {
+ return input_relation_;
+ }
+
+ void execute() override;
+
+ private:
+ const CatalogRelationSchema &input_relation_;
+ const block_id build_block_id_;
+ const Predicate *build_side_predicate_;
+
+ StorageManager *storage_manager_;
+
+ std::unique_ptr<LIPFilterAdaptiveProber> lip_filter_adaptive_prober_;
+ std::unique_ptr<LIPFilterBuilder> lip_filter_builder_;
+
+ DISALLOW_COPY_AND_ASSIGN(BuildLIPFilterWorkOrder);
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_RELATIONAL_OPERATORS_BUILD_LIP_FILTER_OPERATOR_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index c8447f3..c18dc77 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -34,6 +34,7 @@ set_gflags_lib_name ()
# Declare micro-libs:
add_library(quickstep_relationaloperators_AggregationOperator AggregationOperator.cpp AggregationOperator.hpp)
add_library(quickstep_relationaloperators_BuildHashOperator BuildHashOperator.cpp BuildHashOperator.hpp)
+add_library(quickstep_relationaloperators_BuildLIPFilterOperator BuildLIPFilterOperator.cpp BuildLIPFilterOperator.hpp)
add_library(quickstep_relationaloperators_CreateIndexOperator CreateIndexOperator.cpp CreateIndexOperator.hpp)
add_library(quickstep_relationaloperators_CreateTableOperator CreateTableOperator.cpp CreateTableOperator.hpp)
add_library(quickstep_relationaloperators_DestroyAggregationStateOperator
@@ -113,6 +114,27 @@ target_link_libraries(quickstep_relationaloperators_BuildHashOperator
quickstep_utility_lipfilter_LIPFilterBuilder
quickstep_utility_lipfilter_LIPFilterUtil
tmb)
+target_link_libraries(quickstep_relationaloperators_BuildLIPFilterOperator
+ glog
+ quickstep_catalog_CatalogRelation
+ quickstep_catalog_CatalogTypedefs
+ quickstep_queryexecution_QueryContext
+ quickstep_queryexecution_WorkOrderProtosContainer
+ quickstep_queryexecution_WorkOrdersContainer
+ quickstep_relationaloperators_RelationalOperator
+ quickstep_relationaloperators_WorkOrder
+ quickstep_relationaloperators_WorkOrder_proto
+ quickstep_storage_StorageBlock
+ quickstep_storage_StorageBlockInfo
+ quickstep_storage_StorageManager
+ quickstep_storage_TupleIdSequence
+ quickstep_storage_TupleStorageSubBlock
+ quickstep_storage_ValueAccessor
+ quickstep_utility_Macros
+ quickstep_utility_lipfilter_LIPFilterAdaptiveProber
+ quickstep_utility_lipfilter_LIPFilterBuilder
+ quickstep_utility_lipfilter_LIPFilterUtil
+ tmb)
target_link_libraries(quickstep_relationaloperators_CreateIndexOperator
glog
quickstep_catalog_CatalogRelation
@@ -483,6 +505,7 @@ target_link_libraries(quickstep_relationaloperators_WorkOrderFactory
quickstep_queryexecution_QueryContext
quickstep_relationaloperators_AggregationOperator
quickstep_relationaloperators_BuildHashOperator
+ quickstep_relationaloperators_BuildLIPFilterOperator
quickstep_relationaloperators_DeleteOperator
quickstep_relationaloperators_DestroyAggregationStateOperator
quickstep_relationaloperators_DestroyHashOperator
@@ -515,6 +538,7 @@ target_link_libraries(quickstep_relationaloperators_WorkOrder_proto
add_library(quickstep_relationaloperators ../empty_src.cpp RelationalOperatorsModule.hpp)
target_link_libraries(quickstep_relationaloperators
quickstep_relationaloperators_AggregationOperator
+ quickstep_relationaloperators_BuildLIPFilterOperator
quickstep_relationaloperators_BuildHashOperator
quickstep_relationaloperators_CreateIndexOperator
quickstep_relationaloperators_CreateTableOperator
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/relational_operators/WorkOrder.proto
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrder.proto b/relational_operators/WorkOrder.proto
index f8d9246..76753d2 100644
--- a/relational_operators/WorkOrder.proto
+++ b/relational_operators/WorkOrder.proto
@@ -24,25 +24,26 @@ import "relational_operators/SortMergeRunOperator.proto";
enum WorkOrderType {
AGGREGATION = 1;
BUILD_HASH = 2;
- CREATE_INDEX = 3; // Placeholder.
- CREATE_TABLE = 4; // Placeholder.
- DELETE = 5;
- DESTROY_HASH = 6;
- DROP_TABLE = 7;
- FINALIZE_AGGREGATION = 8;
- HASH_JOIN = 9;
- INSERT = 10;
- NESTED_LOOP_JOIN = 11;
- SAMPLE = 12;
- SAVE_BLOCKS = 13;
- SELECT = 14;
- SORT_MERGE_RUN = 15;
- SORT_RUN_GENERATION = 16;
- TABLE_GENERATOR = 17;
- TEXT_SCAN = 18;
- UPDATE = 19;
- WINDOW_AGGREGATION = 20;
- DESTROY_AGGREGATION_STATE = 21;
+ BUILD_LIP_FILTER = 3;
+ CREATE_INDEX = 4; // Placeholder.
+ CREATE_TABLE = 5; // Placeholder.
+ DELETE = 6;
+ DESTROY_HASH = 7;
+ DROP_TABLE = 8;
+ FINALIZE_AGGREGATION = 9;
+ HASH_JOIN = 10;
+ INSERT = 11;
+ NESTED_LOOP_JOIN = 12;
+ SAMPLE = 13;
+ SAVE_BLOCKS = 14;
+ SELECT = 15;
+ SORT_MERGE_RUN = 16;
+ SORT_RUN_GENERATION = 17;
+ TABLE_GENERATOR = 18;
+ TEXT_SCAN = 19;
+ UPDATE = 20;
+ WINDOW_AGGREGATION = 21;
+ DESTROY_AGGREGATION_STATE = 22;
}
message WorkOrder {
@@ -77,6 +78,16 @@ message BuildHashWorkOrder {
}
}
+message BuildLIPFilterWorkOrder {
+ extend WorkOrder {
+ // All required.
+ optional int32 relation_id = 48;
+ optional fixed64 build_block_id = 49;
+ optional int32 build_side_predicate_index = 50;
+ optional int32 lip_deployment_index = 51;
+ }
+}
+
message DeleteWorkOrder {
extend WorkOrder {
// All required.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/relational_operators/WorkOrderFactory.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrderFactory.cpp b/relational_operators/WorkOrderFactory.cpp
index a6cba02..5e8d03d 100644
--- a/relational_operators/WorkOrderFactory.cpp
+++ b/relational_operators/WorkOrderFactory.cpp
@@ -30,6 +30,7 @@
#include "query_execution/QueryContext.hpp"
#include "relational_operators/AggregationOperator.hpp"
#include "relational_operators/BuildHashOperator.hpp"
+#include "relational_operators/BuildLIPFilterOperator.hpp"
#include "relational_operators/DeleteOperator.hpp"
#include "relational_operators/DestroyAggregationStateOperator.hpp"
#include "relational_operators/DestroyHashOperator.hpp"
@@ -90,6 +91,23 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder
CreateLIPFilterAdaptiveProberHelper(
proto.GetExtension(serialization::AggregationWorkOrder::lip_deployment_index), query_context));
}
+ case serialization::BUILD_LIP_FILTER: {
+ LOG(INFO) << "Creating BuildLIPFilterWorkOrder in Shiftboss " << shiftboss_index;
+
+ const QueryContext::lip_deployment_id lip_deployment_index =
+ proto.GetExtension(serialization::BuildLIPFilterWorkOrder::lip_deployment_index);
+
+ return new BuildLIPFilterWorkOrder(
+ proto.query_id(),
+ catalog_database->getRelationSchemaById(
+ proto.GetExtension(serialization::BuildLIPFilterWorkOrder::relation_id)),
+ proto.GetExtension(serialization::BuildLIPFilterWorkOrder::build_block_id),
+ query_context->getPredicate(
+ proto.GetExtension(serialization::BuildLIPFilterWorkOrder::build_side_predicate_index)),
+ storage_manager,
+ CreateLIPFilterAdaptiveProberHelper(lip_deployment_index, query_context),
+ CreateLIPFilterBuilderHelper(lip_deployment_index, query_context));
+ }
case serialization::BUILD_HASH: {
LOG(INFO) << "Creating BuildHashWorkOrder in Shiftboss " << shiftboss_index;
vector<attribute_id> join_key_attributes;
@@ -541,6 +559,33 @@ bool WorkOrderFactory::ProtoIsValid(const serialization::WorkOrder &proto,
proto.GetExtension(serialization::BuildHashWorkOrder::join_hash_table_index),
proto.GetExtension(serialization::BuildHashWorkOrder::partition_id));
}
+ case serialization::BUILD_LIP_FILTER: {
+ if (!proto.HasExtension(serialization::BuildLIPFilterWorkOrder::relation_id)) {
+ return false;
+ }
+
+ const relation_id rel_id =
+ proto.GetExtension(serialization::BuildLIPFilterWorkOrder::relation_id);
+ if (!catalog_database.hasRelationWithId(rel_id)) {
+ return false;
+ }
+
+ if (!proto.HasExtension(serialization::BuildLIPFilterWorkOrder::lip_deployment_index)) {
+ return false;
+ } else {
+ const QueryContext::lip_deployment_id lip_deployment_index =
+ proto.GetExtension(serialization::BuildLIPFilterWorkOrder::lip_deployment_index);
+ if (lip_deployment_index != QueryContext::kInvalidLIPDeploymentId &&
+ !query_context.isValidLIPDeploymentId(lip_deployment_index)) {
+ return false;
+ }
+ }
+
+ return proto.HasExtension(serialization::BuildLIPFilterWorkOrder::build_block_id) &&
+ proto.HasExtension(serialization::BuildLIPFilterWorkOrder::build_side_predicate_index) &&
+ query_context.isValidPredicate(
+ proto.GetExtension(serialization::BuildLIPFilterWorkOrder::build_side_predicate_index));
+ }
case serialization::DELETE: {
return proto.HasExtension(serialization::DeleteWorkOrder::relation_id) &&
catalog_database.hasRelationWithId(
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index 8571149..aeff388 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -270,6 +270,7 @@ target_link_libraries(quickstep_utility_PlanVisualizer
quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
quickstep_queryoptimizer_expressions_AttributeReference
quickstep_queryoptimizer_expressions_ExprId
+ quickstep_queryoptimizer_physical_FilterJoin
quickstep_queryoptimizer_physical_HashJoin
quickstep_queryoptimizer_physical_LIPFilterConfiguration
quickstep_queryoptimizer_physical_Physical
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/utility/PlanVisualizer.cpp
----------------------------------------------------------------------
diff --git a/utility/PlanVisualizer.cpp b/utility/PlanVisualizer.cpp
index df7a20c..f8bf6f8 100644
--- a/utility/PlanVisualizer.cpp
+++ b/utility/PlanVisualizer.cpp
@@ -32,6 +32,7 @@
#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/PhysicalType.hpp"
@@ -58,6 +59,8 @@ std::string PlanVisualizer::visualize(const P::PhysicalPtr &input) {
color_map_["TableReference"] = "skyblue";
color_map_["Selection"] = "#90EE90";
+ color_map_["FilterJoin"] = "pink";
+ color_map_["FilterJoin(Anti)"] = "pink";
color_map_["HashJoin"] = "red";
color_map_["HashLeftOuterJoin"] = "orange";
color_map_["HashLeftSemiJoin"] = "orange";
@@ -126,7 +129,8 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
edge_info.dst_node_id = node_id;
edge_info.dashed = false;
- if (input->getPhysicalType() == P::PhysicalType::kHashJoin &&
+ if ((input->getPhysicalType() == P::PhysicalType::kHashJoin ||
+ input->getPhysicalType() == P::PhysicalType::kFilterJoin) &&
child == input->children()[1]) {
edge_info.dashed = true;
}
@@ -165,6 +169,20 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
}
break;
}
+ case P::PhysicalType::kFilterJoin: {
+ const P::FilterJoinPtr filter_join =
+ std::static_pointer_cast<const P::FilterJoin>(input);
+ node_info.labels.emplace_back(input->getName());
+
+ const auto &probe_attributes = filter_join->probe_attributes();
+ const auto &build_attributes = filter_join->build_attributes();
+ for (std::size_t i = 0; i < probe_attributes.size(); ++i) {
+ node_info.labels.emplace_back(
+ probe_attributes[i]->attribute_alias() + " = " +
+ build_attributes[i]->attribute_alias());
+ }
+ break;
+ }
default: {
node_info.labels.emplace_back(input->getName());
break;
@@ -177,7 +195,7 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
if (build_it != build_filters.end()) {
for (const auto &build_info : build_it->second) {
node_info.labels.emplace_back(
- std::string("[LIP build] ") + build_info.build_attribute->attribute_alias());
+ std::string("[LIP build] ") + build_info->build_attribute()->attribute_alias());
}
}
const auto &probe_filters = lip_filter_conf_->getProbeInfoMap();
@@ -185,7 +203,7 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
if (probe_it != probe_filters.end()) {
for (const auto &probe_info : probe_it->second) {
node_info.labels.emplace_back(
- std::string("[LIP probe] ") + probe_info.probe_attribute->attribute_alias());
+ std::string("[LIP probe] ") + probe_info->probe_attribute()->attribute_alias());
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/utility/lip_filter/BitVectorExactFilter.hpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/BitVectorExactFilter.hpp b/utility/lip_filter/BitVectorExactFilter.hpp
new file mode 100644
index 0000000..6ad0567
--- /dev/null
+++ b/utility/lip_filter/BitVectorExactFilter.hpp
@@ -0,0 +1,202 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_LIP_FILTER_BIT_VECTOR_EXACT_FILTER_HPP_
+#define QUICKSTEP_UTILITY_LIP_FILTER_BIT_VECTOR_EXACT_FILTER_HPP_
+
+#include <atomic>
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "storage/StorageBlockInfo.hpp"
+#include "storage/StorageConstants.hpp"
+#include "storage/ValueAccessor.hpp"
+#include "storage/ValueAccessorUtil.hpp"
+#include "types/Type.hpp"
+#include "utility/Macros.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ * @{
+ */
+
+/**
+ * @brief A LIP filter that tests the EXACT memberships of elements, i.e. there
+ * will be neither false positives nor false negatives. The implementation
+ * is to simply reinterpret_cast a value's byte stream into the specified
+ * CppType as the underlying bit vector's index. Therefore, to use this
+ * filter, the corresponding LIP attribute's values must be bounded in a
+ * reasonably small integer range.
+ */
+template <typename CppType, bool is_anti_filter>
+class BitVectorExactFilter : public LIPFilter {
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * @param min_value The minimum possible value for this filter to set.
+ * @param max_value The maximum possible value for this filter to set.
+ */
+ explicit BitVectorExactFilter(const std::int64_t min_value,
+ const std::int64_t max_value)
+ : LIPFilter(LIPFilterType::kBitVectorExactFilter),
+ min_value_(static_cast<CppType>(min_value)),
+ max_value_(static_cast<CppType>(max_value)),
+ bit_array_(GetByteSize(max_value - min_value + 1)) {
+ DCHECK_EQ(min_value, static_cast<std::int64_t>(min_value_));
+ DCHECK_EQ(max_value, static_cast<std::int64_t>(max_value_));
+ DCHECK_GE(max_value_, min_value_);
+
+ std::memset(bit_array_.data(),
+ 0x0,
+ sizeof(std::atomic<std::uint8_t>) * GetByteSize(max_value - min_value + 1));
+ }
+
+ void insertValueAccessor(ValueAccessor *accessor,
+ const attribute_id attr_id,
+ const Type *attr_type) override {
+ InvokeOnAnyValueAccessor(
+ accessor,
+ [&](auto *accessor) -> void { // NOLINT(build/c++11)
+ if (attr_type->isNullable()) {
+ this->insertValueAccessorInternal<true>(accessor, attr_id);
+ } else {
+ this->insertValueAccessorInternal<false>(accessor, attr_id);
+ }
+ });
+ }
+
+ std::size_t filterBatch(ValueAccessor *accessor,
+ const attribute_id attr_id,
+ const bool is_attr_nullable,
+ std::vector<tuple_id> *batch,
+ const std::size_t batch_size) const override {
+ DCHECK(batch != nullptr);
+ DCHECK_LE(batch_size, batch->size());
+
+ return InvokeOnAnyValueAccessor(
+ accessor,
+ [&](auto *accessor) -> std::size_t { // NOLINT(build/c++11)
+ if (is_attr_nullable) {
+ return this->filterBatchInternal<true>(accessor, attr_id, batch, batch_size);
+ } else {
+ return this->filterBatchInternal<false>(accessor, attr_id, batch, batch_size);
+ }
+ });
+ }
+
+ private:
+ /**
+ * @brief Round up bit_size to multiples of 8.
+ */
+ inline static std::size_t GetByteSize(const std::size_t bit_size) {
+ return (bit_size + 7u) / 8u;
+ }
+
+ /**
+ * @brief Iterate through the accessor and hash values into the internal bit
+ * array.
+ */
+ template <bool is_attr_nullable, typename ValueAccessorT>
+ inline void insertValueAccessorInternal(ValueAccessorT *accessor,
+ const attribute_id attr_id) {
+ accessor->beginIteration();
+ while (accessor->next()) {
+ const void *value = accessor->template getUntypedValue<is_attr_nullable>(attr_id);
+ if (!is_attr_nullable || value != nullptr) {
+ insert(value);
+ }
+ }
+ }
+
+ /**
+ * @brief Filter the given batch of tuples from a ValueAccessor. Write the
+ * tuple ids which survive in the filtering back to \p batch.
+ */
+ template <bool is_attr_nullable, typename ValueAccessorT>
+ inline std::size_t filterBatchInternal(const ValueAccessorT *accessor,
+ const attribute_id attr_id,
+ std::vector<tuple_id> *batch,
+ const std::size_t batch_size) const {
+ std::size_t out_size = 0;
+ for (std::size_t i = 0; i < batch_size; ++i) {
+ const tuple_id tid = batch->at(i);
+ const void *value =
+ accessor->template getUntypedValueAtAbsolutePosition(attr_id, tid);
+ if (is_attr_nullable && value == nullptr) {
+ continue;
+ }
+ if (contains(value)) {
+ batch->at(out_size) = tid;
+ ++out_size;
+ }
+ }
+ return out_size;
+ }
+
+ /**
+ * @brief Inserts a given value into the exact filter.
+ */
+ inline void insert(const void *key_begin) {
+ const CppType value = *reinterpret_cast<const CppType *>(key_begin);
+ DCHECK_GE(value, min_value_);
+ DCHECK_LE(value, max_value_);
+
+ const CppType loc = value - min_value_;
+ bit_array_[loc >> 3u].fetch_or(1u << (loc & 7u), std::memory_order_relaxed);
+ }
+
+ /**
+ * @brief Test membership of a given value in the exact filter.
+ */
+ inline bool contains(const void *key_begin) const {
+ const CppType value = *reinterpret_cast<const CppType *>(key_begin);
+ if (value < min_value_ || value > max_value_) {
+ return is_anti_filter;
+ }
+
+ const CppType loc = value - min_value_;
+ const bool is_bit_set =
+ (bit_array_[loc >> 3u].load(std::memory_order_relaxed) & (1u << (loc & 7u))) != 0;
+
+ if (is_anti_filter) {
+ return !is_bit_set;
+ } else {
+ return is_bit_set;
+ }
+ }
+
+ const CppType min_value_;
+ const CppType max_value_;
+ alignas(kCacheLineBytes) std::vector<std::atomic<std::uint8_t>> bit_array_;
+
+ DISALLOW_COPY_AND_ASSIGN(BitVectorExactFilter);
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_UTILITY_LIP_FILTER_BIT_VECTOR_EXACT_FILTER_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/utility/lip_filter/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/lip_filter/CMakeLists.txt b/utility/lip_filter/CMakeLists.txt
index 23b3763..edd0d24 100644
--- a/utility/lip_filter/CMakeLists.txt
+++ b/utility/lip_filter/CMakeLists.txt
@@ -20,6 +20,7 @@ QS_PROTOBUF_GENERATE_CPP(utility_lipfilter_LIPFilter_proto_srcs
LIPFilter.proto)
# Declare micro-libs:
+add_library(quickstep_utility_lipfilter_BitVectorExactFilter ../../empty_src.cpp BitVectorExactFilter.hpp)
add_library(quickstep_utility_lipfilter_LIPFilter ../../empty_src.cpp LIPFilter.hpp)
add_library(quickstep_utility_lipfilter_LIPFilterAdaptiveProber ../../empty_src.cpp LIPFilterAdaptiveProber.hpp)
add_library(quickstep_utility_lipfilter_LIPFilterBuilder ../../empty_src.cpp LIPFilterBuilder.hpp)
@@ -31,6 +32,15 @@ add_library(quickstep_utility_lipfilter_LIPFilter_proto
add_library(quickstep_utility_lipfilter_SingleIdentityHashFilter ../../empty_src.cpp SingleIdentityHashFilter.hpp)
# Link dependencies:
+target_link_libraries(quickstep_utility_lipfilter_BitVectorExactFilter
+ quickstep_catalog_CatalogTypedefs
+ quickstep_storage_StorageBlockInfo
+ quickstep_storage_StorageConstants
+ quickstep_storage_ValueAccessor
+ quickstep_storage_ValueAccessorUtil
+ quickstep_types_Type
+ quickstep_utility_lipfilter_LIPFilter
+ quickstep_utility_Macros)
target_link_libraries(quickstep_utility_lipfilter_LIPFilter
quickstep_catalog_CatalogTypedefs
quickstep_storage_StorageBlockInfo
@@ -56,6 +66,7 @@ target_link_libraries(quickstep_utility_lipfilter_LIPFilterDeployment
quickstep_utility_lipfilter_LIPFilterBuilder
quickstep_utility_lipfilter_LIPFilter_proto)
target_link_libraries(quickstep_utility_lipfilter_LIPFilterFactory
+ quickstep_utility_lipfilter_BitVectorExactFilter
quickstep_utility_lipfilter_LIPFilter_proto
quickstep_utility_lipfilter_SingleIdentityHashFilter
quickstep_utility_Macros)
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/utility/lip_filter/LIPFilter.hpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilter.hpp b/utility/lip_filter/LIPFilter.hpp
index 682d69f..ba38264 100644
--- a/utility/lip_filter/LIPFilter.hpp
+++ b/utility/lip_filter/LIPFilter.hpp
@@ -37,8 +37,8 @@ class ValueAccessor;
*/
enum class LIPFilterType {
+ kBitVectorExactFilter,
kBloomFilter,
- kExactFilter,
kSingleIdentityHashFilter
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/utility/lip_filter/LIPFilter.proto
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilter.proto b/utility/lip_filter/LIPFilter.proto
index def13dd..45843f3 100644
--- a/utility/lip_filter/LIPFilter.proto
+++ b/utility/lip_filter/LIPFilter.proto
@@ -22,8 +22,8 @@ package quickstep.serialization;
import "types/Type.proto";
enum LIPFilterType {
- BLOOM_FILTER = 1;
- EXACT_FILTER = 2;
+ BIT_VECTOR_EXACT_FILTER = 1;
+ BLOOM_FILTER = 2;
SINGLE_IDENTITY_HASH_FILTER = 3;
}
@@ -33,17 +33,22 @@ message LIPFilter {
extensions 16 to max;
}
-message SingleIdentityHashFilter {
+message BitVectorExactFilter {
extend LIPFilter {
// All required
- optional uint64 filter_cardinality = 16;
- optional uint64 attribute_size = 17;
+ optional int64 min_value = 16;
+ optional int64 max_value = 17;
+ optional uint64 attribute_size = 18;
+ optional bool is_anti_filter = 19;
}
}
-enum LIPFilterActionType {
- BUILD = 1;
- PROBE = 2;
+message SingleIdentityHashFilter {
+ extend LIPFilter {
+ // All required
+ optional uint64 filter_cardinality = 24;
+ optional uint64 attribute_size = 25;
+ }
}
message LIPFilterDeployment {
@@ -53,6 +58,6 @@ message LIPFilterDeployment {
required Type attribute_type = 3;
}
- required LIPFilterActionType action_type = 1;
- repeated Entry entries = 2;
+ repeated Entry build_entries = 1;
+ repeated Entry probe_entries = 2;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/utility/lip_filter/LIPFilterDeployment.cpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilterDeployment.cpp b/utility/lip_filter/LIPFilterDeployment.cpp
index cd4d90f..5721496 100644
--- a/utility/lip_filter/LIPFilterDeployment.cpp
+++ b/utility/lip_filter/LIPFilterDeployment.cpp
@@ -28,45 +28,49 @@
#include "utility/lip_filter/LIPFilterBuilder.hpp"
#include "utility/lip_filter/LIPFilterAdaptiveProber.hpp"
-#include "glog/logging.h"
-
namespace quickstep {
LIPFilterDeployment::LIPFilterDeployment(
const serialization::LIPFilterDeployment &proto,
const std::vector<std::unique_ptr<LIPFilter>> &lip_filters) {
- switch (proto.action_type()) {
- case serialization::LIPFilterActionType::BUILD:
- action_type_ = LIPFilterActionType::kBuild;
- break;
- case serialization::LIPFilterActionType::PROBE:
- action_type_ = LIPFilterActionType::kProbe;
- break;
- default:
- LOG(FATAL) << "Unsupported LIPFilterActionType: "
- << serialization::LIPFilterActionType_Name(proto.action_type());
+ if (proto.build_entries_size() > 0) {
+ build_.reset(new LIPFilterDeploymentInfo());
+ for (int i = 0; i < proto.build_entries_size(); ++i) {
+ const auto &entry_proto = proto.build_entries(i);
+ build_->lip_filters_.emplace_back(
+ lip_filters.at(entry_proto.lip_filter_id()).get());
+ build_->attr_ids_.emplace_back(entry_proto.attribute_id());
+ build_->attr_types_.emplace_back(
+ &TypeFactory::ReconstructFromProto(entry_proto.attribute_type()));
+ }
}
- for (int i = 0; i < proto.entries_size(); ++i) {
- const auto &entry_proto = proto.entries(i);
- lip_filters_.emplace_back(lip_filters.at(entry_proto.lip_filter_id()).get());
- attr_ids_.emplace_back(entry_proto.attribute_id());
- attr_types_.emplace_back(&TypeFactory::ReconstructFromProto(entry_proto.attribute_type()));
+ if (proto.probe_entries_size() > 0) {
+ probe_.reset(new LIPFilterDeploymentInfo());
+ for (int i = 0; i < proto.probe_entries_size(); ++i) {
+ const auto &entry_proto = proto.probe_entries(i);
+ probe_->lip_filters_.emplace_back(
+ lip_filters.at(entry_proto.lip_filter_id()).get());
+ probe_->attr_ids_.emplace_back(entry_proto.attribute_id());
+ probe_->attr_types_.emplace_back(
+ &TypeFactory::ReconstructFromProto(entry_proto.attribute_type()));
+ }
}
}
bool LIPFilterDeployment::ProtoIsValid(
const serialization::LIPFilterDeployment &proto) {
- if (proto.action_type() != serialization::LIPFilterActionType::BUILD &&
- proto.action_type() != serialization::LIPFilterActionType::PROBE) {
- LOG(FATAL) << "Unsupported LIPFilterActionType: "
- << serialization::LIPFilterActionType_Name(proto.action_type());
- }
- if (proto.entries_size() == 0) {
+ if (proto.build_entries_size() == 0 && proto.probe_entries_size() == 0) {
return false;
}
- for (int i = 0; i < proto.entries_size(); ++i) {
- const auto &entry_proto = proto.entries(i);
+ for (int i = 0; i < proto.build_entries_size(); ++i) {
+ const auto &entry_proto = proto.build_entries(i);
+ if (!TypeFactory::ProtoIsValid(entry_proto.attribute_type())) {
+ return false;
+ }
+ }
+ for (int i = 0; i < proto.probe_entries_size(); ++i) {
+ const auto &entry_proto = proto.probe_entries(i);
if (!TypeFactory::ProtoIsValid(entry_proto.attribute_type())) {
return false;
}
@@ -75,13 +79,23 @@ bool LIPFilterDeployment::ProtoIsValid(
}
LIPFilterBuilder* LIPFilterDeployment::createLIPFilterBuilder() const {
- DCHECK(action_type_ == LIPFilterActionType::kBuild);
- return new LIPFilterBuilder(lip_filters_, attr_ids_, attr_types_);
+ if (build_ == nullptr) {
+ return nullptr;
+ }
+
+ return new LIPFilterBuilder(build_->lip_filters_,
+ build_->attr_ids_,
+ build_->attr_types_);
}
LIPFilterAdaptiveProber* LIPFilterDeployment::createLIPFilterAdaptiveProber() const {
- DCHECK(action_type_ == LIPFilterActionType::kProbe);
- return new LIPFilterAdaptiveProber(lip_filters_, attr_ids_, attr_types_);
+ if (probe_ == nullptr) {
+ return nullptr;
+ }
+
+ return new LIPFilterAdaptiveProber(probe_->lip_filters_,
+ probe_->attr_ids_,
+ probe_->attr_types_);
}
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/utility/lip_filter/LIPFilterDeployment.hpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilterDeployment.hpp b/utility/lip_filter/LIPFilterDeployment.hpp
index 9b37f88..ab1259b 100644
--- a/utility/lip_filter/LIPFilterDeployment.hpp
+++ b/utility/lip_filter/LIPFilterDeployment.hpp
@@ -39,11 +39,6 @@ class Type;
* @{
*/
-enum class LIPFilterActionType {
- kBuild = 0,
- kProbe
-};
-
/**
* @brief Helper class for organizing a group of LIPFilters in the backend.
* Each LIPFilterDeployment object is attached to a RelationalOperator.
@@ -69,16 +64,6 @@ class LIPFilterDeployment {
static bool ProtoIsValid(const serialization::LIPFilterDeployment &proto);
/**
- * @brief Get the action type for this group of LIPFilters (i.e. whether
- * to build or probe the filters).
- *
- * @return The action type.
- */
- LIPFilterActionType getActionType() const {
- return action_type_;
- }
-
- /**
* @brief Create a LIPFilterBuilder for this group of LIPFilters.
*
* @return A new LIPFilterBuilder object for this group of LIPFilters.
@@ -95,11 +80,14 @@ class LIPFilterDeployment {
LIPFilterAdaptiveProber* createLIPFilterAdaptiveProber() const;
private:
- LIPFilterActionType action_type_;
-
- std::vector<LIPFilter *> lip_filters_;
- std::vector<attribute_id> attr_ids_;
- std::vector<const Type *> attr_types_;
+ struct LIPFilterDeploymentInfo {
+ std::vector<LIPFilter *> lip_filters_;
+ std::vector<attribute_id> attr_ids_;
+ std::vector<const Type *> attr_types_;
+ };
+
+ std::unique_ptr<LIPFilterDeploymentInfo> build_;
+ std::unique_ptr<LIPFilterDeploymentInfo> probe_;
DISALLOW_COPY_AND_ASSIGN(LIPFilterDeployment);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/utility/lip_filter/LIPFilterFactory.cpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilterFactory.cpp b/utility/lip_filter/LIPFilterFactory.cpp
index ebc4a0e..f69d8b0 100644
--- a/utility/lip_filter/LIPFilterFactory.cpp
+++ b/utility/lip_filter/LIPFilterFactory.cpp
@@ -23,6 +23,7 @@
#include <cstdint>
#include "utility/lip_filter/LIPFilter.pb.h"
+#include "utility/lip_filter/BitVectorExactFilter.hpp"
#include "utility/lip_filter/SingleIdentityHashFilter.hpp"
#include "glog/logging.h"
@@ -31,6 +32,46 @@ namespace quickstep {
LIPFilter* LIPFilterFactory::ReconstructFromProto(const serialization::LIPFilter &proto) {
switch (proto.lip_filter_type()) {
+ case serialization::LIPFilterType::BIT_VECTOR_EXACT_FILTER: {
+ const std::size_t attr_size =
+ proto.GetExtension(serialization::BitVectorExactFilter::attribute_size);
+ const std::int64_t min_value =
+ proto.GetExtension(serialization::BitVectorExactFilter::min_value);
+ const std::int64_t max_value =
+ proto.GetExtension(serialization::BitVectorExactFilter::max_value);
+ const bool is_anti_filter =
+ proto.GetExtension(serialization::BitVectorExactFilter::is_anti_filter);
+
+ switch (attr_size) {
+ case 1:
+ if (is_anti_filter) {
+ return new BitVectorExactFilter<std::int8_t, true>(min_value, max_value);
+ } else {
+ return new BitVectorExactFilter<std::int8_t, false>(min_value, max_value);
+ }
+ case 2:
+ if (is_anti_filter) {
+ return new BitVectorExactFilter<std::int16_t, true>(min_value, max_value);
+ } else {
+ return new BitVectorExactFilter<std::int16_t, false>(min_value, max_value);
+ }
+ case 4:
+ if (is_anti_filter) {
+ return new BitVectorExactFilter<std::int32_t, true>(min_value, max_value);
+ } else {
+ return new BitVectorExactFilter<std::int32_t, false>(min_value, max_value);
+ }
+ case 8:
+ if (is_anti_filter) {
+ return new BitVectorExactFilter<std::int64_t, true>(min_value, max_value);
+ } else {
+ return new BitVectorExactFilter<std::int64_t, false>(min_value, max_value);
+ }
+ default:
+ LOG(FATAL) << "Invalid attribute size for BitVectorExactFilter: "
+ << attr_size;
+ }
+ }
case serialization::LIPFilterType::SINGLE_IDENTITY_HASH_FILTER: {
const std::size_t attr_size =
proto.GetExtension(serialization::SingleIdentityHashFilter::attribute_size);
@@ -57,6 +98,15 @@ LIPFilter* LIPFilterFactory::ReconstructFromProto(const serialization::LIPFilter
bool LIPFilterFactory::ProtoIsValid(const serialization::LIPFilter &proto) {
switch (proto.lip_filter_type()) {
+ case serialization::LIPFilterType::BIT_VECTOR_EXACT_FILTER: {
+ const std::size_t attr_size =
+ proto.GetExtension(serialization::BitVectorExactFilter::attribute_size);
+ const std::int64_t min_value =
+ proto.GetExtension(serialization::BitVectorExactFilter::min_value);
+ const std::int64_t max_value =
+ proto.GetExtension(serialization::BitVectorExactFilter::max_value);
+ return (attr_size != 0 && max_value >= min_value);
+ }
case serialization::LIPFilterType::SINGLE_IDENTITY_HASH_FILTER: {
const std::size_t attr_size =
proto.GetExtension(serialization::SingleIdentityHashFilter::attribute_size);
[09/10] incubator-quickstep git commit: Add BitVectorExactFilter as a
LIP filter and supports Join-to-Semijoin transformation.
Posted by ji...@apache.org.
Add BitVectorExactFilter as a LIP filter and supports Join-to-Semijoin transformation.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/4ba819c5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/4ba819c5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/4ba819c5
Branch: refs/heads/reduce-group-by-attrs
Commit: 4ba819c5b82af1d9284525bd7a16784e0254be3f
Parents: 5ffdaaf
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Thu Oct 27 14:16:32 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Tue Jan 31 16:57:09 2017 -0600
----------------------------------------------------------------------
query_optimizer/CMakeLists.txt | 4 +
query_optimizer/ExecutionGenerator.cpp | 62 +++
query_optimizer/ExecutionGenerator.hpp | 8 +
query_optimizer/LIPFilterGenerator.cpp | 109 +++--
query_optimizer/LIPFilterGenerator.hpp | 49 ++-
query_optimizer/PhysicalGenerator.cpp | 19 +-
query_optimizer/cost_model/CMakeLists.txt | 5 +
query_optimizer/cost_model/SimpleCostModel.cpp | 9 +
query_optimizer/cost_model/SimpleCostModel.hpp | 5 +
.../cost_model/StarSchemaSimpleCostModel.cpp | 163 ++++++-
.../cost_model/StarSchemaSimpleCostModel.hpp | 83 ++++
query_optimizer/expressions/ExpressionUtil.hpp | 8 +-
query_optimizer/physical/CMakeLists.txt | 14 +
query_optimizer/physical/FilterJoin.cpp | 115 +++++
query_optimizer/physical/FilterJoin.hpp | 187 ++++++++
.../physical/LIPFilterConfiguration.hpp | 265 ++++++++---
query_optimizer/physical/PatternMatcher.hpp | 2 +
query_optimizer/physical/PhysicalType.hpp | 1 +
query_optimizer/physical/TopLevelPlan.hpp | 3 +-
query_optimizer/rules/AttachLIPFilters.cpp | 28 +-
query_optimizer/rules/CMakeLists.txt | 22 +
query_optimizer/rules/InjectJoinFilters.cpp | 438 +++++++++++++++++++
query_optimizer/rules/InjectJoinFilters.hpp | 116 +++++
query_optimizer/tests/OptimizerTextTest.cpp | 2 +
relational_operators/BuildLIPFilterOperator.cpp | 154 +++++++
relational_operators/BuildLIPFilterOperator.hpp | 200 +++++++++
relational_operators/CMakeLists.txt | 24 +
relational_operators/WorkOrder.proto | 49 ++-
relational_operators/WorkOrderFactory.cpp | 45 ++
utility/CMakeLists.txt | 1 +
utility/PlanVisualizer.cpp | 24 +-
utility/lip_filter/BitVectorExactFilter.hpp | 202 +++++++++
utility/lip_filter/CMakeLists.txt | 11 +
utility/lip_filter/LIPFilter.hpp | 2 +-
utility/lip_filter/LIPFilter.proto | 25 +-
utility/lip_filter/LIPFilterDeployment.cpp | 72 +--
utility/lip_filter/LIPFilterDeployment.hpp | 28 +-
utility/lip_filter/LIPFilterFactory.cpp | 50 +++
38 files changed, 2394 insertions(+), 210 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 0ca971d..7f90e11 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -96,6 +96,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
quickstep_queryoptimizer_physical_CreateTable
quickstep_queryoptimizer_physical_DeleteTuples
quickstep_queryoptimizer_physical_DropTable
+ quickstep_queryoptimizer_physical_FilterJoin
quickstep_queryoptimizer_physical_HashJoin
quickstep_queryoptimizer_physical_InsertSelection
quickstep_queryoptimizer_physical_InsertTuple
@@ -115,6 +116,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
quickstep_queryoptimizer_physical_WindowAggregate
quickstep_relationaloperators_AggregationOperator
quickstep_relationaloperators_BuildHashOperator
+ quickstep_relationaloperators_BuildLIPFilterOperator
quickstep_relationaloperators_CreateIndexOperator
quickstep_relationaloperators_CreateTableOperator
quickstep_relationaloperators_DeleteOperator
@@ -161,6 +163,7 @@ target_link_libraries(quickstep_queryoptimizer_LIPFilterGenerator
quickstep_queryoptimizer_QueryPlan
quickstep_queryoptimizer_expressions_ExprId
quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_FilterJoin
quickstep_queryoptimizer_physical_HashJoin
quickstep_queryoptimizer_physical_LIPFilterConfiguration
quickstep_queryoptimizer_physical_Physical
@@ -206,6 +209,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_logical_Logical
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_rules_AttachLIPFilters
+ quickstep_queryoptimizer_rules_InjectJoinFilters
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
quickstep_queryoptimizer_rules_ReorderColumns
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index e25b8ad..ce1452e 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -76,6 +76,7 @@
#include "query_optimizer/physical/CreateTable.hpp"
#include "query_optimizer/physical/DeleteTuples.hpp"
#include "query_optimizer/physical/DropTable.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/InsertSelection.hpp"
#include "query_optimizer/physical/InsertTuple.hpp"
@@ -95,6 +96,7 @@
#include "query_optimizer/physical/WindowAggregate.hpp"
#include "relational_operators/AggregationOperator.hpp"
#include "relational_operators/BuildHashOperator.hpp"
+#include "relational_operators/BuildLIPFilterOperator.hpp"
#include "relational_operators/CreateIndexOperator.hpp"
#include "relational_operators/CreateTableOperator.hpp"
#include "relational_operators/DeleteOperator.hpp"
@@ -271,6 +273,9 @@ void ExecutionGenerator::generatePlanInternal(
case P::PhysicalType::kDropTable:
return convertDropTable(
std::static_pointer_cast<const P::DropTable>(physical_plan));
+ case P::PhysicalType::kFilterJoin:
+ return convertFilterJoin(
+ std::static_pointer_cast<const P::FilterJoin>(physical_plan));
case P::PhysicalType::kHashJoin:
return convertHashJoin(
std::static_pointer_cast<const P::HashJoin>(physical_plan));
@@ -608,6 +613,63 @@ void ExecutionGenerator::convertSharedSubplanReference(const physical::SharedSub
}
}
+void ExecutionGenerator::convertFilterJoin(const P::FilterJoinPtr &physical_plan) {
+ P::PhysicalPtr probe_physical = physical_plan->left();
+ P::PhysicalPtr build_physical = physical_plan->right();
+
+ // Let B denote the build side child. If B is also a FilterJoin, then the
+ // actual "concrete" input relation is B's probe side child, and B's build
+ // side becomes a LIPFilter that is attached to the BuildLIPFilterOperator
+ // created below.
+ P::FilterJoinPtr filter_join;
+ if (P::SomeFilterJoin::MatchesWithConditionalCast(build_physical, &filter_join)) {
+ build_physical = filter_join->left();
+ DCHECK(build_physical->getPhysicalType() != P::PhysicalType::kFilterJoin);
+ }
+
+ // Convert the predicate proto.
+ QueryContext::predicate_id build_side_predicate_index = QueryContext::kInvalidPredicateId;
+ if (physical_plan->build_side_filter_predicate()) {
+ build_side_predicate_index = query_context_proto_->predicates_size();
+
+ std::unique_ptr<const Predicate> build_side_predicate(
+ convertPredicate(physical_plan->build_side_filter_predicate()));
+ query_context_proto_->add_predicates()->CopyFrom(build_side_predicate->getProto());
+ }
+
+ const CatalogRelationInfo *probe_relation_info =
+ findRelationInfoOutputByPhysical(probe_physical);
+ const CatalogRelationInfo *build_relation_info =
+ findRelationInfoOutputByPhysical(build_physical);
+
+ // Create a BuildLIPFilterOperator for the FilterJoin. This operator builds
+ // LIP filters that are applied properly in downstream operators to achieve
+ // the filter-join semantics.
+ const QueryPlan::DAGNodeIndex build_filter_operator_index =
+ execution_plan_->addRelationalOperator(
+ new BuildLIPFilterOperator(
+ query_handle_->query_id(),
+ *build_relation_info->relation,
+ build_side_predicate_index,
+ build_relation_info->isStoredRelation()));
+
+ if (!build_relation_info->isStoredRelation()) {
+ execution_plan_->addDirectDependency(build_filter_operator_index,
+ build_relation_info->producer_operator_index,
+ false /* is_pipeline_breaker */);
+ }
+
+ physical_to_output_relation_map_.emplace(
+ std::piecewise_construct,
+ std::forward_as_tuple(physical_plan),
+ std::forward_as_tuple(probe_relation_info->producer_operator_index,
+ probe_relation_info->relation));
+
+ DCHECK(lip_filter_generator_ != nullptr);
+ lip_filter_generator_->addFilterJoinInfo(physical_plan,
+ build_filter_operator_index);
+}
+
void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
// HashJoin is converted to three operators:
// BuildHash, HashJoin, DestroyHash. The second is the primary operator.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/ExecutionGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp
index 55197c9..eba6eee 100644
--- a/query_optimizer/ExecutionGenerator.hpp
+++ b/query_optimizer/ExecutionGenerator.hpp
@@ -46,6 +46,7 @@
#include "query_optimizer/physical/CreateTable.hpp"
#include "query_optimizer/physical/DeleteTuples.hpp"
#include "query_optimizer/physical/DropTable.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/InsertSelection.hpp"
#include "query_optimizer/physical/InsertTuple.hpp"
@@ -248,6 +249,13 @@ class ExecutionGenerator {
void convertSharedSubplanReference(const physical::SharedSubplanReferencePtr &physical_plan);
/**
+ * @brief Converts a FilterJoin to a BuildLIPFilter operator.
+ *
+ * @param physical_plan The FilterJoin to be converted.
+ */
+ void convertFilterJoin(const physical::FilterJoinPtr &physical_plan);
+
+ /**
* @brief Converts a HashJoin to BuildHash, HashJoin and
* DestroyHash operators.
*
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/LIPFilterGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/LIPFilterGenerator.cpp b/query_optimizer/LIPFilterGenerator.cpp
index 404037e..2ce2ea8 100644
--- a/query_optimizer/LIPFilterGenerator.cpp
+++ b/query_optimizer/LIPFilterGenerator.cpp
@@ -20,11 +20,13 @@
#include "query_optimizer/LIPFilterGenerator.hpp"
#include <map>
+#include <memory>
#include <utility>
#include <vector>
#include "catalog/CatalogAttribute.hpp"
#include "query_execution/QueryContext.pb.h"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
#include "relational_operators/RelationalOperator.hpp"
#include "types/Type.hpp"
#include "utility/lip_filter/LIPFilter.hpp"
@@ -47,7 +49,7 @@ void LIPFilterGenerator::registerAttributeMap(
if (build_it != build_info_map.end()) {
auto &map_entry = attribute_map_[node];
for (const auto &info : build_it->second) {
- E::ExprId attr_id = info.build_attribute->id();
+ E::ExprId attr_id = info->build_attribute()->id();
map_entry.emplace(attr_id, attribute_substitution_map.at(attr_id));
}
}
@@ -57,15 +59,16 @@ void LIPFilterGenerator::registerAttributeMap(
if (probe_it != probe_info_map.end()) {
auto &map_entry = attribute_map_[node];
for (const auto &info : probe_it->second) {
- E::ExprId attr_id = info.probe_attribute->id();
+ E::ExprId attr_id = info->probe_attribute()->id();
map_entry.emplace(attr_id, attribute_substitution_map.at(attr_id));
}
}
}
void LIPFilterGenerator::deployLIPFilters(QueryPlan *execution_plan,
- serialization::QueryContext *query_context_proto) const {
- LIPFilterBuilderMap lip_filter_builder_map;
+ serialization::QueryContext *query_context_proto) {
+ lip_filter_builder_map_.clear();
+ lip_filter_deployment_protos_.clear();
// Deploy builders
const auto &build_info_map = lip_filter_configuration_->getBuildInfoMap();
@@ -76,8 +79,7 @@ void LIPFilterGenerator::deployLIPFilters(QueryPlan *execution_plan,
query_context_proto,
info.builder_node,
info.builder_operator_index,
- build_it->second,
- &lip_filter_builder_map);
+ build_it->second);
}
}
@@ -90,10 +92,16 @@ void LIPFilterGenerator::deployLIPFilters(QueryPlan *execution_plan,
query_context_proto,
info.prober_node,
info.prober_operator_index,
- probe_it->second,
- lip_filter_builder_map);
+ probe_it->second);
}
}
+
+ // Attach LIPFilterDeployment information to the RelationalOperators.
+ for (const auto &entry : lip_filter_deployment_protos_) {
+ RelationalOperator *relop =
+ execution_plan->getQueryPlanDAGMutable()->getNodePayloadMutable(entry.first);
+ relop->deployLIPFilters(entry.second.first);
+ }
}
void LIPFilterGenerator::deployBuilderInternal(
@@ -101,30 +109,46 @@ void LIPFilterGenerator::deployBuilderInternal(
serialization::QueryContext *query_context_proto,
const physical::PhysicalPtr &builder_node,
const QueryPlan::DAGNodeIndex builder_operator_index,
- const std::vector<physical::LIPFilterBuildInfo> &build_info_vec,
- LIPFilterBuilderMap *lip_filter_builder_map) const {
- const auto lip_deployment_index = query_context_proto->lip_filter_deployments_size();
+ const std::vector<physical::LIPFilterBuildInfoPtr> &build_info_vec) {
auto *lip_filter_deployment_info_proto =
- query_context_proto->add_lip_filter_deployments();
- lip_filter_deployment_info_proto->set_action_type(serialization::LIPFilterActionType::BUILD);
+ getLIPFilterDeploymentProto(builder_operator_index, query_context_proto);
const auto &builder_attribute_map = attribute_map_.at(builder_node);
for (const auto &info : build_info_vec) {
// Add the LIPFilter information into query context.
const QueryContext::lip_filter_id lip_filter_id = query_context_proto->lip_filters_size();
serialization::LIPFilter *lip_filter_proto = query_context_proto->add_lip_filters();
- const CatalogAttribute *target_attr = builder_attribute_map.at(info.build_attribute->id());
+ const CatalogAttribute *target_attr =
+ builder_attribute_map.at(info->build_attribute()->id());
const Type &attr_type = target_attr->getType();
- switch (info.filter_type) {
+ switch (info->filter_type()) {
case LIPFilterType::kSingleIdentityHashFilter: {
DCHECK(!attr_type.isVariableLength());
+ const P::SingleIdentityHashFilterBuildInfo &sihf_info =
+ *std::static_pointer_cast<const P::SingleIdentityHashFilterBuildInfo>(info);
lip_filter_proto->set_lip_filter_type(
serialization::LIPFilterType::SINGLE_IDENTITY_HASH_FILTER);
- lip_filter_proto->SetExtension(
- serialization::SingleIdentityHashFilter::filter_cardinality, info.filter_cardinality);
- lip_filter_proto->SetExtension(
- serialization::SingleIdentityHashFilter::attribute_size, attr_type.minimumByteLength());
+ lip_filter_proto->SetExtension(serialization::SingleIdentityHashFilter::filter_cardinality,
+ sihf_info.filter_cardinality());
+ lip_filter_proto->SetExtension(serialization::SingleIdentityHashFilter::attribute_size,
+ attr_type.minimumByteLength());
+ break;
+ }
+ case LIPFilterType::kBitVectorExactFilter: {
+ DCHECK(!attr_type.isVariableLength());
+ const P::BitVectorExactFilterBuildInfo &bvef_info =
+ *std::static_pointer_cast<const P::BitVectorExactFilterBuildInfo>(info);
+ lip_filter_proto->set_lip_filter_type(
+ serialization::LIPFilterType::BIT_VECTOR_EXACT_FILTER);
+ lip_filter_proto->SetExtension(serialization::BitVectorExactFilter::min_value,
+ bvef_info.min_value());
+ lip_filter_proto->SetExtension(serialization::BitVectorExactFilter::max_value,
+ bvef_info.max_value());
+ lip_filter_proto->SetExtension(serialization::BitVectorExactFilter::attribute_size,
+ attr_type.minimumByteLength());
+ lip_filter_proto->SetExtension(serialization::BitVectorExactFilter::is_anti_filter,
+ bvef_info.is_anti_filter());
break;
}
default:
@@ -133,21 +157,16 @@ void LIPFilterGenerator::deployBuilderInternal(
}
// Register the builder information which is needed later by the probers.
- lip_filter_builder_map->emplace(
- std::make_pair(info.build_attribute->id(), builder_node),
+ lip_filter_builder_map_.emplace(
+ std::make_pair(info->build_attribute()->id(), builder_node),
std::make_pair(lip_filter_id, builder_operator_index));
// Add the builder deployment information into query context.
- auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_entries();
+ auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_build_entries();
lip_filter_entry_proto->set_lip_filter_id(lip_filter_id);
lip_filter_entry_proto->set_attribute_id(target_attr->getID());
lip_filter_entry_proto->mutable_attribute_type()->CopyFrom(attr_type.getProto());
}
-
- // Attach the LIPFilterDeployment information to the RelationalOperator.
- RelationalOperator *relop =
- execution_plan->getQueryPlanDAGMutable()->getNodePayloadMutable(builder_operator_index);
- relop->deployLIPFilters(lip_deployment_index);
}
void LIPFilterGenerator::deployProberInteral(
@@ -155,23 +174,21 @@ void LIPFilterGenerator::deployProberInteral(
serialization::QueryContext *query_context_proto,
const physical::PhysicalPtr &prober_node,
const QueryPlan::DAGNodeIndex prober_operator_index,
- const std::vector<physical::LIPFilterProbeInfo> &probe_info_vec,
- const LIPFilterBuilderMap &lip_filter_builder_map) const {
- const auto lip_deployment_index = query_context_proto->lip_filter_deployments_size();
+ const std::vector<physical::LIPFilterProbeInfoPtr> &probe_info_vec) {
auto *lip_filter_deployment_info_proto =
- query_context_proto->add_lip_filter_deployments();
- lip_filter_deployment_info_proto->set_action_type(serialization::LIPFilterActionType::PROBE);
+ getLIPFilterDeploymentProto(prober_operator_index, query_context_proto);
const auto &prober_attribute_map = attribute_map_.at(prober_node);
for (const auto &info : probe_info_vec) {
// Find the corresponding builder for the to-be-probed LIPFilter.
const auto &builder_info =
- lip_filter_builder_map.at(
- std::make_pair(info.build_attribute->id(), info.builder));
- const CatalogAttribute *target_attr = prober_attribute_map.at(info.probe_attribute->id());
+ lip_filter_builder_map_.at(
+ std::make_pair(info->build_attribute()->id(), info->builder()));
+ const CatalogAttribute *target_attr =
+ prober_attribute_map.at(info->probe_attribute()->id());
// Add the prober deployment information into query context.
- auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_entries();
+ auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_probe_entries();
lip_filter_entry_proto->set_lip_filter_id(builder_info.first);
lip_filter_entry_proto->set_attribute_id(target_attr->getID());
lip_filter_entry_proto->mutable_attribute_type()->CopyFrom(
@@ -183,11 +200,23 @@ void LIPFilterGenerator::deployProberInteral(
builder_info.second,
true /* is_pipeline_breaker */);
}
+}
- // Attach the LIPFilterDeployment information to the RelationalOperator.
- RelationalOperator *relop =
- execution_plan->getQueryPlanDAGMutable()->getNodePayloadMutable(prober_operator_index);
- relop->deployLIPFilters(lip_deployment_index);
+serialization::LIPFilterDeployment* LIPFilterGenerator::getLIPFilterDeploymentProto(
+ const QueryPlan::DAGNodeIndex op_index,
+ serialization::QueryContext *query_context_proto) {
+ const auto proto_it = lip_filter_deployment_protos_.find(op_index);
+ if (proto_it == lip_filter_deployment_protos_.end()) {
+ const int lip_deployment_index =
+ query_context_proto->lip_filter_deployments_size();
+ auto *lip_filter_deployment_proto =
+ query_context_proto->add_lip_filter_deployments();
+ lip_filter_deployment_protos_.emplace(
+ op_index, std::make_pair(lip_deployment_index, lip_filter_deployment_proto));
+ return lip_filter_deployment_proto;
+ } else {
+ return proto_it->second.second;
+ }
}
} // namespace optimizer
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/LIPFilterGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/LIPFilterGenerator.hpp b/query_optimizer/LIPFilterGenerator.hpp
index 9d191a1..750499d 100644
--- a/query_optimizer/LIPFilterGenerator.hpp
+++ b/query_optimizer/LIPFilterGenerator.hpp
@@ -30,6 +30,7 @@
#include "query_optimizer/expressions/ExprId.hpp"
#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/Selection.hpp"
@@ -39,7 +40,12 @@
namespace quickstep {
-namespace serialization { class QueryContext; }
+namespace serialization {
+
+class QueryContext;
+class LIPFilterDeployment;
+
+}
class CatalogAttribute;
@@ -93,6 +99,20 @@ class LIPFilterGenerator {
/**
* @brief Add physical-to-execution mapping information for deploying LIPFilters
+ * to a FilterJoin node.
+ *
+ * @param filter_join A physical FilterJoin node.
+ * @param build_filter_operator_index The index of the BuildLIPFilterOperator
+ * that corresponds to \p filter_join in the execution plan.
+ */
+ void addFilterJoinInfo(const physical::FilterJoinPtr &filter_join,
+ const QueryPlan::DAGNodeIndex build_filter_operator_index) {
+ builder_infos_.emplace_back(filter_join, build_filter_operator_index);
+ prober_infos_.emplace_back(filter_join, build_filter_operator_index);
+ }
+
+ /**
+ * @brief Add physical-to-execution mapping information for deploying LIPFilters
* to a hash-join.
*
* @param hash_join A physical HashJoin node.
@@ -128,7 +148,7 @@ class LIPFilterGenerator {
* @param query_context_proto QueryContext protobuf for the execution plan.
*/
void deployLIPFilters(QueryPlan *execution_plan,
- serialization::QueryContext *query_context_proto) const;
+ serialization::QueryContext *query_context_proto);
private:
/**
@@ -157,24 +177,21 @@ class LIPFilterGenerator {
const QueryPlan::DAGNodeIndex prober_operator_index;
};
- // Maps each LIPFilter's building attribute to the LIPFilter's id in QueryContext
- // as well as the LIPFilter's building relational operator's index.
- typedef std::map<std::pair<expressions::ExprId, physical::PhysicalPtr>,
- std::pair<QueryContext::lip_filter_id, QueryPlan::DAGNodeIndex>> LIPFilterBuilderMap;
-
void deployBuilderInternal(QueryPlan *execution_plan,
serialization::QueryContext *query_context_proto,
const physical::PhysicalPtr &builder_node,
const QueryPlan::DAGNodeIndex builder_operator_index,
- const std::vector<physical::LIPFilterBuildInfo> &build_info_vec,
- LIPFilterBuilderMap *lip_filter_builder_map) const;
+ const std::vector<physical::LIPFilterBuildInfoPtr> &build_info_vec);
void deployProberInteral(QueryPlan *execution_plan,
serialization::QueryContext *query_context_proto,
const physical::PhysicalPtr &prober_node,
const QueryPlan::DAGNodeIndex prober_operator_index,
- const std::vector<physical::LIPFilterProbeInfo> &probe_info_vec,
- const LIPFilterBuilderMap &lip_filter_builder_map) const;
+ const std::vector<physical::LIPFilterProbeInfoPtr> &probe_info_vec);
+
+ serialization::LIPFilterDeployment* getLIPFilterDeploymentProto(
+ const QueryPlan::DAGNodeIndex op_index,
+ serialization::QueryContext *query_context_proto);
const physical::LIPFilterConfigurationPtr lip_filter_configuration_;
@@ -183,6 +200,16 @@ class LIPFilterGenerator {
std::map<physical::PhysicalPtr, std::map<expressions::ExprId, const CatalogAttribute *>> attribute_map_;
+ // Maps each LIPFilter's building attribute to the LIPFilter's id in QueryContext
+ // as well as the LIPFilter's building relational operator's index.
+ std::map<std::pair<expressions::ExprId, physical::PhysicalPtr>,
+ std::pair<QueryContext::lip_filter_id, QueryPlan::DAGNodeIndex>> lip_filter_builder_map_;
+
+ // Maps each relational operator's index to the attached LIPFilterDeployment's
+ // index and proto.
+ std::map<QueryPlan::DAGNodeIndex,
+ std::pair<int, serialization::LIPFilterDeployment *>> lip_filter_deployment_protos_;
+
DISALLOW_COPY_AND_ASSIGN(LIPFilterGenerator);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index bd05267..5dc0ffb 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -27,6 +27,7 @@
#include "query_optimizer/logical/Logical.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/rules/AttachLIPFilters.hpp"
+#include "query_optimizer/rules/InjectJoinFilters.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
#include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp"
#include "query_optimizer/rules/ReorderColumns.hpp"
@@ -56,6 +57,14 @@ DEFINE_bool(reorder_hash_joins, true,
"cardinality and selective tables to be joined first, which is suitable "
"for queries on star-schema tables.");
+DEFINE_bool(use_filter_joins, true,
+ "If true, apply an optimization that strength-reduces HashJoins to "
+ "FilterJoins (implemented as LIPFilters attached to some anchoring "
+ "operators. Briefly speaking, in the case that the join attribute has "
+ "consecutive integer values bounded in a reasonably small range, we "
+ "build a BitVector on the build-side attribute and use the BitVector "
+ "to filter the probe side table.");
+
DEFINE_bool(use_lip_filters, true,
"If true, use LIP (Lookahead Information Passing) filters to accelerate "
"query processing. LIP filters are effective for queries on star schema "
@@ -133,9 +142,13 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
rules.emplace_back(new ReorderColumns());
}
- // NOTE(jianqiao): Adding rules after AttachLIPFilters requires extra handling
- // of LIPFilterConfiguration for transformed nodes. So currently it is suggested
- // that all the new rules be placed before this point.
+ // NOTE(jianqiao): Adding rules after InjectJoinFilters (or AttachLIPFilters) requires
+ // extra handling of LIPFilterConfiguration for transformed nodes. So currently it is
+ // suggested that all the new rules be placed before this point.
+ if (FLAGS_use_filter_joins) {
+ rules.emplace_back(new InjectJoinFilters());
+ }
+
if (FLAGS_use_lip_filters) {
rules.emplace_back(new AttachLIPFilters());
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/cost_model/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/CMakeLists.txt b/query_optimizer/cost_model/CMakeLists.txt
index 90133e7..5f28bb3 100644
--- a/query_optimizer/cost_model/CMakeLists.txt
+++ b/query_optimizer/cost_model/CMakeLists.txt
@@ -33,6 +33,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_SimpleCostModel
quickstep_catalog_CatalogRelationStatistics
quickstep_queryoptimizer_costmodel_CostModel
quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_FilterJoin
quickstep_queryoptimizer_physical_HashJoin
quickstep_queryoptimizer_physical_NestedLoopsJoin
quickstep_queryoptimizer_physical_Physical
@@ -49,6 +50,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostMod
glog
quickstep_catalog_CatalogRelation
quickstep_catalog_CatalogRelationStatistics
+ quickstep_catalog_CatalogTypedefs
quickstep_queryoptimizer_costmodel_CostModel
quickstep_queryoptimizer_expressions_AttributeReference
quickstep_queryoptimizer_expressions_ComparisonExpression
@@ -60,6 +62,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostMod
quickstep_queryoptimizer_expressions_PatternMatcher
quickstep_queryoptimizer_expressions_Predicate
quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_FilterJoin
quickstep_queryoptimizer_physical_HashJoin
quickstep_queryoptimizer_physical_NestedLoopsJoin
quickstep_queryoptimizer_physical_PatternMatcher
@@ -72,6 +75,8 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostMod
quickstep_queryoptimizer_physical_TableReference
quickstep_queryoptimizer_physical_TopLevelPlan
quickstep_queryoptimizer_physical_WindowAggregate
+ quickstep_types_NullType
+ quickstep_types_TypedValue
quickstep_utility_Macros)
# Module all-in-one library:
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/cost_model/SimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/SimpleCostModel.cpp b/query_optimizer/cost_model/SimpleCostModel.cpp
index 7808898..e9d2e3a 100644
--- a/query_optimizer/cost_model/SimpleCostModel.cpp
+++ b/query_optimizer/cost_model/SimpleCostModel.cpp
@@ -27,6 +27,7 @@
#include "query_optimizer/cost_model/CostModel.hpp"
#include "query_optimizer/physical/Aggregate.hpp"
#include "query_optimizer/physical/NestedLoopsJoin.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/PhysicalType.hpp"
@@ -61,6 +62,9 @@ std::size_t SimpleCostModel::estimateCardinality(
case P::PhysicalType::kTableGenerator:
return estimateCardinalityForTableGenerator(
std::static_pointer_cast<const P::TableGenerator>(physical_plan));
+ case P::PhysicalType::kFilterJoin:
+ return estimateCardinalityForFilterJoin(
+ std::static_pointer_cast<const P::FilterJoin>(physical_plan));
case P::PhysicalType::kHashJoin:
return estimateCardinalityForHashJoin(
std::static_pointer_cast<const P::HashJoin>(physical_plan));
@@ -119,6 +123,11 @@ std::size_t SimpleCostModel::estimateCardinalityForTableGenerator(
return physical_plan->generator_function_handle()->getEstimatedCardinality();
}
+std::size_t SimpleCostModel::estimateCardinalityForFilterJoin(
+ const P::FilterJoinPtr &physical_plan) {
+ return estimateCardinality(physical_plan->left());
+}
+
std::size_t SimpleCostModel::estimateCardinalityForHashJoin(
const P::HashJoinPtr &physical_plan) {
return std::max(estimateCardinality(physical_plan->left()),
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/cost_model/SimpleCostModel.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/SimpleCostModel.hpp b/query_optimizer/cost_model/SimpleCostModel.hpp
index 16366cd..4edc2fe 100644
--- a/query_optimizer/cost_model/SimpleCostModel.hpp
+++ b/query_optimizer/cost_model/SimpleCostModel.hpp
@@ -26,6 +26,7 @@
#include "query_optimizer/cost_model/CostModel.hpp"
#include "query_optimizer/physical/Aggregate.hpp"
#include "query_optimizer/physical/NestedLoopsJoin.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/Selection.hpp"
@@ -80,6 +81,10 @@ class SimpleCostModel : public CostModel {
std::size_t estimateCardinalityForSort(
const physical::SortPtr &physical_plan);
+ // Returns the left child's cardinality
+ std::size_t estimateCardinalityForFilterJoin(
+ const physical::FilterJoinPtr &physical_plan);
+
// Returns the larger value of the estimated cardinalities of two
// input plans.
std::size_t estimateCardinalityForHashJoin(
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index 75b1b2b..7afa1c3 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -21,11 +21,11 @@
#include <algorithm>
#include <memory>
-#include <unordered_map>
#include <vector>
#include "catalog/CatalogRelation.hpp"
#include "catalog/CatalogRelationStatistics.hpp"
+#include "catalog/CatalogTypedefs.hpp"
#include "query_optimizer/cost_model/CostModel.hpp"
#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ComparisonExpression.hpp"
@@ -38,6 +38,7 @@
#include "query_optimizer/expressions/PatternMatcher.hpp"
#include "query_optimizer/physical/Aggregate.hpp"
#include "query_optimizer/physical/NestedLoopsJoin.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/PatternMatcher.hpp"
#include "query_optimizer/physical/Physical.hpp"
@@ -48,6 +49,8 @@
#include "query_optimizer/physical/TableGenerator.hpp"
#include "query_optimizer/physical/TableReference.hpp"
#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "types/TypedValue.hpp"
+#include "types/NullType.hpp"
#include "glog/logging.h"
@@ -73,6 +76,9 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinality(
case P::PhysicalType::kTableGenerator:
return estimateCardinalityForTableGenerator(
std::static_pointer_cast<const P::TableGenerator>(physical_plan));
+ case P::PhysicalType::kFilterJoin:
+ return estimateCardinalityForFilterJoin(
+ std::static_pointer_cast<const P::FilterJoin>(physical_plan));
case P::PhysicalType::kHashJoin:
return estimateCardinalityForHashJoin(
std::static_pointer_cast<const P::HashJoin>(physical_plan));
@@ -134,6 +140,17 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForTableGenerator(
return physical_plan->generator_function_handle()->getEstimatedCardinality();
}
+std::size_t StarSchemaSimpleCostModel::estimateCardinalityForFilterJoin(
+ const P::FilterJoinPtr &physical_plan) {
+ double build_side_filter_selectivity =
+ estimateSelectivityForPredicate(physical_plan->build_side_filter_predicate(),
+ physical_plan->right());
+ std::size_t left_cardinality = estimateCardinality(physical_plan->left());
+ double right_selectivity = estimateSelectivity(physical_plan->right());
+ return static_cast<std::size_t>(
+ left_cardinality * build_side_filter_selectivity * right_selectivity + 0.5);
+}
+
std::size_t StarSchemaSimpleCostModel::estimateCardinalityForHashJoin(
const P::HashJoinPtr &physical_plan) {
std::size_t left_cardinality = estimateCardinality(physical_plan->left());
@@ -216,6 +233,18 @@ std::size_t StarSchemaSimpleCostModel::estimateNumDistinctValues(
}
break;
}
+ case P::PhysicalType::kFilterJoin: {
+ const P::FilterJoinPtr &filter_join =
+ std::static_pointer_cast<const P::FilterJoin>(physical_plan);
+ if (E::ContainsExprId(filter_join->left()->getOutputAttributes(), attribute_id)) {
+ std::size_t left_child_num_distinct_values =
+ estimateNumDistinctValues(attribute_id, filter_join->left());
+ double right_child_selectivity =
+ estimateSelectivity(filter_join->right());
+ return static_cast<std::size_t>(
+ left_child_num_distinct_values * right_child_selectivity + 0.5);
+ }
+ }
case P::PhysicalType::kHashJoin: {
const P::HashJoinPtr &hash_join =
std::static_pointer_cast<const P::HashJoin>(physical_plan);
@@ -254,6 +283,16 @@ double StarSchemaSimpleCostModel::estimateSelectivity(
double child_selectivity = estimateSelectivity(selection->input());
return filter_selectivity * child_selectivity;
}
+ case P::PhysicalType::kFilterJoin: {
+ const P::FilterJoinPtr &filter_join =
+ std::static_pointer_cast<const P::FilterJoin>(physical_plan);
+ double left_selectivity = estimateSelectivity(filter_join->left());
+ double right_selectivity = estimateSelectivity(filter_join->right());
+ double build_side_filter_selectivity =
+ estimateSelectivityForPredicate(filter_join->build_side_filter_predicate(),
+ filter_join->right());
+ return left_selectivity * right_selectivity * build_side_filter_selectivity;
+ }
case P::PhysicalType::kHashJoin: {
const P::HashJoinPtr &hash_join =
std::static_pointer_cast<const P::HashJoin>(physical_plan);
@@ -383,18 +422,124 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
std::size_t StarSchemaSimpleCostModel::getNumDistinctValues(
const E::ExprId attribute_id,
const P::TableReferencePtr &table_reference) {
- const CatalogRelation &relation = *table_reference->relation();
- const std::vector<E::AttributeReferencePtr> &attributes = table_reference->attribute_list();
- for (std::size_t i = 0; i < attributes.size(); ++i) {
- if (attributes[i]->id() == attribute_id) {
- const CatalogRelationStatistics &stat = relation.getStatistics();
- if (stat.hasNumDistinctValues(i)) {
- return stat.getNumDistinctValues(i);
+ const auto rel_attr_id =
+ findCatalogRelationAttributeId(table_reference, attribute_id);
+ if (rel_attr_id != kInvalidAttributeID) {
+ const CatalogRelationStatistics &stat =
+ table_reference->relation()->getStatistics();
+ if (stat.hasNumDistinctValues(rel_attr_id)) {
+ return stat.getNumDistinctValues(rel_attr_id);
+ }
+ }
+ return estimateCardinalityForTableReference(table_reference);
+}
+
+bool StarSchemaSimpleCostModel::impliesUniqueAttributes(
+ const P::PhysicalPtr &physical_plan,
+ const std::vector<E::AttributeReferencePtr> &attributes) {
+ switch (physical_plan->getPhysicalType()) {
+ case P::PhysicalType::kAggregate: {
+ const P::AggregatePtr &aggregate =
+ std::static_pointer_cast<const P::Aggregate>(physical_plan);
+ return E::SubsetOfExpressions(aggregate->grouping_expressions(), attributes);
+ }
+ case P::PhysicalType::kHashJoin: {
+ const P::HashJoinPtr &hash_join =
+ std::static_pointer_cast<const P::HashJoin>(physical_plan);
+ bool unique_from_left =
+ impliesUniqueAttributes(hash_join->right(), hash_join->right_join_attributes())
+ && impliesUniqueAttributes(hash_join->left(), attributes);
+ bool unique_from_right =
+ impliesUniqueAttributes(hash_join->left(), hash_join->left_join_attributes())
+ && impliesUniqueAttributes(hash_join->right(), attributes);
+ return unique_from_left || unique_from_right;
+ }
+ case P::PhysicalType::kTableReference: {
+ const P::TableReferencePtr &table_reference =
+ std::static_pointer_cast<const P::TableReference>(physical_plan);
+ const CatalogRelationStatistics &stat =
+ table_reference->relation()->getStatistics();
+ if (stat.hasNumTuples()) {
+ const std::size_t num_tuples = stat.getNumTuples();
+ for (const auto &attr : attributes) {
+ const attribute_id rel_attr_id =
+ findCatalogRelationAttributeId(table_reference, attr->id());
+ if (rel_attr_id != kInvalidAttributeID &&
+ stat.hasNumDistinctValues(rel_attr_id) &&
+ stat.getNumDistinctValues(rel_attr_id) == num_tuples) {
+ return true;
+ }
+ }
}
+ return false;
+ }
+ case P::PhysicalType::kSample: // Fall through
+ case P::PhysicalType::kSelection:
+ case P::PhysicalType::kSort: {
+ DCHECK_EQ(physical_plan->getNumChildren(), 1u);
+ return impliesUniqueAttributes(physical_plan->children()[0], attributes);
+ }
+ default:
break;
+ }
+ return false;
+}
+
+TypedValue StarSchemaSimpleCostModel::findCatalogRelationStat(
+ const P::PhysicalPtr &physical_plan,
+ const E::ExprId attr_id,
+ const StatType stat_type,
+ bool *is_exact_stat) {
+ P::TableReferencePtr table_reference;
+ if (P::SomeTableReference::MatchesWithConditionalCast(physical_plan, &table_reference)) {
+ const attribute_id rel_attr_id =
+ findCatalogRelationAttributeId(table_reference, attr_id);
+ if (rel_attr_id != kInvalidAttributeID) {
+ const CatalogRelationStatistics &stat =
+ table_reference->relation()->getStatistics();
+
+ if (is_exact_stat != nullptr) {
+ *is_exact_stat = stat.isExact();
+ }
+
+ switch (stat_type) {
+ case StatType::kMin: {
+ if (stat.hasMinValue(rel_attr_id)) {
+ return stat.getMinValue(rel_attr_id);
+ }
+ break;
+ }
+ case StatType::kMax: {
+ if (stat.hasMaxValue(rel_attr_id)) {
+ return stat.getMaxValue(rel_attr_id);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ return NullType::InstanceNullable().makeNullValue();
}
}
- return estimateCardinalityForTableReference(table_reference);
+
+ for (const auto &child : physical_plan->children()) {
+ if (E::ContainsExprId(child->getOutputAttributes(), attr_id)) {
+ return findCatalogRelationStat(child, attr_id, stat_type, is_exact_stat);
+ }
+ }
+ return NullType::InstanceNullable().makeNullValue();
+}
+
+attribute_id StarSchemaSimpleCostModel::findCatalogRelationAttributeId(
+ const physical::TableReferencePtr &table_reference,
+ const expressions::ExprId expr_id) {
+ const auto &attribute_list = table_reference->attribute_list();
+ for (std::size_t i = 0; i < attribute_list.size(); ++i) {
+ if (attribute_list[i]->id() == expr_id) {
+ return i;
+ }
+ }
+ return kInvalidAttributeID;
}
} // namespace cost
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
index 6f6aa29..cbe18f4 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
@@ -23,11 +23,14 @@
#include <cstddef>
#include <vector>
+#include "catalog/CatalogTypedefs.hpp"
#include "query_optimizer/cost_model/CostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ExprId.hpp"
#include "query_optimizer/expressions/Predicate.hpp"
#include "query_optimizer/physical/Aggregate.hpp"
#include "query_optimizer/physical/NestedLoopsJoin.hpp"
+#include "query_optimizer/physical/FilterJoin.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/Selection.hpp"
@@ -36,6 +39,7 @@
#include "query_optimizer/physical/TableReference.hpp"
#include "query_optimizer/physical/TopLevelPlan.hpp"
#include "query_optimizer/physical/WindowAggregate.hpp"
+#include "types/TypedValue.hpp"
#include "utility/Macros.hpp"
namespace quickstep {
@@ -105,10 +109,70 @@ class StarSchemaSimpleCostModel : public CostModel {
double estimateSelectivityForFilterPredicate(
const physical::PhysicalPtr &physical_plan);
+ /**
+ * @brief Check whether a set of attributes are unique (i.e. have distinct
+ * values) for a relation.
+ *
+ * @param physical_plan The physical plan that corresponds to a relation.
+ * @param attributes The set of attributes to be checked. Note that each
+ * attribute in this set must be an output attribute of the physical
+ * plan.
+ * @return True if it is guaranteed that the attributes are unique; false
+ * otherwise.
+ */
+ bool impliesUniqueAttributes(
+ const physical::PhysicalPtr &physical_plan,
+ const std::vector<expressions::AttributeReferencePtr> &attributes);
+
+ /**
+ * @brief For a physical plan attribute, find its correponding catalog attribute's
+ * MIN statistic. Returns Null value if there is no corresponding catalog
+ * attribute for the physical plan attribute.
+ *
+ * @param physical_plan The physical plan.
+ * @param attribute The attribute. Must be an output attribute of the given
+ * physical plan.
+ * @param is_exact_stat If this pointer is not null, its pointed content will
+ * be modified by this method to indicate whether the returned statistic
+ * is EXACT for the stored relation (i.e. not outdated or estimated).
+ * @return The MIN statistic for the attribute.
+ */
+ TypedValue findMinValueStat(
+ const physical::PhysicalPtr &physical_plan,
+ const expressions::AttributeReferencePtr &attribute,
+ bool *is_exact_stat = nullptr) {
+ return findCatalogRelationStat(
+ physical_plan, attribute->id(), StatType::kMin, is_exact_stat);
+ }
+
+ /**
+ * @brief For a physical plan attribute, find its correponding catalog attribute's
+ * MAX statistic. Returns Null value if there is no corresponding catalog
+ * attribute for the physical plan attribute.
+ *
+ * @param physical_plan The physical plan.
+ * @param attribute The attribute. Must be an output attribute of the given
+ * physical plan.
+ * @param is_exact_stat If this pointer is not null, its pointed content will
+ * be modified by this method to indicate whether the returned statistic
+ * is EXACT for the stored relation (i.e. not not outdated or estimated).
+ * @return The MAX statistic for the attribute.
+ */
+ TypedValue findMaxValueStat(
+ const physical::PhysicalPtr &physical_plan,
+ const expressions::AttributeReferencePtr &attribute,
+ bool *is_exact_stat = nullptr) {
+ return findCatalogRelationStat(
+ physical_plan, attribute->id(), StatType::kMax, is_exact_stat);
+ }
+
private:
std::size_t estimateCardinalityForAggregate(
const physical::AggregatePtr &physical_plan);
+ std::size_t estimateCardinalityForFilterJoin(
+ const physical::FilterJoinPtr &physical_plan);
+
std::size_t estimateCardinalityForHashJoin(
const physical::HashJoinPtr &physical_plan);
@@ -144,6 +208,25 @@ class StarSchemaSimpleCostModel : public CostModel {
std::size_t getNumDistinctValues(const expressions::ExprId attribute_id,
const physical::TableReferencePtr &table_reference);
+ enum class StatType {
+ kMax = 0,
+ kMin
+ };
+
+ // For a physical plan attribute, find its correponding catalog attribute's
+ // min/max statistics. Returns Null value if there is no corresponding catalog
+ // attribute for the physical plan attribute (e.g. the attribute is the result
+ // of an expression).
+ TypedValue findCatalogRelationStat(
+ const physical::PhysicalPtr &physical_plan,
+ const expressions::ExprId expr_id,
+ const StatType stat_type,
+ bool *is_exact_stat);
+
+ // For a table reference attribute, find its correponding catalog attribute.
+ attribute_id findCatalogRelationAttributeId(
+ const physical::TableReferencePtr &table_reference,
+ const expressions::ExprId expr_id);
DISALLOW_COPY_AND_ASSIGN(StarSchemaSimpleCostModel);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/expressions/ExpressionUtil.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/ExpressionUtil.hpp b/query_optimizer/expressions/ExpressionUtil.hpp
index 422d5ab..6b8666e 100644
--- a/query_optimizer/expressions/ExpressionUtil.hpp
+++ b/query_optimizer/expressions/ExpressionUtil.hpp
@@ -122,12 +122,12 @@ bool ContainsExprId(
* contain the other operand).
* @return True if \p left is a subset of \p right.
*/
-template <class NamedExpressionType>
+template <class LeftNamedExpressionType, class RightNamedExpressionType>
bool SubsetOfExpressions(
- const std::vector<std::shared_ptr<const NamedExpressionType>> &left,
- const std::vector<std::shared_ptr<const NamedExpressionType>> &right) {
+ const std::vector<std::shared_ptr<const LeftNamedExpressionType>> &left,
+ const std::vector<std::shared_ptr<const RightNamedExpressionType>> &right) {
UnorderedNamedExpressionSet supset(right.begin(), right.end());
- for (const std::shared_ptr<const NamedExpressionType> &expr : left) {
+ for (const std::shared_ptr<const LeftNamedExpressionType> &expr : left) {
if (supset.find(expr) == supset.end()) {
return false;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/CMakeLists.txt b/query_optimizer/physical/CMakeLists.txt
index 7f26943..f68ed39 100644
--- a/query_optimizer/physical/CMakeLists.txt
+++ b/query_optimizer/physical/CMakeLists.txt
@@ -23,6 +23,7 @@ add_library(quickstep_queryoptimizer_physical_CreateIndex CreateIndex.cpp Create
add_library(quickstep_queryoptimizer_physical_CreateTable CreateTable.cpp CreateTable.hpp)
add_library(quickstep_queryoptimizer_physical_DeleteTuples DeleteTuples.cpp DeleteTuples.hpp)
add_library(quickstep_queryoptimizer_physical_DropTable DropTable.cpp DropTable.hpp)
+add_library(quickstep_queryoptimizer_physical_FilterJoin FilterJoin.cpp FilterJoin.hpp)
add_library(quickstep_queryoptimizer_physical_HashJoin HashJoin.cpp HashJoin.hpp)
add_library(quickstep_queryoptimizer_physical_InsertSelection InsertSelection.cpp InsertSelection.hpp)
add_library(quickstep_queryoptimizer_physical_InsertTuple InsertTuple.cpp InsertTuple.hpp)
@@ -115,6 +116,18 @@ target_link_libraries(quickstep_queryoptimizer_physical_DropTable
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_physical_PhysicalType
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_physical_FilterJoin
+ glog
+ quickstep_queryoptimizer_OptimizerTree
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExpressionUtil
+ quickstep_queryoptimizer_expressions_NamedExpression
+ quickstep_queryoptimizer_expressions_Predicate
+ quickstep_queryoptimizer_physical_BinaryJoin
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_utility_Cast
+ quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_physical_HashJoin
glog
quickstep_queryoptimizer_OptimizerTree
@@ -282,6 +295,7 @@ target_link_libraries(quickstep_queryoptimizer_physical
quickstep_queryoptimizer_physical_CreateTable
quickstep_queryoptimizer_physical_DeleteTuples
quickstep_queryoptimizer_physical_DropTable
+ quickstep_queryoptimizer_physical_FilterJoin
quickstep_queryoptimizer_physical_HashJoin
quickstep_queryoptimizer_physical_InsertSelection
quickstep_queryoptimizer_physical_InsertTuple
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/FilterJoin.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/FilterJoin.cpp b/query_optimizer/physical/FilterJoin.cpp
new file mode 100644
index 0000000..1817a1c
--- /dev/null
+++ b/query_optimizer/physical/FilterJoin.cpp
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/physical/FilterJoin.hpp"
+
+#include <string>
+#include <vector>
+
+#include "query_optimizer/OptimizerTree.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "utility/Cast.hpp"
+
+namespace quickstep {
+namespace optimizer {
+namespace physical {
+
+namespace E = ::quickstep::optimizer::expressions;
+
+std::vector<E::AttributeReferencePtr> FilterJoin::getReferencedAttributes() const {
+ std::vector<E::AttributeReferencePtr> referenced_attributes;
+ for (const auto &project_expression : project_expressions()) {
+ const auto referenced_attributes_in_expression =
+ project_expression->getReferencedAttributes();
+ referenced_attributes.insert(referenced_attributes.end(),
+ referenced_attributes_in_expression.begin(),
+ referenced_attributes_in_expression.end());
+ }
+ referenced_attributes.insert(referenced_attributes.end(),
+ probe_attributes_.begin(),
+ probe_attributes_.end());
+ referenced_attributes.insert(referenced_attributes.end(),
+ build_attributes_.begin(),
+ build_attributes_.end());
+ if (build_side_filter_predicate_ != nullptr) {
+ const auto referenced_attributes_in_predicate =
+ build_side_filter_predicate_->getReferencedAttributes();
+ referenced_attributes.insert(referenced_attributes.end(),
+ referenced_attributes_in_predicate.begin(),
+ referenced_attributes_in_predicate.end());
+ }
+ return referenced_attributes;
+}
+
+bool FilterJoin::maybeCopyWithPrunedExpressions(
+ const expressions::UnorderedNamedExpressionSet &referenced_expressions,
+ PhysicalPtr *output) const {
+ std::vector<E::NamedExpressionPtr> new_project_expressions;
+ const auto ¤t_project_expressions = project_expressions();
+ for (const auto &project_expression : current_project_expressions) {
+ if (referenced_expressions.find(project_expression) != referenced_expressions.end()) {
+ new_project_expressions.emplace_back(project_expression);
+ }
+ }
+ if (new_project_expressions.size() != current_project_expressions.size()) {
+ *output = Create(left(),
+ right(),
+ probe_attributes_,
+ build_attributes_,
+ new_project_expressions,
+ build_side_filter_predicate_,
+ is_anti_join_);
+ return true;
+ }
+ return false;
+}
+
+void FilterJoin::getFieldStringItems(
+ std::vector<std::string> *inline_field_names,
+ std::vector<std::string> *inline_field_values,
+ std::vector<std::string> *non_container_child_field_names,
+ std::vector<OptimizerTreeBaseNodePtr> *non_container_child_fields,
+ std::vector<std::string> *container_child_field_names,
+ std::vector<std::vector<OptimizerTreeBaseNodePtr>> *container_child_fields) const {
+ BinaryJoin::getFieldStringItems(inline_field_names,
+ inline_field_values,
+ non_container_child_field_names,
+ non_container_child_fields,
+ container_child_field_names,
+ container_child_fields);
+
+ inline_field_names->push_back("is_anti_join");
+ inline_field_values->push_back(std::to_string(is_anti_join_));
+
+ if (build_side_filter_predicate_ != nullptr) {
+ non_container_child_field_names->emplace_back("build_side_filter_predicate");
+ non_container_child_fields->emplace_back(build_side_filter_predicate_);
+ }
+
+ container_child_field_names->push_back("probe_attributes");
+ container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(probe_attributes_));
+ container_child_field_names->push_back("build_attributes");
+ container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(build_attributes_));
+}
+
+} // namespace physical
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/FilterJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/FilterJoin.hpp b/query_optimizer/physical/FilterJoin.hpp
new file mode 100644
index 0000000..ad4e18b
--- /dev/null
+++ b/query_optimizer/physical/FilterJoin.hpp
@@ -0,0 +1,187 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_FILTER_JOIN_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_FILTER_JOIN_HPP_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "query_optimizer/OptimizerTree.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/BinaryJoin.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+namespace physical {
+
+/** \addtogroup OptimizerPhysical
+ * @{
+ */
+
+class FilterJoin;
+typedef std::shared_ptr<const FilterJoin> FilterJoinPtr;
+
+/**
+ * @brief Physical filter join node. Semantically, FilterJoin is similar to
+ * HashJoin where the difference is that FilterJoin builds a bit vector
+ * instead of a hash table.
+ *
+ * @note FilterJoin's backend execution relies on LIPFilter injection (attach
+ * the bit vectors as filters into downstream relational operators).
+ */
+class FilterJoin : public BinaryJoin {
+ public:
+ PhysicalType getPhysicalType() const override {
+ return PhysicalType::kFilterJoin;
+ }
+
+ std::string getName() const override {
+ if (is_anti_join_) {
+ return "FilterJoin(Anti)";
+ } else {
+ return "FilterJoin";
+ }
+ }
+
+ /**
+ * @return The probe side attributes.
+ */
+ const std::vector<expressions::AttributeReferencePtr>& probe_attributes() const {
+ return probe_attributes_;
+ }
+
+ /**
+ * @return The build side attributes.
+ */
+ const std::vector<expressions::AttributeReferencePtr>& build_attributes() const {
+ return build_attributes_;
+ }
+
+ /**
+ * @return The build side filter predicate.
+ */
+ const expressions::PredicatePtr& build_side_filter_predicate() const {
+ return build_side_filter_predicate_;
+ }
+
+ /**
+ * @return Whether this is an anti-join.
+ */
+ const bool is_anti_join() const {
+ return is_anti_join_;
+ }
+
+ PhysicalPtr copyWithNewChildren(
+ const std::vector<PhysicalPtr> &new_children) const override {
+ DCHECK_EQ(children().size(), new_children.size());
+ return Create(new_children[0],
+ new_children[1],
+ probe_attributes_,
+ build_attributes_,
+ project_expressions(),
+ build_side_filter_predicate_,
+ is_anti_join_);
+ }
+
+ std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() const override;
+
+ bool maybeCopyWithPrunedExpressions(
+ const expressions::UnorderedNamedExpressionSet &referenced_expressions,
+ PhysicalPtr *output) const override;
+
+ /**
+ * @brief Creates a physical FilterJoin.
+ * @param probe_child The probe side child plan.
+ * @param build_child The build side child plan.
+ * @param probe_attributes The probe side attributes.
+ * @param build_attributes The build side attributes.
+ * @param project_expressions The project expressions.
+ * @param build_side_filter_predicate Optional filtering predicate to be
+ * applied to the build side child BEFORE join.
+ * @param is_anti_join Whether this is an anti-join.
+ * @return An immutable physical FilterJoin.
+ */
+ static FilterJoinPtr Create(
+ const PhysicalPtr &probe_child,
+ const PhysicalPtr &build_child,
+ const std::vector<expressions::AttributeReferencePtr> &probe_attributes,
+ const std::vector<expressions::AttributeReferencePtr> &build_attributes,
+ const std::vector<expressions::NamedExpressionPtr> &project_expressions,
+ const expressions::PredicatePtr &build_side_filter_predicate,
+ const bool is_anti_join) {
+ return FilterJoinPtr(
+ new FilterJoin(probe_child,
+ build_child,
+ probe_attributes,
+ build_attributes,
+ project_expressions,
+ build_side_filter_predicate,
+ is_anti_join));
+ }
+
+ protected:
+ void getFieldStringItems(
+ std::vector<std::string> *inline_field_names,
+ std::vector<std::string> *inline_field_values,
+ std::vector<std::string> *non_container_child_field_names,
+ std::vector<OptimizerTreeBaseNodePtr> *non_container_child_fields,
+ std::vector<std::string> *container_child_field_names,
+ std::vector<std::vector<OptimizerTreeBaseNodePtr>> *container_child_fields) const override;
+
+ private:
+ FilterJoin(
+ const PhysicalPtr &probe_child,
+ const PhysicalPtr &build_child,
+ const std::vector<expressions::AttributeReferencePtr> &probe_attributes,
+ const std::vector<expressions::AttributeReferencePtr> &build_attributes,
+ const std::vector<expressions::NamedExpressionPtr> &project_expressions,
+ const expressions::PredicatePtr &build_side_filter_predicate,
+ const bool is_anti_join)
+ : BinaryJoin(probe_child, build_child, project_expressions),
+ probe_attributes_(probe_attributes),
+ build_attributes_(build_attributes),
+ build_side_filter_predicate_(build_side_filter_predicate),
+ is_anti_join_(is_anti_join) {
+ }
+
+ std::vector<expressions::AttributeReferencePtr> probe_attributes_;
+ std::vector<expressions::AttributeReferencePtr> build_attributes_;
+ expressions::PredicatePtr build_side_filter_predicate_;
+ bool is_anti_join_;
+
+ DISALLOW_COPY_AND_ASSIGN(FilterJoin);
+};
+
+/** @} */
+
+} // namespace physical
+} // namespace optimizer
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_FILTER_JOIN_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/LIPFilterConfiguration.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/LIPFilterConfiguration.hpp b/query_optimizer/physical/LIPFilterConfiguration.hpp
index 62a6149..90c81fe 100644
--- a/query_optimizer/physical/LIPFilterConfiguration.hpp
+++ b/query_optimizer/physical/LIPFilterConfiguration.hpp
@@ -21,6 +21,7 @@
#define QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_LIP_FILTER_CONFIGURATION_HPP_
#include <cstddef>
+#include <cstdint>
#include <map>
#include <memory>
#include <vector>
@@ -40,50 +41,211 @@ namespace physical {
class Physical;
typedef std::shared_ptr<const Physical> PhysicalPtr;
+class LIPFilterBuildInfo;
+typedef std::shared_ptr<const LIPFilterBuildInfo> LIPFilterBuildInfoPtr;
+
+class LIPFilterProbeInfo;
+typedef std::shared_ptr<const LIPFilterProbeInfo> LIPFilterProbeInfoPtr;
+
/**
* @brief Optimizer information for a LIP filter builder.
*/
-struct LIPFilterBuildInfo {
+class LIPFilterBuildInfo {
+ public:
+ /**
+ * @return The LIPFilter's type.
+ */
+ LIPFilterType filter_type() const {
+ return filter_type_;
+ }
+
+ /**
+ * @return The LIPFilter's build attribute.
+ */
+ const expressions::AttributeReferencePtr& build_attribute() const {
+ return build_attribute_;
+ }
+
+ protected:
/**
* @brief Constructor.
*
- * @param build_attribute_in The attribute to build the LIP filter with.
- * @param filter_cardinality_in The LIP filter's cardinality.
* @param filter_type_in The LIP filter's type.
+ * @param build_attribute_in The attribute to build the LIP filter with.
+ */
+ LIPFilterBuildInfo(const LIPFilterType &filter_type,
+ const expressions::AttributeReferencePtr &build_attribute)
+ : filter_type_(filter_type),
+ build_attribute_(build_attribute) {}
+
+ private:
+ const LIPFilterType filter_type_;
+ const expressions::AttributeReferencePtr build_attribute_;
+
+ DISALLOW_COPY_AND_ASSIGN(LIPFilterBuildInfo);
+};
+
+/**
+ * @brief Subclass that contains extra information for SingleIdentityHashFilter
+ * builder.
+ */
+class SingleIdentityHashFilterBuildInfo : public LIPFilterBuildInfo {
+ public:
+ /**
+ * @return The cardinality of this SingleIdentityHashFilter.
*/
- LIPFilterBuildInfo(const expressions::AttributeReferencePtr &build_attribute_in,
- const std::size_t filter_cardinality_in,
- const LIPFilterType &filter_type_in)
- : build_attribute(build_attribute_in),
- filter_cardinality(filter_cardinality_in),
- filter_type(filter_type_in) {
+ std::size_t filter_cardinality() const {
+ return filter_cardinality_;
}
- const expressions::AttributeReferencePtr build_attribute;
- const std::size_t filter_cardinality;
- const LIPFilterType filter_type;
+
+ /**
+ * @brief Creates a shared SingleIdentityHashFilterBuildInfo.
+ *
+ * @param build_attribute The attribute to build the filter with.
+ * @param filter_cardinality The cardinality of this SingleIdentityHashFilter.
+ */
+ static LIPFilterBuildInfoPtr Create(
+ const expressions::AttributeReferencePtr &build_attribute,
+ const std::size_t filter_cardinality) {
+ return LIPFilterBuildInfoPtr(
+ new SingleIdentityHashFilterBuildInfo(build_attribute,
+ filter_cardinality));
+ }
+
+ private:
+ SingleIdentityHashFilterBuildInfo(const expressions::AttributeReferencePtr &build_attribute,
+ const std::size_t filter_cardinality)
+ : LIPFilterBuildInfo(LIPFilterType::kSingleIdentityHashFilter,
+ build_attribute),
+ filter_cardinality_(filter_cardinality) {}
+
+ const std::size_t filter_cardinality_;
+
+ DISALLOW_COPY_AND_ASSIGN(SingleIdentityHashFilterBuildInfo);
};
/**
+ * @brief Subclass that contains extra information for BitVectorExactFilter
+ * builder.
+ */
+class BitVectorExactFilterBuildInfo : public LIPFilterBuildInfo {
+ public:
+ /**
+ * @return The minimum possible value for this BitVectorExactFilter.
+ */
+ std::int64_t min_value() const {
+ return min_value_;
+ }
+
+ /**
+ * @return The maximum possible value for this BitVectorExactFilter.
+ */
+ std::int64_t max_value() const {
+ return max_value_;
+ }
+
+ /**
+ * @return Whether this is an anti-filter.
+ */
+ bool is_anti_filter() const {
+ return is_anti_filter_;
+ }
+
+ /**
+ * @brief Creates a shared BitVectorExactFilterBuildInfo.
+ *
+ * @param build_attribute The attribute to build the filter with.
+ * @param min_value The minimum possible value for this BitVectorExactFilter
+ * to set.
+ * @param max_value The maximum possible value for this BitVectorExactFilter
+ * to set.
+ * @param is_anti_filter Whether this is an anti-filter.
+ */
+ static LIPFilterBuildInfoPtr Create(
+ const expressions::AttributeReferencePtr &build_attribute,
+ const std::int64_t min_value,
+ const std::int64_t max_value,
+ const bool is_anti_filter) {
+ return LIPFilterBuildInfoPtr(
+ new BitVectorExactFilterBuildInfo(build_attribute,
+ min_value,
+ max_value,
+ is_anti_filter));
+ }
+
+ private:
+ BitVectorExactFilterBuildInfo(const expressions::AttributeReferencePtr &build_attribute,
+ const std::int64_t min_value,
+ const std::int64_t max_value,
+ const bool is_anti_filter)
+ : LIPFilterBuildInfo(LIPFilterType::kBitVectorExactFilter,
+ build_attribute),
+ min_value_(min_value),
+ max_value_(max_value),
+ is_anti_filter_(is_anti_filter) {}
+
+ const std::int64_t min_value_;
+ const std::int64_t max_value_;
+ const bool is_anti_filter_;
+
+ DISALLOW_COPY_AND_ASSIGN(BitVectorExactFilterBuildInfo);
+};
+
+
+/**
* @brief Optimizer information for a LIP filter prober.
*/
-struct LIPFilterProbeInfo {
+class LIPFilterProbeInfo {
+ public:
/**
- * @brief Constructor.
+ * @return The attribute to probe the LIP Filter with.
+ */
+ const expressions::AttributeReferencePtr& probe_attribute() const {
+ return probe_attribute_;
+ }
+
+ /**
+ * @return The attribute that the LIP filter is built with.
+ */
+ const expressions::AttributeReferencePtr& build_attribute() const {
+ return build_attribute_;
+ }
+
+ /**
+ * @return The physical node that the LIP filter's builder is attached to.
+ */
+ const PhysicalPtr& builder() const {
+ return builder_;
+ }
+
+ /**
+ * @brief Creates a shared LIPFilterProbeInfo.
*
- * @param probe_attribute_in The attribute to probe the LIP filter with.
- * @param build_attribute_in The attribute that the LIP filter is built with.
- * @param builder_in The physical node that the LIP filter's builder is attached to.
- */
- LIPFilterProbeInfo(const expressions::AttributeReferencePtr &probe_attribute_in,
- const expressions::AttributeReferencePtr &build_attribute_in,
- const PhysicalPtr &builder_in)
- : probe_attribute(probe_attribute_in),
- build_attribute(build_attribute_in),
- builder(builder_in) {
- }
- const expressions::AttributeReferencePtr probe_attribute;
- const expressions::AttributeReferencePtr build_attribute;
- const PhysicalPtr builder;
+ * @param probe_attribute The attribute to probe the LIP filter with.
+ * @param build_attribute The attribute that the LIP filter is built with.
+ * @param builder The physical node that the LIP filter's builder is attached to.
+ */
+ static LIPFilterProbeInfoPtr Create(
+ const expressions::AttributeReferencePtr &probe_attribute,
+ const expressions::AttributeReferencePtr &build_attribute,
+ const PhysicalPtr &builder) {
+ return LIPFilterProbeInfoPtr(
+ new LIPFilterProbeInfo(probe_attribute, build_attribute, builder));
+ }
+
+ private:
+ LIPFilterProbeInfo(const expressions::AttributeReferencePtr &probe_attribute,
+ const expressions::AttributeReferencePtr &build_attribute,
+ const PhysicalPtr &builder)
+ : probe_attribute_(probe_attribute),
+ build_attribute_(build_attribute),
+ builder_(builder) {}
+
+ const expressions::AttributeReferencePtr probe_attribute_;
+ const expressions::AttributeReferencePtr build_attribute_;
+ const PhysicalPtr builder_;
+
+ DISALLOW_COPY_AND_ASSIGN(LIPFilterProbeInfo);
};
@@ -104,33 +266,23 @@ class LIPFilterConfiguration {
/**
* @brief Add information for a LIP filter builder.
*
- * @param build_attribute The attribute to build the LIP filter with.
+ * @param build_info A shared_ptr to LIPFilterBuildInfo.
* @param builder The physical node to attach the LIP filter builder to.
- * @param filter_size The LIP filter's cardinality.
- * @param filter_type The LIP filter's type.
*/
- void addBuildInfo(const expressions::AttributeReferencePtr &build_attribute,
- const PhysicalPtr &builder,
- const std::size_t filter_size,
- const LIPFilterType &filter_type) {
- build_info_map_[builder].emplace_back(
- build_attribute, filter_size, filter_type);
+ void addBuildInfo(const LIPFilterBuildInfoPtr &build_info,
+ const PhysicalPtr &builder) {
+ build_info_map_[builder].emplace_back(build_info);
}
/**
* @brief Add information for a LIP filter prober.
*
- * @param probe_attribute The attribute to probe the LIP filter with.
+ * @param probe_info A shared_ptr to LIPFilterProbeInfo.
* @param prober The physical node to attach the LIP filter prober to.
- * @param build_attribute The attribute that the LIP filter is built with.
- * @param builder The physical node that the LIP filter's builder is attached to.
*/
- void addProbeInfo(const expressions::AttributeReferencePtr &probe_attribute,
- const PhysicalPtr &prober,
- const expressions::AttributeReferencePtr &build_attribute,
- const PhysicalPtr &builder) {
- probe_info_map_[prober].emplace_back(
- probe_attribute, build_attribute, builder);
+ void addProbeInfo(const LIPFilterProbeInfoPtr &probe_info,
+ const PhysicalPtr &prober) {
+ probe_info_map_[prober].emplace_back(probe_info);
}
/**
@@ -140,7 +292,7 @@ class LIPFilterConfiguration {
* a vector of all the LIP filter builders that are attached to the
* physical node.
*/
- const std::map<PhysicalPtr, std::vector<LIPFilterBuildInfo>>& getBuildInfoMap() const {
+ const std::map<PhysicalPtr, std::vector<LIPFilterBuildInfoPtr>>& getBuildInfoMap() const {
return build_info_map_;
}
@@ -151,13 +303,26 @@ class LIPFilterConfiguration {
* a vector of all the LIP filter probers that are attached to the
* physical node.
*/
- const std::map<PhysicalPtr, std::vector<LIPFilterProbeInfo>>& getProbeInfoMap() const {
+ const std::map<PhysicalPtr, std::vector<LIPFilterProbeInfoPtr>>& getProbeInfoMap() const {
return probe_info_map_;
}
+ /**
+ * @brief Clone a copy of this configuration.
+ *
+ * @return A copy of this confiugration. Caller should take ownership of the
+ * returned object.
+ */
+ LIPFilterConfiguration* clone() const {
+ LIPFilterConfiguration *new_conf = new LIPFilterConfiguration();
+ new_conf->build_info_map_ = build_info_map_;
+ new_conf->probe_info_map_ = probe_info_map_;
+ return new_conf;
+ }
+
private:
- std::map<PhysicalPtr, std::vector<LIPFilterBuildInfo>> build_info_map_;
- std::map<PhysicalPtr, std::vector<LIPFilterProbeInfo>> probe_info_map_;
+ std::map<PhysicalPtr, std::vector<LIPFilterBuildInfoPtr>> build_info_map_;
+ std::map<PhysicalPtr, std::vector<LIPFilterProbeInfoPtr>> probe_info_map_;
DISALLOW_COPY_AND_ASSIGN(LIPFilterConfiguration);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/PatternMatcher.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/PatternMatcher.hpp b/query_optimizer/physical/PatternMatcher.hpp
index 5cd6fd3..4336767 100644
--- a/query_optimizer/physical/PatternMatcher.hpp
+++ b/query_optimizer/physical/PatternMatcher.hpp
@@ -35,6 +35,7 @@ class CopyFrom;
class CreateTable;
class DeleteTuples;
class DropTable;
+class FilterJoin;
class HashJoin;
class InsertTuple;
class Join;
@@ -113,6 +114,7 @@ using SomeCopyFrom = SomePhysicalNode<CopyFrom, PhysicalType::kCopyFrom>;
using SomeCreateTable = SomePhysicalNode<CreateTable, PhysicalType::kCreateTable>;
using SomeDeleteTuples = SomePhysicalNode<DeleteTuples, PhysicalType::kDeleteTuples>;
using SomeDropTable = SomePhysicalNode<DropTable, PhysicalType::kDropTable>;
+using SomeFilterJoin = SomePhysicalNode<FilterJoin, PhysicalType::kFilterJoin>;
using SomeHashJoin = SomePhysicalNode<HashJoin, PhysicalType::kHashJoin>;
using SomeInsertTuple = SomePhysicalNode<InsertTuple, PhysicalType::kInsertTuple>;
using SomeJoin = SomePhysicalNode<Join, PhysicalType::kHashJoin, PhysicalType::kNestedLoopsJoin>;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/PhysicalType.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/PhysicalType.hpp b/query_optimizer/physical/PhysicalType.hpp
index f5f35a1..1da5929 100644
--- a/query_optimizer/physical/PhysicalType.hpp
+++ b/query_optimizer/physical/PhysicalType.hpp
@@ -38,6 +38,7 @@ enum class PhysicalType {
kCreateTable,
kDeleteTuples,
kDropTable,
+ kFilterJoin,
kHashJoin,
kInsertSelection,
kInsertTuple,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/physical/TopLevelPlan.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/TopLevelPlan.hpp b/query_optimizer/physical/TopLevelPlan.hpp
index 7dfc2b6..9e567e1 100644
--- a/query_optimizer/physical/TopLevelPlan.hpp
+++ b/query_optimizer/physical/TopLevelPlan.hpp
@@ -126,7 +126,8 @@ class TopLevelPlan : public Physical {
}
return TopLevelPlan::Create(new_children[0],
new_shared_subplans,
- uncorrelated_subquery_map_);
+ uncorrelated_subquery_map_,
+ lip_filter_configuration_);
}
std::vector<expressions::AttributeReferencePtr> getOutputAttributes() const override {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/rules/AttachLIPFilters.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachLIPFilters.cpp b/query_optimizer/rules/AttachLIPFilters.cpp
index b3c57ab..48b68bc 100644
--- a/query_optimizer/rules/AttachLIPFilters.cpp
+++ b/query_optimizer/rules/AttachLIPFilters.cpp
@@ -55,7 +55,14 @@ P::PhysicalPtr AttachLIPFilters::apply(const P::PhysicalPtr &input) {
cost_model_.reset(
new cost::StarSchemaSimpleCostModel(
top_level_plan->shared_subplans()));
- lip_filter_configuration_.reset(new P::LIPFilterConfiguration());
+
+ const P::LIPFilterConfigurationPtr &existing_configuration =
+ top_level_plan->lip_filter_configuration();
+ if (existing_configuration != nullptr) {
+ lip_filter_configuration_.reset(existing_configuration->clone());
+ } else {
+ lip_filter_configuration_.reset(new P::LIPFilterConfiguration());
+ }
std::set<E::ExprId> already_filtered_attributes;
attachLIPFilters(NodeList(input), &already_filtered_attributes);
@@ -101,7 +108,7 @@ void AttachLIPFilters::attachLIPFilters(
}
if (probe_child != nullptr &&
- cost_model_->estimateCardinality(probe_child) > 10000000) {
+ cost_model_->estimateCardinality(probe_child) >= 100000) {
const auto &candidate_lip_filters = getProbeSideInfo(path.cons(probe_child));
if (!candidate_lip_filters.empty()) {
std::map<E::AttributeReferencePtr, LIPFilterInfoPtr> selected_filters;
@@ -119,15 +126,16 @@ void AttachLIPFilters::attachLIPFilters(
if (already_filtered_attributes->find(source_attr_id)
== already_filtered_attributes->end()) {
lip_filter_configuration_->addBuildInfo(
- pair.second->source_attribute,
- pair.second->source,
- pair.second->estimated_cardinality * 8,
- LIPFilterType::kSingleIdentityHashFilter);
- lip_filter_configuration_->addProbeInfo(
- pair.first,
- node,
- pair.second->source_attribute,
+ P::SingleIdentityHashFilterBuildInfo::Create(
+ pair.second->source_attribute,
+ pair.second->estimated_cardinality * 8),
pair.second->source);
+ lip_filter_configuration_->addProbeInfo(
+ P::LIPFilterProbeInfo::Create(
+ pair.first,
+ pair.second->source_attribute,
+ pair.second->source),
+ node);
already_filtered_attributes->emplace(source_attr_id);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4ba819c5/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 86d1ef7..223c78c 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -22,6 +22,7 @@ add_library(quickstep_queryoptimizer_rules_AttachLIPFilters AttachLIPFilters.cpp
add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp)
add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp)
add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
+add_library(quickstep_queryoptimizer_rules_InjectJoinFilters InjectJoinFilters.cpp InjectJoinFilters.hpp)
add_library(quickstep_queryoptimizer_rules_PruneColumns PruneColumns.cpp PruneColumns.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownFilter PushDownFilter.cpp PushDownFilter.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
@@ -196,6 +197,26 @@ target_link_libraries(quickstep_queryoptimizer_rules_SwapProbeBuild
target_link_libraries(quickstep_queryoptimizer_rules_TopDownRule
quickstep_queryoptimizer_rules_Rule
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_InjectJoinFilters
+ quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExpressionUtil
+ quickstep_queryoptimizer_expressions_Predicate
+ quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_FilterJoin
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_LIPFilterConfiguration
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_queryoptimizer_physical_Selection
+ quickstep_queryoptimizer_physical_TopLevelPlan
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_queryoptimizer_rules_PruneColumns
+ quickstep_types_TypeID
+ quickstep_types_TypedValue
+ quickstep_utility_Macros
+ quickstep_utility_lipfilter_LIPFilter)
target_link_libraries(quickstep_queryoptimizer_rules_UnnestSubqueries
quickstep_queryoptimizer_OptimizerContext
quickstep_queryoptimizer_expressions_AttributeReference
@@ -246,6 +267,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
quickstep_queryoptimizer_rules_BottomUpRule
quickstep_queryoptimizer_rules_CollapseProject
quickstep_queryoptimizer_rules_GenerateJoins
+ quickstep_queryoptimizer_rules_InjectJoinFilters
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_PushDownFilter
quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate