You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2018/04/26 06:15:49 UTC
incubator-quickstep git commit: Added the an optimization rule
Repository: incubator-quickstep
Updated Branches:
refs/heads/master 612e103cb -> cc9ab901f
Added the an optimization rule
that eliminates a Physical node with an empty TableReference into
a Selection w/ an temp TableReference.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/cc9ab901
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/cc9ab901
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/cc9ab901
Branch: refs/heads/master
Commit: cc9ab901f92880f4dd33c46797381e081241c281
Parents: 612e103
Author: Zuyu Zhang <zu...@cs.wisc.edu>
Authored: Wed Apr 18 17:15:34 2018 -0500
Committer: Zuyu Zhang <zu...@cs.wisc.edu>
Committed: Thu Apr 26 00:24:29 2018 -0500
----------------------------------------------------------------------
cli/tests/CMakeLists.txt | 6 +-
cli/tests/CommandExecutorTest.cpp | 2 -
cli/tests/CommandExecutorTestRunner.cpp | 31 +-
cli/tests/CommandExecutorTestRunner.hpp | 35 +-
cli/tests/command_executor/Analyze.test | 136 +++++++
cli/tests/command_executor/CMakeLists.txt | 6 +
cli/tests/command_executor/D.test | 1 -
cli/tests/command_executor/Dt.test | 1 -
query_optimizer/CMakeLists.txt | 1 +
query_optimizer/ExecutionGenerator.cpp | 18 +
query_optimizer/Optimizer.cpp | 3 +-
query_optimizer/PhysicalGenerator.cpp | 15 +-
query_optimizer/PhysicalGenerator.hpp | 10 +-
query_optimizer/physical/Aggregate.cpp | 20 ++
query_optimizer/physical/Aggregate.hpp | 3 +
query_optimizer/physical/CMakeLists.txt | 2 +
query_optimizer/physical/HashJoin.cpp | 10 +
query_optimizer/physical/HashJoin.hpp | 3 +
query_optimizer/physical/NestedLoopsJoin.cpp | 9 +
query_optimizer/physical/NestedLoopsJoin.hpp | 3 +
query_optimizer/physical/PatternMatcher.hpp | 2 +
query_optimizer/physical/Physical.hpp | 6 +
query_optimizer/physical/Selection.cpp | 7 +
query_optimizer/physical/Selection.hpp | 3 +
query_optimizer/physical/UnionAll.hpp | 4 +
query_optimizer/rules/CMakeLists.txt | 24 ++
query_optimizer/rules/EliminateEmptyNode.cpp | 358 +++++++++++++++++++
query_optimizer/rules/EliminateEmptyNode.hpp | 70 ++++
.../tests/OptimizerTextTestRunner.cpp | 3 +-
query_optimizer/tests/TestDatabaseLoader.cpp | 2 +-
30 files changed, 741 insertions(+), 53 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cli/tests/CMakeLists.txt b/cli/tests/CMakeLists.txt
index f402ac0..a59491e 100644
--- a/cli/tests/CMakeLists.txt
+++ b/cli/tests/CMakeLists.txt
@@ -49,6 +49,7 @@ target_link_libraries(quickstep_cli_tests_CommandExecutorTest
gtest
quickstep_catalog_CatalogDatabase
quickstep_cli_CommandExecutor
+ quickstep_cli_DefaultsConfigurator
quickstep_cli_DropRelation
quickstep_cli_PrintToScreen
quickstep_parser_ParseStatement
@@ -59,10 +60,9 @@ target_link_libraries(quickstep_cli_tests_CommandExecutorTest
quickstep_queryexecution_QueryExecutionUtil
quickstep_queryexecution_Worker
quickstep_queryexecution_WorkerDirectory
- quickstep_queryoptimizer_Optimizer
- quickstep_queryoptimizer_OptimizerContext
quickstep_queryoptimizer_QueryHandle
- quickstep_queryoptimizer_tests_TestDatabaseLoader
+ quickstep_queryoptimizer_QueryProcessor
+ quickstep_storage_StorageConstants
quickstep_utility_Macros
quickstep_utility_MemStream
quickstep_utility_SqlError
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/CommandExecutorTest.cpp
----------------------------------------------------------------------
diff --git a/cli/tests/CommandExecutorTest.cpp b/cli/tests/CommandExecutorTest.cpp
index 6ad2183..a9c64dd 100644
--- a/cli/tests/CommandExecutorTest.cpp
+++ b/cli/tests/CommandExecutorTest.cpp
@@ -45,8 +45,6 @@ int main(int argc, char** argv) {
new quickstep::CommandExecutorTestRunner(argv[3]));
test_driver.reset(
new quickstep::TextBasedTestDriver(&input_file, test_runner.get()));
- test_driver->registerOption(
- quickstep::CommandExecutorTestRunner::kResetOption);
::testing::InitGoogleTest(&argc, argv);
const int success = RUN_ALL_TESTS();
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/CommandExecutorTestRunner.cpp
----------------------------------------------------------------------
diff --git a/cli/tests/CommandExecutorTestRunner.cpp b/cli/tests/CommandExecutorTestRunner.cpp
index e352b24..4ab94d8 100644
--- a/cli/tests/CommandExecutorTestRunner.cpp
+++ b/cli/tests/CommandExecutorTestRunner.cpp
@@ -32,9 +32,8 @@
#include "query_execution/AdmitRequestMessage.hpp"
#include "query_execution/ForemanSingleNode.hpp"
#include "query_execution/QueryExecutionTypedefs.hpp"
-#include "query_optimizer/Optimizer.hpp"
-#include "query_optimizer/OptimizerContext.hpp"
#include "query_optimizer/QueryHandle.hpp"
+#include "query_optimizer/QueryProcessor.hpp"
#include "utility/MemStream.hpp"
#include "utility/SqlError.hpp"
@@ -48,9 +47,6 @@ class CatalogRelation;
namespace O = ::quickstep::optimizer;
-const char CommandExecutorTestRunner::kResetOption[] =
- "reset_before_execution";
-
void CommandExecutorTestRunner::runTestCase(
const std::string &input, const std::set<std::string> &options,
std::string *output) {
@@ -58,12 +54,6 @@ void CommandExecutorTestRunner::runTestCase(
VLOG(4) << "Test SQL(s): " << input;
- if (options.find(kResetOption) != options.end()) {
- test_database_loader_.clear();
- test_database_loader_.createTestRelation(false /* allow_vchar */);
- test_database_loader_.loadTestRelation();
- }
-
MemStream output_stream;
sql_parser_.feedNextBuffer(new std::string(input));
@@ -81,23 +71,18 @@ void CommandExecutorTestRunner::runTestCase(
if (parse_statement.getStatementType() == ParseStatement::kCommand) {
quickstep::cli::executeCommand(
*result.parsed_statement,
- *(test_database_loader_.catalog_database()),
+ *query_processor_->getDefaultDatabase(),
main_thread_client_id_,
foreman_->getBusClientID(),
&bus_,
- test_database_loader_.storage_manager(),
- nullptr,
+ &storage_manager_,
+ query_processor_.get(),
output_stream.file());
} else {
const CatalogRelation *query_result_relation = nullptr;
{
auto query_handle = std::make_unique<QueryHandle>(0 /* query_id */, main_thread_client_id_);
- O::OptimizerContext optimizer_context;
-
- optimizer_.generateQueryHandle(parse_statement,
- test_database_loader_.catalog_database(),
- &optimizer_context,
- query_handle.get());
+ query_processor_->generateQueryHandle(parse_statement, query_handle.get());
query_result_relation = query_handle->getQueryResultRelation();
QueryExecutionUtil::ConstructAndSendAdmitRequestMessage(
@@ -111,11 +96,11 @@ void CommandExecutorTestRunner::runTestCase(
DCHECK_EQ(kWorkloadCompletionMessage, tagged_message.message_type());
if (query_result_relation) {
PrintToScreen::PrintRelation(*query_result_relation,
- test_database_loader_.storage_manager(),
+ &storage_manager_,
output_stream.file());
DropRelation::Drop(*query_result_relation,
- test_database_loader_.catalog_database(),
- test_database_loader_.storage_manager());
+ query_processor_->getDefaultDatabase(),
+ &storage_manager_);
}
}
} catch (const SqlError &error) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/CommandExecutorTestRunner.hpp
----------------------------------------------------------------------
diff --git a/cli/tests/CommandExecutorTestRunner.hpp b/cli/tests/CommandExecutorTestRunner.hpp
index 83c5a9a..fb65498 100644
--- a/cli/tests/CommandExecutorTestRunner.hpp
+++ b/cli/tests/CommandExecutorTestRunner.hpp
@@ -20,12 +20,14 @@
#ifndef QUICKSTEP_CLI_TESTS_COMMAND_EXECUTOR_TEST_RUNNER_HPP_
#define QUICKSTEP_CLI_TESTS_COMMAND_EXECUTOR_TEST_RUNNER_HPP_
+#include <cstdlib>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
+#include "cli/DefaultsConfigurator.hpp"
#include "parser/SqlParserWrapper.hpp"
#include "query_execution/ForemanSingleNode.hpp"
#include "query_execution/QueryExecutionTypedefs.hpp"
@@ -33,14 +35,16 @@
#include "query_execution/Worker.hpp"
#include "query_execution/WorkerDirectory.hpp"
#include "query_execution/WorkerMessage.hpp"
-#include "query_optimizer/Optimizer.hpp"
-#include "query_optimizer/tests/TestDatabaseLoader.hpp"
+#include "query_optimizer/QueryProcessor.hpp"
+#include "storage/StorageConstants.hpp"
#include "utility/Macros.hpp"
#include "utility/textbased_test/TextBasedTestDriver.hpp"
#include "tmb/id_typedefs.h"
#include "tmb/message_bus.h"
+#include "glog/logging.h"
+
namespace quickstep {
/**
@@ -49,18 +53,13 @@ namespace quickstep {
class CommandExecutorTestRunner : public TextBasedTestRunner {
public:
/**
- * @brief If this option is enabled, recreate the entire database and
- * repopulate the data before every test.
- */
- static const char kResetOption[];
-
- /**
* @brief Constructor.
*/
explicit CommandExecutorTestRunner(const std::string &storage_path)
- : test_database_loader_(storage_path) {
- test_database_loader_.createTestRelation(false /* allow_vchar */);
- test_database_loader_.loadTestRelation();
+ : catalog_path_(storage_path + kCatalogFilename),
+ storage_manager_(storage_path) {
+ DefaultsConfigurator::InitializeDefaultDatabase(storage_path, catalog_path_);
+ query_processor_ = std::make_unique<QueryProcessor>(std::string(catalog_path_));
bus_.Initialize();
@@ -84,8 +83,8 @@ class CommandExecutorTestRunner : public TextBasedTestRunner {
new ForemanSingleNode(main_thread_client_id_,
workers_.get(),
&bus_,
- test_database_loader_.catalog_database(),
- test_database_loader_.storage_manager()));
+ query_processor_->getDefaultDatabase(),
+ &storage_manager_));
foreman_->start();
worker_->start();
@@ -95,6 +94,10 @@ class CommandExecutorTestRunner : public TextBasedTestRunner {
QueryExecutionUtil::BroadcastPoisonMessage(main_thread_client_id_, &bus_);
worker_->join();
foreman_->join();
+
+ const std::string command = "rm -f " + catalog_path_;
+ CHECK(!std::system(command.c_str()))
+ << "Failed when attempting to remove catalog proto file: " << catalog_path_;
}
void runTestCase(const std::string &input,
@@ -102,9 +105,11 @@ class CommandExecutorTestRunner : public TextBasedTestRunner {
std::string *output) override;
private:
+ const std::string catalog_path_;
+
SqlParserWrapper sql_parser_;
- optimizer::TestDatabaseLoader test_database_loader_;
- optimizer::Optimizer optimizer_;
+ StorageManager storage_manager_;
+ std::unique_ptr<QueryProcessor> query_processor_;
tmb::client_id main_thread_client_id_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/command_executor/Analyze.test
----------------------------------------------------------------------
diff --git a/cli/tests/command_executor/Analyze.test b/cli/tests/command_executor/Analyze.test
new file mode 100644
index 0000000..57b8312
--- /dev/null
+++ b/cli/tests/command_executor/Analyze.test
@@ -0,0 +1,136 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+CREATE TABLE r (src INT, dst INT);
+CREATE TABLE s (src INT, dst INT);
+CREATE TABLE t (src INT, dst INT);
+
+INSERT INTO s VALUES(0, 0);
+INSERT INTO s VALUES(1, 5);
+
+INSERT INTO t VALUES(0, 0);
+INSERT INTO t VALUES(0, 0);
+--
+==
+
+\analyze
+--
+Analyzing r ... done
+Analyzing s ... done
+Analyzing t ... done
+==
+
+SELECT r.src, r.dst FROM r;
+--
++-----------+-----------+
+|src |dst |
++-----------+-----------+
++-----------+-----------+
+==
+
+# One side of InnerJoin is empty.
+SELECT r.src, r.dst
+FROM r, t
+WHERE r.src = t.src AND r.dst = t.dst;
+--
++-----------+-----------+
+|src |dst |
++-----------+-----------+
++-----------+-----------+
+==
+
+# One side of LeftSemiJoin is empty.
+SELECT s.src, s.dst
+FROM s
+WHERE EXISTS(
+ SELECT r.src, r.dst FROM r WHERE s.src=r.src AND s.dst=r.dst);
+--
++-----------+-----------+
+|src |dst |
++-----------+-----------+
++-----------+-----------+
+==
+
+# One side of LeftAntiJoin is empty.
+SELECT s.src, s.dst
+FROM s
+WHERE NOT EXISTS(
+ SELECT r.src, r.dst FROM r WHERE s.src=r.src AND s.dst=r.dst);
+--
++-----------+-----------+
+|src |dst |
++-----------+-----------+
+| 0| 0|
+| 1| 5|
++-----------+-----------+
+==
+
+# Union between an empty relation and a non-empty.
+SELECT r.src, r.dst FROM r
+UNION
+SELECT t.src, t.dst FROM t;
+--
++-----------+-----------+
+|src |dst |
++-----------+-----------+
+| 0| 0|
++-----------+-----------+
+==
+
+# Union All between an empty relation and a non-empty.
+SELECT r.src, r.dst FROM r
+UNION ALL
+SELECT t.src, t.dst FROM t;
+--
++-----------+-----------+
+|src |dst |
++-----------+-----------+
+| 0| 0|
+| 0| 0|
++-----------+-----------+
+==
+
+# Union on two InnerJoins, one of which involves an empty relation.
+SELECT r.src, r.dst FROM r, s WHERE r.src=s.src AND r.dst=s.dst
+UNION
+SELECT s.src, s.dst FROM s, t WHERE s.src=t.src AND s.dst=t.dst;
+--
++-----------+-----------+
+|src |dst |
++-----------+-----------+
+| 0| 0|
++-----------+-----------+
+==
+
+# Union All on two InnerJoins, one of which involves an empty relation.
+SELECT r.src, r.dst FROM r, s WHERE r.src=s.src AND r.dst=s.dst
+UNION ALL
+SELECT s.src, s.dst FROM s, t WHERE s.src=t.src AND s.dst=t.dst;
+--
++-----------+-----------+
+|src |dst |
++-----------+-----------+
+| 0| 0|
+| 0| 0|
++-----------+-----------+
+==
+
+DROP TABLE r;
+DROP TABLE s;
+DROP TABLE t;
+--
+==
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/command_executor/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cli/tests/command_executor/CMakeLists.txt b/cli/tests/command_executor/CMakeLists.txt
index e62d954..3850de7 100644
--- a/cli/tests/command_executor/CMakeLists.txt
+++ b/cli/tests/command_executor/CMakeLists.txt
@@ -15,6 +15,11 @@
# specific language governing permissions and limitations
# under the License.
+add_test(quickstep_cli_tests_commandexecutor_analyze
+ "../quickstep_cli_tests_CommandExecutorTest"
+ "${CMAKE_CURRENT_SOURCE_DIR}/Analyze.test"
+ "${CMAKE_CURRENT_BINARY_DIR}/Analyze.test"
+ "${CMAKE_CURRENT_BINARY_DIR}/Analyze/")
add_test(quickstep_cli_tests_commandexecutor_d
"../quickstep_cli_tests_CommandExecutorTest"
"${CMAKE_CURRENT_SOURCE_DIR}/D.test"
@@ -41,6 +46,7 @@ endif(ENABLE_DISTRIBUTED)
# Create the folders where the unit tests will store their data blocks for the
# duration of their test.
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Analyze)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/D)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Dt)
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/command_executor/D.test
----------------------------------------------------------------------
diff --git a/cli/tests/command_executor/D.test b/cli/tests/command_executor/D.test
index c3564a6..2d96b47 100644
--- a/cli/tests/command_executor/D.test
+++ b/cli/tests/command_executor/D.test
@@ -44,7 +44,6 @@ PARTITION BY HASH(col1) PARTITIONS 4;
INSERT INTO foo_hash_part
SELECT i, i * 3.0 FROM generate_series(1, 100) AS gs(i);
CREATE TABLE averylongtablenamethatseemstoneverend (col1 INT);
-DROP TABLE TEST;
INSERT INTO averylongtablenamethatseemstoneverend VALUES (1);
INSERT INTO averylongtablenamethatseemstoneverend VALUES (2);
INSERT INTO averylongtablenamethatseemstoneverend VALUES (3);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/command_executor/Dt.test
----------------------------------------------------------------------
diff --git a/cli/tests/command_executor/Dt.test b/cli/tests/command_executor/Dt.test
index 022cae6..f735fd3 100644
--- a/cli/tests/command_executor/Dt.test
+++ b/cli/tests/command_executor/Dt.test
@@ -35,7 +35,6 @@ CREATE TABLE foo4 (col1 INT,
col3 DOUBLE,
col4 FLOAT,
col5 CHAR(5));
-DROP TABLE TEST;
CREATE TABLE averylongtablenamethatseemstoneverend (col1 INT);
INSERT INTO averylongtablenamethatseemstoneverend VALUES (1);
INSERT INTO averylongtablenamethatseemstoneverend VALUES (2);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 9128c63..ab4f6ec 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -226,6 +226,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_rules_AttachLIPFilters
quickstep_queryoptimizer_rules_CollapseSelection
+ quickstep_queryoptimizer_rules_EliminateEmptyNode
quickstep_queryoptimizer_rules_ExtractCommonSubexpression
quickstep_queryoptimizer_rules_FuseAggregateJoin
quickstep_queryoptimizer_rules_FuseHashSelect
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 2cdcffe..8dd93d0 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -25,6 +25,7 @@
#include <functional>
#include <memory>
#include <numeric>
+#include <sstream>
#include <string>
#include <type_traits>
#include <unordered_map>
@@ -347,6 +348,14 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) {
if (it != physical_to_output_relation_map_.end()) {
result_relation = it->second.relation;
}
+
+#ifdef QUICKSTEP_DEBUG
+ std::unordered_set<relation_id> temp_relations;
+ for (const CatalogRelationInfo &temporary_relation_info : temporary_relation_info_vec_) {
+ temp_relations.insert(temporary_relation_info.relation->getID());
+ }
+ DCHECK_EQ(temporary_relation_info_vec_.size(), temp_relations.size());
+#endif
} catch (...) {
// Drop all temporary relations.
dropAllTemporaryRelations();
@@ -784,6 +793,9 @@ void ExecutionGenerator::convertSelection(
execution_plan_->addDirectDependency(select_index,
input_relation_info->producer_operator_index,
false /* is_pipeline_breaker */);
+ } else if (input_relation.isTemporary()) {
+ // NOTE(zuyu): drop the temp relation created by EliminateEmptyNode rule.
+ temporary_relation_info_vec_.emplace_back(select_index, &input_relation);
}
physical_to_output_relation_map_.emplace(
std::piecewise_construct,
@@ -1285,6 +1297,9 @@ void ExecutionGenerator::convertCopyTo(const P::CopyToPtr &physical_plan) {
execution_plan_->addDirectDependency(table_export_operator_index,
input_relation_info->producer_operator_index,
false /* is_pipeline_breaker */);
+ } else if (input_relation->isTemporary()) {
+ // NOTE(zuyu): drop the temp relation created by EliminateEmptyNode rule.
+ temporary_relation_info_vec_.emplace_back(table_export_operator_index, input_relation);
}
}
@@ -1639,6 +1654,9 @@ void ExecutionGenerator::convertInsertSelection(
execution_plan_->addDirectDependency(insert_selection_index,
selection_relation_info->producer_operator_index,
false /* is_pipeline_breaker */);
+ } else if (selection_relation.isTemporary()) {
+ // NOTE(zuyu): drop the temp relation created by EliminateEmptyNode rule.
+ temporary_relation_info_vec_.emplace_back(insert_selection_index, &selection_relation);
}
execution_plan_->addDirectDependency(save_blocks_index,
insert_selection_index,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/Optimizer.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/Optimizer.cpp b/query_optimizer/Optimizer.cpp
index d002ca1..9e6472f 100644
--- a/query_optimizer/Optimizer.cpp
+++ b/query_optimizer/Optimizer.cpp
@@ -37,7 +37,8 @@ void Optimizer::generateQueryHandle(const ParseStatement &parse_statement,
execution_generator.generatePlan(
physical_generator.generatePlan(
- logical_generator.generatePlan(*catalog_database, parse_statement)));
+ logical_generator.generatePlan(*catalog_database, parse_statement),
+ catalog_database));
}
void Optimizer::findReferencedBaseRelations(const ParseStatement &parse_statement,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 0d15a66..c53707c 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -28,6 +28,7 @@
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/rules/AttachLIPFilters.hpp"
#include "query_optimizer/rules/CollapseSelection.hpp"
+#include "query_optimizer/rules/EliminateEmptyNode.hpp"
#include "query_optimizer/rules/ExtractCommonSubexpression.hpp"
#include "query_optimizer/rules/FuseAggregateJoin.hpp"
#include "query_optimizer/rules/FuseHashSelect.hpp"
@@ -64,6 +65,9 @@ DEFINE_bool(reorder_hash_joins, true,
"cardinality and selective tables to be joined first, which is suitable "
"for queries on star-schema tables.");
+DEFINE_bool(use_eliminate_empty_node, true,
+ "If true, apply an optimization to eliminate joins if at least "
+ "one side is empty.");
DEFINE_bool(use_partition_rule, true,
"If true, apply an optimization to support partitioned inputs. The "
"optimization may add additional Selection for repartitioning.");
@@ -105,9 +109,9 @@ void PhysicalGenerator::createStrategies() {
}
P::PhysicalPtr PhysicalGenerator::generatePlan(
- const L::LogicalPtr &logical_plan) {
+ const L::LogicalPtr &logical_plan, CatalogDatabase *catalog_database) {
physical_plan_ = generateInitialPlan(logical_plan);
- return optimizePlan();
+ return optimizePlan(catalog_database);
}
P::PhysicalPtr PhysicalGenerator::generateInitialPlan(
@@ -130,10 +134,15 @@ P::PhysicalPtr PhysicalGenerator::generateInitialPlan(
return physical_plan;
}
-P::PhysicalPtr PhysicalGenerator::optimizePlan() {
+P::PhysicalPtr PhysicalGenerator::optimizePlan(
+ CatalogDatabase *catalog_database) {
std::vector<std::unique_ptr<Rule<P::Physical>>> rules;
rules.emplace_back(new PruneColumns());
+ if (FLAGS_use_eliminate_empty_node) {
+ rules.emplace_back(new EliminateEmptyNode(catalog_database));
+ }
+
rules.emplace_back(new PushDownLowCostDisjunctivePredicate());
rules.emplace_back(new ReduceGroupByAttributes(optimizer_context_));
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/PhysicalGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.hpp b/query_optimizer/PhysicalGenerator.hpp
index 42fea86..1b6a6a6 100644
--- a/query_optimizer/PhysicalGenerator.hpp
+++ b/query_optimizer/PhysicalGenerator.hpp
@@ -31,6 +31,9 @@
#include "utility/Macros.hpp"
namespace quickstep {
+
+class CatalogDatabase;
+
namespace optimizer {
class OptimizerContext;
@@ -68,9 +71,11 @@ class PhysicalGenerator : public LogicalToPhysicalMapper {
* @brief Generates and optimizes a physical plan for \p logical_plan.
*
* @param logical_plan The logical plan to be converted to a physical plan.
+ * @param catalog_database The catalog database where this query is executed.
* @return The physical plan.
*/
- physical::PhysicalPtr generatePlan(const logical::LogicalPtr &logical_plan);
+ physical::PhysicalPtr generatePlan(const logical::LogicalPtr &logical_plan,
+ CatalogDatabase *catalog_database);
/**
* @brief Sets the best physical plan for \p logical_plan is \p physical_plan.
@@ -106,9 +111,10 @@ class PhysicalGenerator : public LogicalToPhysicalMapper {
/**
* @brief Applies physical rules to the initial physical plan.
*
+ * @param catalog_database The catalog database where this query is executed.
* @return The optimized physical plan.
*/
- physical::PhysicalPtr optimizePlan();
+ physical::PhysicalPtr optimizePlan(CatalogDatabase *catalog_database);
std::vector<std::unique_ptr<strategy::Strategy>> strategies_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/Aggregate.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/Aggregate.cpp b/query_optimizer/physical/Aggregate.cpp
index 94be616..5ae9fc7 100644
--- a/query_optimizer/physical/Aggregate.cpp
+++ b/query_optimizer/physical/Aggregate.cpp
@@ -23,14 +23,18 @@
#include <vector>
#include "query_optimizer/OptimizerTree.hpp"
+#include "query_optimizer/expressions/Alias.hpp"
#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ExpressionUtil.hpp"
#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
#include "query_optimizer/expressions/Predicate.hpp"
#include "query_optimizer/physical/PartitionSchemeHeader.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "utility/Cast.hpp"
+#include "glog/logging.h"
+
namespace quickstep {
namespace optimizer {
namespace physical {
@@ -89,6 +93,22 @@ std::vector<E::AttributeReferencePtr> Aggregate::getReferencedAttributes()
return referenced_attributes;
}
+PhysicalPtr Aggregate::copyWithNewProjectExpressions(
+ const std::vector<E::NamedExpressionPtr> &output_expressions) const {
+ DCHECK_EQ(aggregate_expressions_.size(), output_expressions.size());
+
+ std::vector<E::AliasPtr> new_aggregate_expressions;
+ new_aggregate_expressions.reserve(aggregate_expressions_.size());
+ for (const auto &output_expression : output_expressions) {
+ DCHECK(E::SomeAlias::Matches(output_expression));
+
+ new_aggregate_expressions.push_back(
+ std::static_pointer_cast<const E::Alias>(output_expression));
+ }
+
+ return Create(input_, grouping_expressions_, new_aggregate_expressions, filter_predicate_);
+}
+
void Aggregate::getFieldStringItems(
std::vector<std::string> *inline_field_names,
std::vector<std::string> *inline_field_values,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/Aggregate.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/Aggregate.hpp b/query_optimizer/physical/Aggregate.hpp
index 68f8811..736082c 100644
--- a/query_optimizer/physical/Aggregate.hpp
+++ b/query_optimizer/physical/Aggregate.hpp
@@ -103,6 +103,9 @@ class Aggregate : public Physical {
has_repartition, partition_scheme_header);
}
+ PhysicalPtr copyWithNewProjectExpressions(
+ const std::vector<expressions::NamedExpressionPtr> &output_expressions) const override;
+
bool maybeCopyWithPrunedExpressions(
const expressions::UnorderedNamedExpressionSet &referenced_expressions,
PhysicalPtr *output) const override {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/CMakeLists.txt b/query_optimizer/physical/CMakeLists.txt
index a1a72f7..dff8f99 100644
--- a/query_optimizer/physical/CMakeLists.txt
+++ b/query_optimizer/physical/CMakeLists.txt
@@ -58,6 +58,7 @@ target_link_libraries(quickstep_queryoptimizer_physical_Aggregate
quickstep_queryoptimizer_expressions_AttributeReference
quickstep_queryoptimizer_expressions_ExpressionUtil
quickstep_queryoptimizer_expressions_NamedExpression
+ quickstep_queryoptimizer_expressions_PatternMatcher
quickstep_queryoptimizer_expressions_Predicate
quickstep_queryoptimizer_physical_PartitionSchemeHeader
quickstep_queryoptimizer_physical_Physical
@@ -220,6 +221,7 @@ target_link_libraries(quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_OptimizerTree
quickstep_queryoptimizer_expressions_AttributeReference
quickstep_queryoptimizer_expressions_ExpressionUtil
+ quickstep_queryoptimizer_expressions_NamedExpression
quickstep_queryoptimizer_physical_PartitionSchemeHeader
quickstep_queryoptimizer_physical_PhysicalType
quickstep_utility_Macros)
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/HashJoin.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.cpp b/query_optimizer/physical/HashJoin.cpp
index ca6bf9f..7ce80e8 100644
--- a/query_optimizer/physical/HashJoin.cpp
+++ b/query_optimizer/physical/HashJoin.cpp
@@ -34,6 +34,8 @@ namespace quickstep {
namespace optimizer {
namespace physical {
+namespace E = ::quickstep::optimizer::expressions;
+
std::vector<expressions::AttributeReferencePtr> HashJoin::getReferencedAttributes() const {
std::vector<expressions::AttributeReferencePtr> referenced_attributes;
for (const expressions::NamedExpressionPtr &project_expression :
@@ -67,6 +69,14 @@ std::vector<expressions::AttributeReferencePtr> HashJoin::getReferencedAttribute
return referenced_attributes;
}
+PhysicalPtr HashJoin::copyWithNewProjectExpressions(
+ const std::vector<E::NamedExpressionPtr> &output_expressions) const {
+ DCHECK_EQ(project_expressions().size(), output_expressions.size());
+
+ return Create(left(), right(), left_join_attributes_, right_join_attributes_,
+ residual_predicate_, build_predicate_, output_expressions, join_type_);
+}
+
bool HashJoin::maybeCopyWithPrunedExpressions(
const expressions::UnorderedNamedExpressionSet &referenced_expressions,
PhysicalPtr *output) const {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/HashJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.hpp b/query_optimizer/physical/HashJoin.hpp
index 1c341df..1f0df0c 100644
--- a/query_optimizer/physical/HashJoin.hpp
+++ b/query_optimizer/physical/HashJoin.hpp
@@ -141,6 +141,9 @@ class HashJoin : public BinaryJoin {
has_repartition, partition_scheme_header);
}
+ PhysicalPtr copyWithNewProjectExpressions(
+ const std::vector<expressions::NamedExpressionPtr> &output_expressions) const override;
+
bool maybeCopyWithPrunedExpressions(
const expressions::UnorderedNamedExpressionSet &referenced_expressions,
PhysicalPtr *output) const override;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/NestedLoopsJoin.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/NestedLoopsJoin.cpp b/query_optimizer/physical/NestedLoopsJoin.cpp
index ba3d223..59e3b00 100644
--- a/query_optimizer/physical/NestedLoopsJoin.cpp
+++ b/query_optimizer/physical/NestedLoopsJoin.cpp
@@ -31,6 +31,8 @@ namespace quickstep {
namespace optimizer {
namespace physical {
+namespace E = ::quickstep::optimizer::expressions;
+
std::vector<expressions::AttributeReferencePtr> NestedLoopsJoin::getReferencedAttributes() const {
std::vector<expressions::AttributeReferencePtr> referenced_attributes;
for (const expressions::NamedExpressionPtr &project_expression :
@@ -49,6 +51,13 @@ std::vector<expressions::AttributeReferencePtr> NestedLoopsJoin::getReferencedAt
return referenced_attributes;
}
+PhysicalPtr NestedLoopsJoin::copyWithNewProjectExpressions(
+ const std::vector<E::NamedExpressionPtr> &output_expressions) const {
+ DCHECK_EQ(project_expressions().size(), output_expressions.size());
+
+ return Create(left(), right(), join_predicate_, output_expressions);
+}
+
bool NestedLoopsJoin::maybeCopyWithPrunedExpressions(
const expressions::UnorderedNamedExpressionSet &referenced_expressions,
PhysicalPtr *output) const {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/NestedLoopsJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/NestedLoopsJoin.hpp b/query_optimizer/physical/NestedLoopsJoin.hpp
index 0e0260c..87db5c3 100644
--- a/query_optimizer/physical/NestedLoopsJoin.hpp
+++ b/query_optimizer/physical/NestedLoopsJoin.hpp
@@ -81,6 +81,9 @@ class NestedLoopsJoin : public BinaryJoin {
std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() const override;
+ PhysicalPtr copyWithNewProjectExpressions(
+ const std::vector<expressions::NamedExpressionPtr> &output_expressions) const override;
+
bool maybeCopyWithPrunedExpressions(
const expressions::UnorderedNamedExpressionSet &referenced_expressions,
PhysicalPtr *output) const override;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/PatternMatcher.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/PatternMatcher.hpp b/query_optimizer/physical/PatternMatcher.hpp
index 0204504..8ae7da0 100644
--- a/query_optimizer/physical/PatternMatcher.hpp
+++ b/query_optimizer/physical/PatternMatcher.hpp
@@ -46,6 +46,7 @@ class SharedSubplanReference;
class Sort;
class TableReference;
class TopLevelPlan;
+class UnionAll;
class UpdateTable;
/** \addtogroup OptimizerPhysical
@@ -127,6 +128,7 @@ using SomeSharedSubplanReference = SomePhysicalNode<SharedSubplanReference, Phys
using SomeSort = SomePhysicalNode<Sort, PhysicalType::kSort>;
using SomeTableReference = SomePhysicalNode<TableReference, PhysicalType::kTableReference>;
using SomeTopLevelPlan = SomePhysicalNode<TopLevelPlan, PhysicalType::kTopLevelPlan>;
+using SomeUnionAll = SomePhysicalNode<UnionAll, PhysicalType::kUnionAll>;
using SomeUpdateTable = SomePhysicalNode<UpdateTable, PhysicalType::kUpdateTable>;
/** @} */
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/Physical.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/Physical.hpp b/query_optimizer/physical/Physical.hpp
index 4491520..be5d282 100644
--- a/query_optimizer/physical/Physical.hpp
+++ b/query_optimizer/physical/Physical.hpp
@@ -26,6 +26,7 @@
#include "query_optimizer/OptimizerTree.hpp"
#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
#include "query_optimizer/physical/PartitionSchemeHeader.hpp"
#include "query_optimizer/physical/PhysicalType.hpp"
#include "utility/Macros.hpp"
@@ -107,6 +108,11 @@ class Physical : public OptimizerTree<Physical> {
LOG(FATAL) << "copyWithNewOutputPartitionSchemeHeader is not implemented for " << getName();
}
+ virtual PhysicalPtr copyWithNewProjectExpressions(
+ const std::vector<expressions::NamedExpressionPtr> &output_expressions) const {
+ LOG(FATAL) << "copyWithNewProjectExpressions is not implemented for " << getName();
+ }
+
/**
* @brief Get the partition scheme of the physical plan node.
*
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/Selection.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/Selection.cpp b/query_optimizer/physical/Selection.cpp
index b2b3f7e..bd155ea 100644
--- a/query_optimizer/physical/Selection.cpp
+++ b/query_optimizer/physical/Selection.cpp
@@ -72,6 +72,13 @@ std::vector<E::AttributeReferencePtr> Selection::getReferencedAttributes() const
return referenced_attributes;
}
+PhysicalPtr Selection::copyWithNewProjectExpressions(
+ const std::vector<E::NamedExpressionPtr> &output_expressions) const {
+ DCHECK_EQ(project_expressions_.size(), output_expressions.size());
+
+ return Create(input(), output_expressions, filter_predicate_);
+}
+
bool Selection::maybeCopyWithPrunedExpressions(
const E::UnorderedNamedExpressionSet &referenced_attributes,
PhysicalPtr *output) const {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/Selection.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/Selection.hpp b/query_optimizer/physical/Selection.hpp
index 2b50061..fb1fa0b 100644
--- a/query_optimizer/physical/Selection.hpp
+++ b/query_optimizer/physical/Selection.hpp
@@ -91,6 +91,9 @@ class Selection : public Physical {
has_repartition, partition_scheme_header);
}
+ PhysicalPtr copyWithNewProjectExpressions(
+ const std::vector<expressions::NamedExpressionPtr> &output_expressions) const override;
+
bool maybeCopyWithPrunedExpressions(
const expressions::UnorderedNamedExpressionSet &referenced_attributes,
PhysicalPtr *output) const override;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/UnionAll.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/UnionAll.hpp b/query_optimizer/physical/UnionAll.hpp
index 939249f..943c3c3 100644
--- a/query_optimizer/physical/UnionAll.hpp
+++ b/query_optimizer/physical/UnionAll.hpp
@@ -132,6 +132,10 @@ class UnionAll : public Physical {
return true;
}
+ const std::vector<expressions::AttributeReferencePtr>& project_attributes() const {
+ return project_attributes_;
+ }
+
/**
* @brief Creates the physical node of UnionAll.
*
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index f9cd51c..17ef695 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -22,6 +22,7 @@ add_library(quickstep_queryoptimizer_rules_AttachLIPFilters AttachLIPFilters.cpp
add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp)
add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp)
add_library(quickstep_queryoptimizer_rules_CollapseSelection CollapseSelection.cpp CollapseSelection.hpp)
+add_library(quickstep_queryoptimizer_rules_EliminateEmptyNode EliminateEmptyNode.cpp EliminateEmptyNode.hpp)
add_library(quickstep_queryoptimizer_rules_ExtractCommonSubexpression
ExtractCommonSubexpression.cpp
ExtractCommonSubexpression.hpp)
@@ -101,6 +102,28 @@ target_link_libraries(quickstep_queryoptimizer_rules_CollapseSelection
quickstep_queryoptimizer_rules_BottomUpRule
quickstep_queryoptimizer_rules_RuleHelper
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_EliminateEmptyNode
+ quickstep_catalog_CatalogAttribute
+ quickstep_catalog_CatalogDatabase
+ quickstep_catalog_CatalogRelation
+ quickstep_catalog_CatalogRelationStatistics
+ quickstep_queryoptimizer_OptimizerContext
+ quickstep_queryoptimizer_expressions_Alias
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExpressionUtil
+ quickstep_queryoptimizer_expressions_NamedExpression
+ quickstep_queryoptimizer_expressions_PatternMatcher
+ quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_queryoptimizer_physical_Selection
+ quickstep_queryoptimizer_physical_TableReference
+ quickstep_queryoptimizer_physical_TopLevelPlan
+ quickstep_queryoptimizer_physical_UnionAll
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_rules_ExtractCommonSubexpression
glog
quickstep_queryoptimizer_OptimizerContext
@@ -434,6 +457,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
quickstep_queryoptimizer_rules_BottomUpRule
quickstep_queryoptimizer_rules_CollapseProject
quickstep_queryoptimizer_rules_CollapseSelection
+ quickstep_queryoptimizer_rules_EliminateEmptyNode
quickstep_queryoptimizer_rules_ExtractCommonSubexpression
quickstep_queryoptimizer_rules_FuseAggregateJoin
quickstep_queryoptimizer_rules_FuseHashSelect
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/rules/EliminateEmptyNode.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/EliminateEmptyNode.cpp b/query_optimizer/rules/EliminateEmptyNode.cpp
new file mode 100644
index 0000000..716a167
--- /dev/null
+++ b/query_optimizer/rules/EliminateEmptyNode.cpp
@@ -0,0 +1,358 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/EliminateEmptyNode.hpp"
+
+#include <cstddef>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "catalog/CatalogAttribute.hpp"
+#include "catalog/CatalogDatabase.hpp"
+#include "catalog/CatalogRelation.hpp"
+#include "catalog/CatalogRelationStatistics.hpp"
+#include "query_optimizer/OptimizerContext.hpp"
+#include "query_optimizer/expressions/Alias.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/TableReference.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/physical/UnionAll.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = expressions;
+namespace P = physical;
+
+namespace {
+
+std::string GetNewRelationName(const std::size_t id) {
+ std::ostringstream out;
+ out << OptimizerContext::kInternalTemporaryRelationNamePrefix << id;
+ return out.str();
+}
+
+bool IsTableReferenceOnEmptyRelation(const P::PhysicalPtr &node) {
+ P::TableReferencePtr table_reference;
+ if (!P::SomeTableReference::MatchesWithConditionalCast(node, &table_reference)) {
+ return false;
+ }
+
+ const CatalogRelationStatistics &stat = table_reference->relation()->getStatistics();
+ return stat.isExact() && (stat.getNumTuples() == 0u);
+}
+
+P::PhysicalPtr ApplyToHashJoin(const P::HashJoinPtr &hash_join) {
+ const P::PhysicalPtr &left = hash_join->left();
+ if (IsTableReferenceOnEmptyRelation(left)) {
+ return nullptr;
+ }
+
+ const P::PhysicalPtr &right = hash_join->right();
+ switch (hash_join->join_type()) {
+ case P::HashJoin::JoinType::kInnerJoin:
+ case P::HashJoin::JoinType::kLeftSemiJoin:
+ if (IsTableReferenceOnEmptyRelation(right)) {
+ return nullptr;
+ }
+ break;
+ case P::HashJoin::JoinType::kLeftAntiJoin: {
+ const auto &project_expressions = hash_join->project_expressions();
+#ifdef QUICKSTEP_DEBUG
+ const auto left_output_attributes = left->getOutputAttributes();
+ if (!E::SubsetOfExpressions(project_expressions, left_output_attributes)) {
+ std::vector<E::NamedExpressionPtr> non_alias_expressions, alias_expressions;
+ for (const auto &project_expression : project_expressions) {
+ if (E::SomeAlias::Matches(project_expression)) {
+ for (const auto referenced_attribute : project_expression->getReferencedAttributes()) {
+ alias_expressions.push_back(referenced_attribute);
+ }
+ } else {
+ non_alias_expressions.push_back(project_expression);
+ }
+ }
+
+ CHECK(E::SubsetOfExpressions(non_alias_expressions, left_output_attributes));
+ CHECK(E::SubsetOfExpressions(alias_expressions, left_output_attributes));
+ }
+#endif
+
+ if (IsTableReferenceOnEmptyRelation(right)) {
+ if (hash_join->residual_predicate()) {
+ return nullptr;
+ }
+ return P::Selection::Create(left, project_expressions, nullptr);
+ }
+ break;
+ }
+ case P::HashJoin::JoinType::kLeftOuterJoin:
+ if (IsTableReferenceOnEmptyRelation(right)) {
+ if (hash_join->residual_predicate()) {
+ return nullptr;
+ }
+
+ const auto &project_expressions = hash_join->project_expressions();
+ if (E::SubsetOfExpressions(project_expressions, left->getOutputAttributes())) {
+ return P::Selection::Create(left, project_expressions, nullptr);
+ }
+ }
+ break;
+ }
+
+ return hash_join;
+}
+
+P::PhysicalPtr ApplyToNode(const P::PhysicalPtr &node) {
+ switch (node->getPhysicalType()) {
+ case P::PhysicalType::kHashJoin:
+ return ApplyToHashJoin(std::static_pointer_cast<const P::HashJoin>(node));
+ case P::PhysicalType::kTableReference:
+ if (IsTableReferenceOnEmptyRelation(node)) {
+ return nullptr;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return node;
+}
+
+
+P::PhysicalPtr CopyWithNewProjectExpressions(const P::UnionAllPtr &union_all,
+ const P::PhysicalPtr &child) {
+ const auto &project_attributes = union_all->project_attributes();
+
+ std::vector<E::NamedExpressionPtr> alias_expressions;
+ P::AggregatePtr aggregate;
+ if (P::SomeAggregate::MatchesWithConditionalCast(child, &aggregate)) {
+ const auto &aggregate_expressions = aggregate->aggregate_expressions();
+ alias_expressions.reserve(aggregate_expressions.size());
+
+ int aid = 0;
+ for (const auto &project_attribute : project_attributes) {
+ const auto alias_referenced_attributes =
+ aggregate_expressions[aid]->getReferencedAttributes();
+ DCHECK_EQ(1u, alias_referenced_attributes.size());
+
+ alias_expressions.emplace_back(E::Alias::Create(
+ project_attribute->id(),
+ alias_referenced_attributes.front(),
+ project_attribute->attribute_name(),
+ project_attribute->attribute_alias(),
+ project_attribute->relation_name()));
+ ++aid;
+ }
+ } else {
+ const auto child_output_attributes = child->getOutputAttributes();
+ alias_expressions.reserve(child_output_attributes.size());
+
+ int aid = 0;
+ for (const auto &project_attribute : project_attributes) {
+ alias_expressions.emplace_back(E::Alias::Create(
+ project_attribute->id(),
+ child_output_attributes[aid],
+ project_attribute->attribute_name(),
+ project_attribute->attribute_alias(),
+ project_attribute->relation_name()));
+ ++aid;
+ }
+ }
+
+ return child->copyWithNewProjectExpressions(alias_expressions);
+}
+
+// TopDown approach.
+P::PhysicalPtr Apply(const P::PhysicalPtr &node) {
+ DCHECK(node);
+
+ const auto new_node = ApplyToNode(node);
+ if (new_node == nullptr) {
+ return nullptr;
+ }
+
+ std::vector<P::PhysicalPtr> new_children;
+ bool has_changed_children = false;
+ for (const auto &child : new_node->children()) {
+ const auto new_child = Apply(child);
+ if (new_child == nullptr) {
+ if (P::SomeUnionAll::Matches(node)) {
+ has_changed_children = true;
+ continue;
+ }
+
+ return nullptr;
+ } else if (child != new_child && !has_changed_children) {
+ has_changed_children = true;
+ }
+ new_children.push_back(new_child);
+ }
+
+ if (has_changed_children) {
+ if (P::SomeUnionAll::Matches(node)) {
+ switch (new_children.size()) {
+ case 0u:
+ return nullptr;
+ case 1u:
+ return CopyWithNewProjectExpressions(
+ std::static_pointer_cast<const P::UnionAll>(node), new_children.front());
+ default:
+ break;
+ }
+ }
+
+ DCHECK(!new_children.empty());
+ return new_node->copyWithNewChildren(new_children);
+ } else {
+ return new_node;
+ }
+}
+
+} // namespace
+
+P::PhysicalPtr EliminateEmptyNode::apply(const P::PhysicalPtr &input) {
+ DCHECK(P::SomeTopLevelPlan::Matches(input));
+ const P::TopLevelPlanPtr &top_level_plan =
+ std::static_pointer_cast<const P::TopLevelPlan>(input);
+
+ // TODO(quickstep-team): handle subquery.
+ if (!top_level_plan->shared_subplans().empty()) {
+ return input;
+ }
+
+ const auto &plan = top_level_plan->plan();
+
+ P::SelectionPtr selection;
+ if (P::SomeSelection::MatchesWithConditionalCast(plan, &selection) &&
+ P::SomeTableReference::Matches(selection->input())) {
+ return input;
+ }
+
+ const P::PhysicalType plan_type = plan->getPhysicalType();
+ std::vector<E::AttributeReferencePtr> project_expressions;
+ switch (plan_type) {
+ case P::PhysicalType::kAggregate:
+ case P::PhysicalType::kCrossReferenceCoalesceAggregate:
+ case P::PhysicalType::kFilterJoin:
+ case P::PhysicalType::kHashJoin:
+ case P::PhysicalType::kNestedLoopsJoin:
+ case P::PhysicalType::kSample:
+ case P::PhysicalType::kSelection:
+ case P::PhysicalType::kSort:
+ case P::PhysicalType::kUnionAll:
+ case P::PhysicalType::kWindowAggregate:
+ project_expressions = plan->getOutputAttributes();
+ break;
+ case P::PhysicalType::kCopyTo:
+ case P::PhysicalType::kInsertSelection:
+ project_expressions = plan->getReferencedAttributes();
+ break;
+ case P::PhysicalType::kCopyFrom:
+ case P::PhysicalType::kCreateIndex:
+ case P::PhysicalType::kCreateTable:
+ case P::PhysicalType::kDeleteTuples:
+ case P::PhysicalType::kDropTable:
+ case P::PhysicalType::kInsertTuple:
+ case P::PhysicalType::kSharedSubplanReference:
+ case P::PhysicalType::kTableGenerator:
+ case P::PhysicalType::kUpdateTable:
+ // TODO(quickstep-team): revisit the cases above.
+ return input;
+ case P::PhysicalType::kTableReference:
+ case P::PhysicalType::kTopLevelPlan:
+ LOG(FATAL) << "Unexpected PhysicalType.";
+ }
+ DCHECK(!project_expressions.empty());
+
+ auto output = Apply(plan);
+ if (output == plan) {
+ return input;
+ }
+
+ if (output) {
+ return input->copyWithNewChildren({output});
+ }
+
+ auto catalog_relation = std::make_unique<CatalogRelation>(
+ catalog_database_, GetNewRelationName(catalog_database_->size()), -1, true);
+
+ attribute_id aid = 0;
+ std::unordered_set<std::string> relation_name_alias;
+ for (const auto &project_expression : project_expressions) {
+ // The attribute name is simply set to the attribute id to make it distinct.
+ auto catalog_attribute = std::make_unique<CatalogAttribute>(
+ catalog_relation.get(), project_expression->attribute_name(),
+ project_expression->getValueType(), aid,
+ project_expression->attribute_alias());
+ catalog_relation->addAttribute(catalog_attribute.release());
+ ++aid;
+
+ relation_name_alias.insert(project_expression->relation_name());
+ }
+
+ // TODO(quickstep-team): use Nop physical node.
+ const auto temp_table =
+ P::TableReference::Create(catalog_relation.get(),
+ relation_name_alias.size() == 1u ? *relation_name_alias.begin() : "",
+ project_expressions);
+ catalog_database_->addRelation(catalog_relation.release());
+
+ switch (plan_type) {
+ case P::PhysicalType::kAggregate:
+ case P::PhysicalType::kCrossReferenceCoalesceAggregate:
+ case P::PhysicalType::kFilterJoin:
+ case P::PhysicalType::kHashJoin:
+ case P::PhysicalType::kNestedLoopsJoin:
+ case P::PhysicalType::kSample:
+ case P::PhysicalType::kSelection:
+ case P::PhysicalType::kSort:
+ case P::PhysicalType::kUnionAll:
+ case P::PhysicalType::kWindowAggregate:
+ output = P::Selection::Create(temp_table, E::ToNamedExpressions(project_expressions), nullptr);
+ break;
+ case P::PhysicalType::kCopyTo:
+ output = plan->copyWithNewChildren({ temp_table });
+ break;
+ case P::PhysicalType::kInsertSelection:
+ output = plan->copyWithNewChildren({ plan->children().front(), temp_table });
+ break;
+ default:
+ LOG(FATAL) << "Unexpected PhysicalType.";
+ }
+
+ DCHECK(output);
+ return input->copyWithNewChildren({output});
+}
+
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/rules/EliminateEmptyNode.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/EliminateEmptyNode.hpp b/query_optimizer/rules/EliminateEmptyNode.hpp
new file mode 100644
index 0000000..1e8f6fd
--- /dev/null
+++ b/query_optimizer/rules/EliminateEmptyNode.hpp
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_ELIMINATE_EMPTY_NODE_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_ELIMINATE_EMPTY_NODE_HPP_
+
+#include <string>
+
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+
+class CatalogDatabase;
+
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ * @{
+ */
+
+/**
+ * @brief Rule that applies to a physical plan to eliminate a node with an empty
+ * TableReference to a Selection on a temp table.
+ */
+class EliminateEmptyNode : public Rule<physical::Physical> {
+ public:
+ /**
+ * @brief Constructor.
+ */
+ explicit EliminateEmptyNode(CatalogDatabase *catalog_database)
+ : catalog_database_(catalog_database) {}
+
+ ~EliminateEmptyNode() override {}
+
+ std::string getName() const override {
+ return "EliminateEmptyNode";
+ }
+
+ physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+ CatalogDatabase *catalog_database_;
+
+ DISALLOW_COPY_AND_ASSIGN(EliminateEmptyNode);
+};
+
+/** @} */
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_ELIMINATE_EMPTY_NODE_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/tests/OptimizerTextTestRunner.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/OptimizerTextTestRunner.cpp b/query_optimizer/tests/OptimizerTextTestRunner.cpp
index cb8f153..cea529d 100644
--- a/query_optimizer/tests/OptimizerTextTestRunner.cpp
+++ b/query_optimizer/tests/OptimizerTextTestRunner.cpp
@@ -129,7 +129,8 @@ physical::PhysicalPtr OptimizerTextTestRunner::generatePhysicalPlan(
const logical::LogicalPtr &logical_plan,
OptimizerContext *optimizer_context) {
PhysicalGenerator physical_generator(optimizer_context);
- return physical_generator.generatePlan(logical_plan);
+ return physical_generator.generatePlan(logical_plan,
+ test_database_loader_.catalog_database());
}
} // namespace optimizer
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/tests/TestDatabaseLoader.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/TestDatabaseLoader.cpp b/query_optimizer/tests/TestDatabaseLoader.cpp
index f40a3e0..45bb6d3 100644
--- a/query_optimizer/tests/TestDatabaseLoader.cpp
+++ b/query_optimizer/tests/TestDatabaseLoader.cpp
@@ -51,7 +51,7 @@ CatalogRelation *TestDatabaseLoader::createTestRelation(bool allow_vchar) {
catalog_relation.reset(new CatalogRelation(&catalog_database_,
"Test" /* name */,
0 /* id */,
- true /* temporary */));
+ false /* temporary */));
int attr_id = -1;
catalog_relation->addAttribute(new CatalogAttribute(catalog_relation.get(),
"int_col" /* name */,