You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by zu...@apache.org on 2017/01/31 08:17:02 UTC
[1/4] incubator-quickstep git commit: Minor refactor for
InsertDestinations. [Forced Update!]
Repository: incubator-quickstep
Updated Branches:
refs/heads/push-down-disjunctive-predicate 6a184c8ad -> 26e7f80a6 (forced update)
Minor refactor for InsertDestinations.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/f2e77266
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/f2e77266
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/f2e77266
Branch: refs/heads/push-down-disjunctive-predicate
Commit: f2e77266edeaff38a60650b48836ff6ddb3b84ca
Parents: 0f4938c
Author: Zuyu Zhang <zu...@apache.org>
Authored: Mon Jan 30 15:24:03 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Mon Jan 30 15:24:03 2017 -0800
----------------------------------------------------------------------
storage/InsertDestination.cpp | 17 ++++-------------
storage/InsertDestination.hpp | 4 +++-
storage/InsertDestinationInterface.hpp | 2 +-
storage/StorageBlock.hpp | 2 +-
4 files changed, 9 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f2e77266/storage/InsertDestination.cpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.cpp b/storage/InsertDestination.cpp
index 944998f..714e6e5 100644
--- a/storage/InsertDestination.cpp
+++ b/storage/InsertDestination.cpp
@@ -290,7 +290,6 @@ void InsertDestination::bulkInsertTuplesFromValueAccessors(
ValueAccessor *accessor = p.first;
std::vector<attribute_id> attribute_map = p.second;
-
InvokeOnAnyValueAccessor(
accessor,
[&](auto *accessor) -> void { // NOLINT(build/c++11)
@@ -621,11 +620,10 @@ void PartitionAwareInsertDestination::bulkInsertTuples(ValueAccessor *accessor,
&always_mark_full,
&num_partitions](auto *accessor) -> void { // NOLINT(build/c++11)
std::vector<std::unique_ptr<TupleIdSequence>> partition_membership;
- partition_membership.resize(num_partitions);
// Create a tuple-id sequence for each partition.
for (std::size_t partition = 0; partition < num_partitions; ++partition) {
- partition_membership[partition].reset(new TupleIdSequence(accessor->getEndPosition()));
+ partition_membership.emplace_back(std::make_unique<TupleIdSequence>(accessor->getEndPosition()));
}
// Iterate over ValueAccessor for each tuple,
@@ -641,9 +639,8 @@ void PartitionAwareInsertDestination::bulkInsertTuples(ValueAccessor *accessor,
// TupleIdSequence.
std::vector<std::unique_ptr<typename std::remove_pointer<
decltype(accessor->createSharedTupleIdSequenceAdapter(*partition_membership.front()))>::type>> adapter;
- adapter.resize(num_partitions);
for (std::size_t partition = 0; partition < num_partitions; ++partition) {
- adapter[partition].reset(accessor->createSharedTupleIdSequenceAdapter(*partition_membership[partition]));
+ adapter.emplace_back(accessor->createSharedTupleIdSequenceAdapter(*partition_membership[partition]));
}
// Bulk-insert into a block belonging to the partition.
@@ -678,11 +675,10 @@ void PartitionAwareInsertDestination::bulkInsertTuplesWithRemappedAttributes(
&always_mark_full,
&num_partitions](auto *accessor) -> void { // NOLINT(build/c++11)
std::vector<std::unique_ptr<TupleIdSequence>> partition_membership;
- partition_membership.resize(num_partitions);
// Create a tuple-id sequence for each partition.
for (std::size_t partition = 0; partition < num_partitions; ++partition) {
- partition_membership[partition].reset(new TupleIdSequence(accessor->getEndPosition()));
+ partition_membership.emplace_back(std::make_unique<TupleIdSequence>(accessor->getEndPosition()));
}
// Iterate over ValueAccessor for each tuple,
@@ -698,9 +694,8 @@ void PartitionAwareInsertDestination::bulkInsertTuplesWithRemappedAttributes(
// TupleIdSequence.
std::vector<std::unique_ptr<typename std::remove_pointer<
decltype(accessor->createSharedTupleIdSequenceAdapter(*partition_membership.front()))>::type>> adapter;
- adapter.resize(num_partitions);
for (std::size_t partition = 0; partition < num_partitions; ++partition) {
- adapter[partition].reset(accessor->createSharedTupleIdSequenceAdapter(*partition_membership[partition]));
+ adapter.emplace_back(accessor->createSharedTupleIdSequenceAdapter(*partition_membership[partition]));
}
// Bulk-insert into a block belonging to the partition.
@@ -742,10 +737,6 @@ void PartitionAwareInsertDestination::insertTuplesFromVector(std::vector<Tuple>:
}
}
-MutableBlockReference PartitionAwareInsertDestination::getBlockForInsertion() {
- FATAL_ERROR("PartitionAwareInsertDestination::getBlockForInsertion needs a partition id as an argument.");
-}
-
MutableBlockReference PartitionAwareInsertDestination::getBlockForInsertionInPartition(const partition_id part_id) {
DCHECK_LT(part_id, partition_scheme_header_->getNumPartitions());
SpinMutexLock lock(mutexes_for_partition_[part_id]);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f2e77266/storage/InsertDestination.hpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.hpp b/storage/InsertDestination.hpp
index c3c40bd..6707192 100644
--- a/storage/InsertDestination.hpp
+++ b/storage/InsertDestination.hpp
@@ -539,7 +539,9 @@ class PartitionAwareInsertDestination : public InsertDestination {
std::vector<Tuple>::const_iterator end) override;
protected:
- MutableBlockReference getBlockForInsertion() override;
+ MutableBlockReference getBlockForInsertion() override {
+ LOG(FATAL) << "PartitionAwareInsertDestination::getBlockForInsertion needs a partition id as an argument.";
+ }
/**
* @brief Get a block to use for insertion from a partition.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f2e77266/storage/InsertDestinationInterface.hpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestinationInterface.hpp b/storage/InsertDestinationInterface.hpp
index b62d3e5..be6b0c2 100644
--- a/storage/InsertDestinationInterface.hpp
+++ b/storage/InsertDestinationInterface.hpp
@@ -131,7 +131,7 @@ class InsertDestinationInterface {
*
* @param accessor_attribute_map A vector of pairs of ValueAccessor and
* corresponding attribute map
- * The i-th attribute ID in the attr map for a value accessor is "n"
+ * The i-th attribute ID in the attr map for a value accessor is "n"
* if the attribute_id "i" in the output relation
* is the attribute_id "n" in corresponding input value accessor.
* Set the i-th element to kInvalidCatalogId if it doesn't come from
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f2e77266/storage/StorageBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.hpp b/storage/StorageBlock.hpp
index ed252c5..16ea50f 100644
--- a/storage/StorageBlock.hpp
+++ b/storage/StorageBlock.hpp
@@ -325,7 +325,7 @@ class StorageBlock : public StorageBlockBase {
* function with the appropriate attribute_map for each value
* accessor (InsertDestination::bulkInsertTuplesFromValueAccessors
* handles all the details) to insert tuples without an extra temp copy.
- *
+ *
* @warning Must call bulkInsertPartialTuplesFinalize() to update the header,
* until which point, the insertion is not visible to others.
* @warning The inserted tuples may be placed in sub-optimal locations in this
[4/4] incubator-quickstep git commit: Push down low cost disjunctive
predicates to filter the stored relations early
Posted by zu...@apache.org.
Push down low cost disjunctive predicates to filter the stored relations early
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/26e7f80a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/26e7f80a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/26e7f80a
Branch: refs/heads/push-down-disjunctive-predicate
Commit: 26e7f80a6ca04765d6c1acfa8490fcffd4393f48
Parents: 6d83b46
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Mon Jan 30 01:02:19 2017 -0600
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Tue Jan 31 00:16:51 2017 -0800
----------------------------------------------------------------------
query_optimizer/CMakeLists.txt | 3 +-
query_optimizer/PhysicalGenerator.cpp | 2 +
query_optimizer/rules/CMakeLists.txt | 23 ++
.../PushDownLowCostDisjunctivePredicate.cpp | 218 +++++++++++++++++++
.../PushDownLowCostDisjunctivePredicate.hpp | 118 ++++++++++
5 files changed, 363 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/26e7f80a/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index e8bc21c..a335aa4 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -203,10 +203,12 @@ target_link_libraries(quickstep_queryoptimizer_OptimizerTree
target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
${GFLAGS_LIB_NAME}
quickstep_queryoptimizer_LogicalToPhysicalMapper
+ quickstep_queryoptimizer_Validator
quickstep_queryoptimizer_logical_Logical
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_rules_AttachLIPFilters
quickstep_queryoptimizer_rules_PruneColumns
+ quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
quickstep_queryoptimizer_rules_ReorderColumns
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
quickstep_queryoptimizer_rules_SwapProbeBuild
@@ -215,7 +217,6 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_strategy_OneToOne
quickstep_queryoptimizer_strategy_Selection
quickstep_queryoptimizer_strategy_Strategy
- quickstep_queryoptimizer_Validator
quickstep_utility_Macros
quickstep_utility_PlanVisualizer)
target_link_libraries(quickstep_queryoptimizer_QueryHandle
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/26e7f80a/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index e12f8be..e16369a 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -27,6 +27,7 @@
#include "query_optimizer/logical/Logical.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/rules/AttachLIPFilters.hpp"
+#include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
#include "query_optimizer/rules/ReorderColumns.hpp"
#include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
@@ -108,6 +109,7 @@ P::PhysicalPtr PhysicalGenerator::generateInitialPlan(
P::PhysicalPtr PhysicalGenerator::optimizePlan() {
std::vector<std::unique_ptr<Rule<P::Physical>>> rules;
rules.emplace_back(new PruneColumns());
+ rules.emplace_back(new PushDownLowCostDisjunctivePredicate());
if (FLAGS_reorder_hash_joins) {
rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
rules.emplace_back(new PruneColumns());
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/26e7f80a/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index fe2fd17..0ef2d61 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -24,6 +24,9 @@ add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp C
add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
add_library(quickstep_queryoptimizer_rules_PruneColumns PruneColumns.cpp PruneColumns.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownFilter PushDownFilter.cpp PushDownFilter.hpp)
+add_library(quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
+ PushDownLowCostDisjunctivePredicate.cpp
+ PushDownLowCostDisjunctivePredicate.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin PushDownSemiAntiJoin.cpp PushDownSemiAntiJoin.hpp)
add_library(quickstep_queryoptimizer_rules_ReorderColumns ReorderColumns.cpp ReorderColumns.hpp)
add_library(quickstep_queryoptimizer_rules_Rule ../../empty_src.cpp Rule.hpp)
@@ -111,6 +114,25 @@ target_link_libraries(quickstep_queryoptimizer_rules_PushDownFilter
quickstep_queryoptimizer_rules_RuleHelper
quickstep_queryoptimizer_rules_TopDownRule
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
+ quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExpressionUtil
+ quickstep_queryoptimizer_expressions_LogicalAnd
+ quickstep_queryoptimizer_expressions_LogicalOr
+ quickstep_queryoptimizer_expressions_PatternMatcher
+ quickstep_queryoptimizer_expressions_Predicate
+ quickstep_queryoptimizer_physical_Aggregate
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_NestedLoopsJoin
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_queryoptimizer_physical_Selection
+ quickstep_queryoptimizer_physical_TableReference
+ quickstep_queryoptimizer_physical_TopLevelPlan
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
quickstep_queryoptimizer_expressions_AttributeReference
quickstep_queryoptimizer_expressions_ExpressionUtil
@@ -225,6 +247,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
quickstep_queryoptimizer_rules_GenerateJoins
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_PushDownFilter
+ quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
quickstep_queryoptimizer_rules_ReorderColumns
quickstep_queryoptimizer_rules_Rule
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/26e7f80a/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp
new file mode 100644
index 0000000..7db8f7f
--- /dev/null
+++ b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.cpp
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp"
+
+#include <cstddef>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/LogicalAnd.hpp"
+#include "query_optimizer/expressions/LogicalOr.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/NestedLoopsJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/TableReference.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr PushDownLowCostDisjunctivePredicate::apply(const P::PhysicalPtr &input) {
+ DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+
+ const P::TopLevelPlanPtr top_level_plan =
+ std::static_pointer_cast<const P::TopLevelPlan>(input);
+ cost_model_.reset(
+ new cost::StarSchemaSimpleCostModel(
+ top_level_plan->shared_subplans()));
+
+ collectApplicablePredicates(input);
+
+ if (!applicable_predicates_.empty()) {
+ // Apply the selected predicates to stored relations.
+ return attachPredicates(input);
+ } else {
+ return input;
+ }
+}
+
+void PushDownLowCostDisjunctivePredicate::collectApplicablePredicates(
+ const physical::PhysicalPtr &input) {
+ P::TableReferencePtr table_reference;
+ if (P::SomeTableReference::MatchesWithConditionalCast(input, &table_reference)) {
+ // Consider only stored relations with small cardinality as targets.
+ if (cost_model_->estimateCardinality(input) <= 100u) {
+ applicable_nodes_.emplace_back(input, &table_reference->attribute_list());
+ }
+ return;
+ }
+
+ for (const auto &child : input->children()) {
+ collectApplicablePredicates(child);
+ }
+
+ E::PredicatePtr filter_predicate = nullptr;
+ switch (input->getPhysicalType()) {
+ case P::PhysicalType::kAggregate: {
+ filter_predicate =
+ std::static_pointer_cast<const P::Aggregate>(input)->filter_predicate();
+ break;
+ }
+ case P::PhysicalType::kHashJoin: {
+ const P::HashJoinPtr hash_join =
+ std::static_pointer_cast<const P::HashJoin>(input);
+ if (hash_join->join_type() == P::HashJoin::JoinType::kInnerJoin) {
+ filter_predicate = hash_join->residual_predicate();
+ }
+ break;
+ }
+ case P::PhysicalType::kNestedLoopsJoin: {
+ filter_predicate =
+ std::static_pointer_cast<const P::NestedLoopsJoin>(input)->join_predicate();
+ break;
+ }
+ case P::PhysicalType::kSelection: {
+ filter_predicate =
+ std::static_pointer_cast<const P::Selection>(input)->filter_predicate();
+ break;
+ }
+ default:
+ break;
+ }
+
+ E::LogicalOrPtr disjunctive_predicate;
+ if (filter_predicate == nullptr ||
+ !E::SomeLogicalOr::MatchesWithConditionalCast(filter_predicate, &disjunctive_predicate)) {
+ return;
+ }
+
+ // Consider only disjunctive normal form, i.e. disjunction of conjunctions.
+ // Divide the disjunctive components into groups.
+ std::vector<std::vector<E::PredicatePtr>> candidate_predicates;
+ std::vector<std::vector<std::vector<E::AttributeReferencePtr>>> candidate_attributes;
+ for (const auto &conjunctive_predicate : disjunctive_predicate->operands()) {
+ candidate_predicates.emplace_back();
+ candidate_attributes.emplace_back();
+ E::LogicalAndPtr logical_and;
+ if (E::SomeLogicalAnd::MatchesWithConditionalCast(conjunctive_predicate, &logical_and)) {
+ for (const auto &predicate : logical_and->operands()) {
+ candidate_predicates.back().emplace_back(predicate);
+ candidate_attributes.back().emplace_back(
+ predicate->getReferencedAttributes());
+ }
+ } else {
+ candidate_predicates.back().emplace_back(conjunctive_predicate);
+ candidate_attributes.back().emplace_back(
+ conjunctive_predicate->getReferencedAttributes());
+ }
+ }
+
+ // Check whether the conditions are met for pushing down part of the predicates
+ // to each small-cardinality stored relation.
+ for (const auto &node_pair : applicable_nodes_) {
+ const std::vector<E::AttributeReferencePtr> &target_attributes = *node_pair.second;
+ std::vector<E::PredicatePtr> selected_disj_preds;
+ for (std::size_t i = 0; i < candidate_predicates.size(); ++i) {
+ const auto &cand_preds = candidate_predicates[i];
+ const auto &cand_attrs = candidate_attributes[i];
+
+ std::vector<E::PredicatePtr> selected_conj_preds;
+ for (std::size_t j = 0; j < cand_preds.size(); ++j) {
+ if (E::SubsetOfExpressions(cand_attrs[j], target_attributes)) {
+ selected_conj_preds.emplace_back(cand_preds[j]);
+ }
+ }
+ if (selected_conj_preds.empty()) {
+ // Not every disjunctive component contains a predicate that can be applied
+ // to the table reference node -- condition failed, exit.
+ selected_disj_preds.clear();
+ break;
+ } else {
+ selected_disj_preds.emplace_back(
+ CreateConjunctive(selected_conj_preds));
+ }
+ }
+ if (!selected_disj_preds.empty()) {
+ applicable_predicates_[node_pair.first].add(
+ CreateDisjunctive(selected_disj_preds));
+ }
+ }
+}
+
+P::PhysicalPtr PushDownLowCostDisjunctivePredicate::attachPredicates(
+ const P::PhysicalPtr &input) const {
+ std::vector<P::PhysicalPtr> new_children;
+ for (const P::PhysicalPtr &child : input->children()) {
+ const P::PhysicalPtr new_child = attachPredicates(child);
+ new_children.push_back(new_child);
+ }
+
+ const P::PhysicalPtr output =
+ new_children == input->children() ? input
+ : input->copyWithNewChildren(new_children);
+
+ const auto &node_it = applicable_predicates_.find(input);
+ if (node_it != applicable_predicates_.end()) {
+ const E::PredicatePtr filter_predicate =
+ CreateConjunctive(node_it->second.predicates);
+ return P::Selection::Create(output,
+ E::ToNamedExpressions(output->getOutputAttributes()),
+ filter_predicate);
+ }
+
+ return output;
+}
+
+E::PredicatePtr PushDownLowCostDisjunctivePredicate::CreateConjunctive(
+ const std::vector<E::PredicatePtr> predicates) {
+ DCHECK_GE(predicates.size(), 1u);
+ if (predicates.size() == 1) {
+ return predicates.front();
+ } else {
+ return E::LogicalAnd::Create(predicates);
+ }
+}
+
+E::PredicatePtr PushDownLowCostDisjunctivePredicate::CreateDisjunctive(
+ const std::vector<E::PredicatePtr> predicates) {
+ DCHECK_GE(predicates.size(), 1u);
+ if (predicates.size() == 1) {
+ return predicates.front();
+ } else {
+ return E::LogicalOr::Create(predicates);
+ }
+}
+
+
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/26e7f80a/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp
new file mode 100644
index 0000000..bf84293
--- /dev/null
+++ b/query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_PUSH_DOWN_LOW_COST_DISJUNCTIVE_PREDICATE_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_PUSH_DOWN_LOW_COST_DISJUNCTIVE_PREDICATE_HPP_
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ * @{
+ */
+
+/**
+ * @brief Rule that applies to a physical plan to push down low-cost disjunctive
+ * predicate when proper conditions are met.
+ *
+ * Here we elaborate the conditions.
+ *
+ * Let
+ * P = p_{1,1} AND ... AND p_{1, m_1} OR ... OR p_{n,1} AND ... AND p_{n, m_n}
+ * be a predicate in disjunctive normal form.
+ *
+ * Now consider each small-cardinality relation R, if for each i in 1..n, there
+ * exists at least one predicate p_{i, k_i} that is applicable to R. Then we can
+ * construct a new predicate
+ * P' = p_{1, k_1} OR ... OR p_{n, k_n}
+ * and push down P' to be applied to R.
+ *
+ * Also, if any conjunctive component in P contains more than one predicate that
+ * is applicable to R, then we can combine all these applicable predicates as a
+ * conjunctive component in P'.
+ *
+ * Finally, note that if there exists a conjunctive component that contains no
+ * predicate applicable to R. Then the condition fails and we cannot do a push
+ * down for R.
+ */
+class PushDownLowCostDisjunctivePredicate : public Rule<physical::Physical> {
+ public:
+ /**
+ * @brief Constructor.
+ */
+ PushDownLowCostDisjunctivePredicate() {}
+
+ ~PushDownLowCostDisjunctivePredicate() override {}
+
+ std::string getName() const override {
+ return "PushDownLowCostDisjunctivePredicate";
+ }
+
+ physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+ struct PredicateInfo {
+ PredicateInfo() {}
+ inline void add(expressions::PredicatePtr predicate) {
+ predicates.emplace_back(predicate);
+ }
+ std::vector<expressions::PredicatePtr> predicates;
+ };
+
+ void collectApplicablePredicates(const physical::PhysicalPtr &input);
+ physical::PhysicalPtr attachPredicates(const physical::PhysicalPtr &input) const;
+
+ static expressions::PredicatePtr CreateConjunctive(
+ const std::vector<expressions::PredicatePtr> predicates);
+
+ static expressions::PredicatePtr CreateDisjunctive(
+ const std::vector<expressions::PredicatePtr> predicates);
+
+ std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
+
+ std::vector<std::pair<physical::PhysicalPtr,
+ const std::vector<expressions::AttributeReferencePtr> *>> applicable_nodes_;
+ std::map<physical::PhysicalPtr, PredicateInfo> applicable_predicates_;
+
+ // Currently consider only stored relations with small cardinality as targets.
+ static constexpr std::size_t kCardinalityThreshold = 100u;
+
+ DISALLOW_COPY_AND_ASSIGN(PushDownLowCostDisjunctivePredicate);
+};
+
+/** @} */
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_PUSH_DOWN_LOW_COST_DISJUNCTIVE_PREDICATE_HPP_
[3/4] incubator-quickstep git commit: Reorder output attribute order
to improve copy performance.
Posted by zu...@apache.org.
Reorder output attribute order to improve copy performance.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/6d83b46a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/6d83b46a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/6d83b46a
Branch: refs/heads/push-down-disjunctive-predicate
Commit: 6d83b46af25b35fb0b3a23452b6fbd2842b33793
Parents: 23e14b8
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Thu Jan 12 18:41:17 2017 -0600
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Tue Jan 31 00:10:45 2017 -0800
----------------------------------------------------------------------
query_optimizer/CMakeLists.txt | 1 +
query_optimizer/PhysicalGenerator.cpp | 12 +
query_optimizer/rules/CMakeLists.txt | 14 +
query_optimizer/rules/ReorderColumns.cpp | 214 ++++++++++++++++
query_optimizer/rules/ReorderColumns.hpp | 75 ++++++
query_optimizer/tests/OptimizerTextTest.cpp | 6 +-
relational_operators/CMakeLists.txt | 1 +
relational_operators/HashJoinOperator.cpp | 254 +++++++++++--------
relational_operators/HashJoinOperator.hpp | 4 +
storage/SplitRowStoreValueAccessor.hpp | 5 +
storage/ValueAccessor.hpp | 30 +++
types/containers/ColumnVectorsValueAccessor.hpp | 5 +
12 files changed, 515 insertions(+), 106 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index b6c794d..e8bc21c 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -207,6 +207,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_rules_AttachLIPFilters
quickstep_queryoptimizer_rules_PruneColumns
+ quickstep_queryoptimizer_rules_ReorderColumns
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
quickstep_queryoptimizer_rules_SwapProbeBuild
quickstep_queryoptimizer_strategy_Aggregate
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 7cb97dc..e12f8be 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -28,6 +28,7 @@
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/rules/AttachLIPFilters.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
+#include "query_optimizer/rules/ReorderColumns.hpp"
#include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
#include "query_optimizer/rules/SwapProbeBuild.hpp"
#include "query_optimizer/strategy/Aggregate.hpp"
@@ -44,6 +45,10 @@
namespace quickstep {
namespace optimizer {
+DEFINE_bool(reorder_columns, true,
+ "Adjust the ordering of intermediate relations' columns to improve "
+ "copy performance.");
+
DEFINE_bool(reorder_hash_joins, true,
"If true, apply hash join order optimization to each group of hash "
"joins. The optimization applies a greedy algorithm to favor smaller "
@@ -109,6 +114,13 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
} else {
rules.emplace_back(new SwapProbeBuild());
}
+ if (FLAGS_reorder_columns) {
+ // NOTE(jianqiao): This optimization relies on the fact that the intermediate
+ // relations all have SPLIT_ROW_STORE layouts. If this fact gets changed, the
+ // optimization algorithm may need to be updated and the performance impact
+ // should be re-evaluated.
+ rules.emplace_back(new ReorderColumns());
+ }
if (FLAGS_use_lip_filters) {
rules.emplace_back(new AttachLIPFilters());
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 7fffadc..fe2fd17 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -25,6 +25,7 @@ add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp Gener
add_library(quickstep_queryoptimizer_rules_PruneColumns PruneColumns.cpp PruneColumns.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownFilter PushDownFilter.cpp PushDownFilter.hpp)
add_library(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin PushDownSemiAntiJoin.cpp PushDownSemiAntiJoin.hpp)
+add_library(quickstep_queryoptimizer_rules_ReorderColumns ReorderColumns.cpp ReorderColumns.hpp)
add_library(quickstep_queryoptimizer_rules_Rule ../../empty_src.cpp Rule.hpp)
add_library(quickstep_queryoptimizer_rules_RuleHelper RuleHelper.cpp RuleHelper.hpp)
add_library(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
@@ -118,6 +119,18 @@ target_link_libraries(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
quickstep_queryoptimizer_logical_PatternMatcher
quickstep_queryoptimizer_rules_TopDownRule
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_ReorderColumns
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExprId
+ quickstep_queryoptimizer_expressions_NamedExpression
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_queryoptimizer_physical_Selection
+ quickstep_queryoptimizer_physical_TableReference
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_rules_Rule
glog
quickstep_utility_Macros)
@@ -213,6 +226,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_PushDownFilter
quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
+ quickstep_queryoptimizer_rules_ReorderColumns
quickstep_queryoptimizer_rules_Rule
quickstep_queryoptimizer_rules_RuleHelper
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/query_optimizer/rules/ReorderColumns.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ReorderColumns.cpp b/query_optimizer/rules/ReorderColumns.cpp
new file mode 100644
index 0000000..f7e58d5
--- /dev/null
+++ b/query_optimizer/rules/ReorderColumns.cpp
@@ -0,0 +1,214 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/ReorderColumns.hpp"
+
+#include <algorithm>
+#include <cstddef>
+#include <limits>
+#include <unordered_map>
+#include <vector>
+
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/TableReference.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr ReorderColumns::apply(const P::PhysicalPtr &input) {
+ DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+
+ return applyInternal(input, true);
+}
+
+P::PhysicalPtr ReorderColumns::applyInternal(const P::PhysicalPtr &input,
+ const bool lock_ordering) {
+ // We have to guarantee that the top level ordering of the columns remain
+ // unchanged so that the output columns are ordered as specified by the user.
+ // So here we use the flag "lock_ordering" to skip the first transformable
+ // node (i.e. the first Selection or HashJoin).
+ const bool is_not_transformable = !IsTransformable(input);
+ const bool skip_transform = lock_ordering || is_not_transformable;
+
+ if (skip_transform) {
+ std::vector<P::PhysicalPtr> new_children;
+ for (const P::PhysicalPtr &child : input->children()) {
+ new_children.emplace_back(applyInternal(child, lock_ordering && is_not_transformable));
+ }
+
+ if (new_children != input->children()) {
+ return input->copyWithNewChildren(new_children);
+ } else {
+ return input;
+ }
+ }
+
+ // Collect the maximal chain of transformable nodes.
+ std::vector<P::PhysicalPtr> nodes;
+ for (P::PhysicalPtr node = input; IsTransformable(node); node = node->children().front()) {
+ nodes.emplace_back(node);
+ }
+ // Arrange the nodes with bottom-up order.
+ std::reverse(nodes.begin(), nodes.end());
+
+ // A greedy algorithm that reorders the output attributes based on the GEN/KILL
+ // intervals. This algorithm works well with SSB/TPCH queries and is not likely
+ // to make the plans worse for whatever queries.
+ //
+ // Here is a brief explanation of the three data structure base/gen/kill.
+ // (1) base: maps each attribute's id to its position in the BASE relation's
+ // output attributes. Note that the base relation is the child
+ // relation of nodes[0].
+ // (2) gen: maps each attribute's id to the MINIMUM index i such that the
+ // attribute is among nodes[i]'s output attributes. I.e. node i
+ // GENERATEs the attribute.
+ // (3) kill: maps each attribute's id to the MAXIMUM index i such that the
+ // attribute is among nodes[i]'s output attributes. I.e. node i+1
+ // KILLs the attribute.
+ std::unordered_map<E::ExprId, std::size_t> base, gen, kill;
+
+ const P::PhysicalPtr base_node =
+ applyInternal(nodes.front()->children().front(), false);
+ const std::vector<E::AttributeReferencePtr> base_attrs =
+ base_node->getOutputAttributes();
+ for (std::size_t i = 0; i < base_attrs.size(); ++i) {
+ base.emplace(base_attrs[i]->id(), i);
+ }
+
+ for (std::size_t i = 0; i < nodes.size(); ++i) {
+ for (const auto &attr : nodes[i]->getOutputAttributes()) {
+ const E::ExprId attr_id = attr->id();
+ if (gen.find(attr_id) == gen.end()) {
+ gen.emplace(attr_id, i);
+ }
+ kill[attr_id] = i;
+ }
+ }
+
+ // TODO(jianqiao): implement this comparator as a standalone and well-documented
+ // struct.
+ const auto comparator = [&gen, &kill, &base](const E::NamedExpressionPtr &lhs,
+ const E::NamedExpressionPtr &rhs) -> bool {
+ const E::ExprId lhs_id = lhs->id();
+ const E::ExprId rhs_id = rhs->id();
+
+ // Sort the attributes first by GEN location.
+ const std::size_t lhs_gen = gen.at(lhs_id);
+ const std::size_t rhs_gen = gen.at(rhs_id);
+ if (lhs_gen != rhs_gen) {
+ return lhs_gen < rhs_gen;
+ }
+
+ // Then by KILL location.
+ const std::size_t lhs_kill = kill.at(lhs_id);
+ const std::size_t rhs_kill = kill.at(rhs_id);
+ if (lhs_kill != rhs_kill) {
+ return lhs_kill < rhs_kill;
+ }
+
+ // Finally by the ordering in the base relaton.
+ const auto lhs_base_it = base.find(lhs_id);
+ const auto rhs_base_it = base.find(rhs_id);
+ const std::size_t lhs_base =
+ lhs_base_it == base.end() ? std::numeric_limits<std::size_t>::max()
+ : lhs_base_it->second;
+ const std::size_t rhs_base =
+ rhs_base_it == base.end() ? std::numeric_limits<std::size_t>::max()
+ : rhs_base_it->second;
+ if (lhs_base != rhs_base) {
+ return lhs_base < rhs_base;
+ }
+
+ return lhs_id < rhs_id;
+ };
+
+ P::PhysicalPtr output = base_node;
+
+ for (const auto &node : nodes) {
+ std::vector<E::NamedExpressionPtr> project_expressions;
+ switch (node->getPhysicalType()) {
+ case P::PhysicalType::kHashJoin: {
+ project_expressions =
+ std::static_pointer_cast<const P::HashJoin>(node)->project_expressions();
+ break;
+ }
+ case P::PhysicalType::kSelection: {
+ project_expressions =
+ std::static_pointer_cast<const P::Selection>(node)->project_expressions();
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported physical type";
+ }
+
+ std::sort(project_expressions.begin(), project_expressions.end(), comparator);
+
+ switch (node->getPhysicalType()) {
+ case P::PhysicalType::kHashJoin: {
+ const P::HashJoinPtr old_node =
+ std::static_pointer_cast<const P::HashJoin>(node);
+ output = P::HashJoin::Create(output,
+ applyInternal(old_node->right(), false),
+ old_node->left_join_attributes(),
+ old_node->right_join_attributes(),
+ old_node->residual_predicate(),
+ project_expressions,
+ old_node->join_type());
+ break;
+ }
+ case P::PhysicalType::kSelection: {
+ const P::SelectionPtr old_node =
+ std::static_pointer_cast<const P::Selection>(node);
+ output = P::Selection::Create(output,
+ project_expressions,
+ old_node->filter_predicate());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported physical type";
+ }
+ }
+
+ return output;
+}
+
+bool ReorderColumns::IsTransformable(const physical::PhysicalPtr &input) {
+ switch (input->getPhysicalType()) {
+ case P::PhysicalType::kHashJoin: // Fall through
+ case P::PhysicalType::kSelection:
+ return true;
+ default:
+ return false;
+ }
+}
+
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/query_optimizer/rules/ReorderColumns.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ReorderColumns.hpp b/query_optimizer/rules/ReorderColumns.hpp
new file mode 100644
index 0000000..36fa183
--- /dev/null
+++ b/query_optimizer/rules/ReorderColumns.hpp
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_REORDER_COLUMNS_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_REORDER_COLUMNS_HPP_
+
+#include <string>
+
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ * @{
+ */
+
+/**
+ * @brief Rule that applies to a physical plan to adjust the orderings of some
+ * intermediate nodes' output attributes to improve copy performance.
+ *
+ * @note This optimization is based on the fact that the intermediate relations
+ * all have SPLIT_ROW_STORE layouts. If this fact gets changed, the rule's
+ * algorithm may need to be updated and the performance impact should be
+ * re-evaluated.
+ */
+class ReorderColumns : public Rule<physical::Physical> {
+ public:
+ /**
+ * @brief Constructor.
+ */
+ ReorderColumns() {}
+
+ ~ReorderColumns() override {}
+
+ std::string getName() const override {
+ return "ReorderColumns";
+ }
+
+ physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+ physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input,
+ const bool lock_ordering);
+
+ // Whether the physical node can
+ inline static bool IsTransformable(const physical::PhysicalPtr &input);
+
+ DISALLOW_COPY_AND_ASSIGN(ReorderColumns);
+};
+
+/** @} */
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_REORDER_COLUMNS_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/query_optimizer/tests/OptimizerTextTest.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/OptimizerTextTest.cpp b/query_optimizer/tests/OptimizerTextTest.cpp
index 759c173..e17f5c4 100644
--- a/query_optimizer/tests/OptimizerTextTest.cpp
+++ b/query_optimizer/tests/OptimizerTextTest.cpp
@@ -31,6 +31,7 @@
namespace quickstep {
namespace optimizer {
+DECLARE_bool(reorder_columns);
DECLARE_bool(reorder_hash_joins);
DECLARE_bool(use_lip_filters);
@@ -58,8 +59,9 @@ int main(int argc, char** argv) {
test_driver->registerOptions(
quickstep::optimizer::OptimizerTextTestRunner::kTestOptions);
- // Turn off join order optimization and LIPFilter for optimizer test since
- // it is up to change and affects a large number of test cases.
+ // Turn off some optimization rules for optimizer test since they are up to
+ // change and affects a large number of test cases.
+ quickstep::optimizer::FLAGS_reorder_columns = false;
quickstep::optimizer::FLAGS_reorder_hash_joins = false;
quickstep::optimizer::FLAGS_use_lip_filters = false;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index b2e08cf..c8447f3 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -207,6 +207,7 @@ target_link_libraries(quickstep_relationaloperators_HashJoinOperator
quickstep_catalog_PartitionSchemeHeader
quickstep_expressions_predicate_Predicate
quickstep_expressions_scalar_Scalar
+ quickstep_expressions_scalar_ScalarAttribute
quickstep_queryexecution_QueryContext
quickstep_queryexecution_WorkOrderProtosContainer
quickstep_queryexecution_WorkOrdersContainer
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index 7394554..0e75411 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -31,6 +31,7 @@
#include "catalog/CatalogTypedefs.hpp"
#include "expressions/predicate/Predicate.hpp"
#include "expressions/scalar/Scalar.hpp"
+#include "expressions/scalar/ScalarAttribute.hpp"
#include "query_execution/QueryContext.hpp"
#include "query_execution/WorkOrderProtosContainer.hpp"
#include "query_execution/WorkOrdersContainer.hpp"
@@ -64,6 +65,9 @@ namespace quickstep {
namespace {
+typedef std::vector<std::pair<tuple_id, tuple_id>> VectorOfTupleIdPair;
+typedef std::pair<std::vector<tuple_id>, std::vector<tuple_id>> PairOfTupleIdVector;
+
// Functor passed to HashTable::getAllFromValueAccessor() to collect matching
// tuples from the inner relation. It stores matching tuple ID pairs
// in an unordered_map keyed by inner block ID and a vector of
@@ -83,8 +87,7 @@ class VectorsOfPairsJoinedTuplesCollector {
// key is inner block_id, values are vectors of joined tuple ID pairs with
// tuple ID from the inner block on the left and the outer block on the
// right.
- inline std::unordered_map<block_id, std::vector<std::pair<tuple_id, tuple_id>>>*
- getJoinedTuples() {
+ inline std::unordered_map<block_id, VectorOfTupleIdPair>* getJoinedTuples() {
return &joined_tuples_;
}
@@ -94,7 +97,7 @@ class VectorsOfPairsJoinedTuplesCollector {
// cross-product of all tuples from both blocks, but simply using pairs of
// tuple-IDs is expected to be more space efficient if the result set is less
// than 1/64 the cardinality of the cross-product.
- std::unordered_map<block_id, std::vector<std::pair<tuple_id, tuple_id>>> joined_tuples_;
+ std::unordered_map<block_id, VectorOfTupleIdPair> joined_tuples_;
};
// Another collector using an unordered_map keyed on inner block just like above,
@@ -107,15 +110,15 @@ class PairsOfVectorsJoinedTuplesCollector {
template <typename ValueAccessorT>
inline void operator()(const ValueAccessorT &accessor,
const TupleReference &tref) {
- joined_tuples_[tref.block].first.push_back(tref.tuple);
- joined_tuples_[tref.block].second.push_back(accessor.getCurrentPosition());
+ auto &entry = joined_tuples_[tref.block];
+ entry.first.emplace_back(tref.tuple);
+ entry.second.emplace_back(accessor.getCurrentPosition());
}
// Get a mutable pointer to the collected map of joined tuple ID pairs. The
// key is inner block_id, value is a pair consisting of
// inner block tuple IDs (first) and outer block tuple IDs (second).
- inline std::unordered_map< block_id, std::pair<std::vector<tuple_id>, std::vector<tuple_id>>>*
- getJoinedTuples() {
+ inline std::unordered_map<block_id, PairOfTupleIdVector>* getJoinedTuples() {
return &joined_tuples_;
}
@@ -166,12 +169,6 @@ class OuterJoinTupleCollector {
TupleIdSequence *filter_;
};
-// For InnerJoin.
-constexpr std::size_t kNumValueAccessors = 3u;
-constexpr std::size_t kBuildValueAccessorIndex = 0,
- kProbeValueAccessorIndex = 1u,
- kTempResultValueAccessorIndex = 2u;
-
} // namespace
bool HashJoinOperator::getAllWorkOrders(
@@ -473,16 +470,93 @@ void HashInnerJoinWorkOrder::execute() {
base_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
}
+ if (probe_accessor->getImplementationType() == ValueAccessor::Implementation::kSplitRowStore) {
+ executeWithCopyElision(probe_accessor.get());
+ } else {
+ executeWithoutCopyElision(probe_accessor.get());
+ }
+}
+
+void HashInnerJoinWorkOrder::executeWithoutCopyElision(ValueAccessor *probe_accessor) {
+ VectorsOfPairsJoinedTuplesCollector collector;
+ if (join_key_attributes_.size() == 1) {
+ hash_table_.getAllFromValueAccessor(
+ probe_accessor,
+ join_key_attributes_.front(),
+ any_join_key_attributes_nullable_,
+ &collector);
+ } else {
+ hash_table_.getAllFromValueAccessorCompositeKey(
+ probe_accessor,
+ join_key_attributes_,
+ any_join_key_attributes_nullable_,
+ &collector);
+ }
+
+ const relation_id build_relation_id = build_relation_.getID();
+ const relation_id probe_relation_id = probe_relation_.getID();
+
+ for (std::pair<const block_id, VectorOfTupleIdPair>
+ &build_block_entry : *collector.getJoinedTuples()) {
+ BlockReference build_block =
+ storage_manager_->getBlock(build_block_entry.first, build_relation_);
+ const TupleStorageSubBlock &build_store = build_block->getTupleStorageSubBlock();
+ std::unique_ptr<ValueAccessor> build_accessor(build_store.createValueAccessor());
+
+ // Evaluate '*residual_predicate_', if any.
+ //
+ // TODO(chasseur): We might consider implementing true vectorized
+ // evaluation for join predicates that are not equijoins (although in
+ // general that would require evaluating and materializing some expressions
+ // over the cross-product of all tuples in a pair of blocks in order to
+ // evaluate the predicate). We could use a heuristic where we only do the
+ // vectorized materialization and evaluation if the set of matches from the
+ // hash join is below a reasonable threshold so that we don't blow up
+ // temporary memory requirements to an unreasonable degree.
+ if (residual_predicate_ != nullptr) {
+ VectorOfTupleIdPair filtered_matches;
+
+ for (const std::pair<tuple_id, tuple_id> &hash_match
+ : build_block_entry.second) {
+ if (residual_predicate_->matchesForJoinedTuples(*build_accessor,
+ build_relation_id,
+ hash_match.first,
+ *probe_accessor,
+ probe_relation_id,
+ hash_match.second)) {
+ filtered_matches.emplace_back(hash_match);
+ }
+ }
+
+ build_block_entry.second = std::move(filtered_matches);
+ }
+
+ ColumnVectorsValueAccessor temp_result;
+ for (auto selection_cit = selection_.begin();
+ selection_cit != selection_.end();
+ ++selection_cit) {
+ temp_result.addColumn((*selection_cit)->getAllValuesForJoin(build_relation_id,
+ build_accessor.get(),
+ probe_relation_id,
+ probe_accessor,
+ build_block_entry.second));
+ }
+
+ output_destination_->bulkInsertTuples(&temp_result);
+ }
+}
+
+void HashInnerJoinWorkOrder::executeWithCopyElision(ValueAccessor *probe_accessor) {
PairsOfVectorsJoinedTuplesCollector collector;
if (join_key_attributes_.size() == 1) {
hash_table_.getAllFromValueAccessor(
- probe_accessor.get(),
+ probe_accessor,
join_key_attributes_.front(),
any_join_key_attributes_nullable_,
&collector);
} else {
hash_table_.getAllFromValueAccessorCompositeKey(
- probe_accessor.get(),
+ probe_accessor,
join_key_attributes_,
any_join_key_attributes_nullable_,
&collector);
@@ -491,7 +565,37 @@ void HashInnerJoinWorkOrder::execute() {
const relation_id build_relation_id = build_relation_.getID();
const relation_id probe_relation_id = probe_relation_.getID();
- for (std::pair<const block_id, std::pair<std::vector<tuple_id>, std::vector<tuple_id>>>
+ constexpr std::size_t kNumIndexes = 3u;
+ constexpr std::size_t kBuildIndex = 0, kProbeIndex = 1u, kTempIndex = 2u;
+
+ // Create a map of ValueAccessors and what attributes we want to pick from them.
+ std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map(
+ kNumIndexes, std::make_pair(nullptr /* late binding ValueAccessor */,
+ vector<attribute_id>(selection_.size(), kInvalidCatalogId)));
+
+ std::vector<const Scalar *> non_trivial_expressions;
+ attribute_id dest_attr = 0;
+
+ for (const auto &scalar : selection_) {
+ // If the Scalar (column) is not an attribute in build/probe blocks, we will
+ // insert it into a ColumnVectorsValueAccessor.
+ if (scalar->getDataSource() != Scalar::ScalarDataSource::kAttribute) {
+ // Current destination attribute maps to the column we'll create now.
+ accessor_attribute_map[kTempIndex].second[dest_attr] = non_trivial_expressions.size();
+ non_trivial_expressions.emplace_back(scalar.get());
+ } else {
+ const CatalogAttribute &attr = static_cast<const ScalarAttribute *>(scalar.get())->getAttribute();
+ const attribute_id attr_id = attr.getID();
+ if (attr.getParent().getID() == build_relation_id) {
+ accessor_attribute_map[kBuildIndex].second[dest_attr] = attr_id;
+ } else {
+ accessor_attribute_map[kProbeIndex].second[dest_attr] = attr_id;
+ }
+ }
+ ++dest_attr;
+ }
+
+ for (std::pair<const block_id, PairOfTupleIdVector>
&build_block_entry : *collector.getJoinedTuples()) {
BlockReference build_block =
storage_manager_->getBlock(build_block_entry.first, build_relation_);
@@ -511,7 +615,8 @@ void HashInnerJoinWorkOrder::execute() {
// hash join is below a reasonable threshold so that we don't blow up
// temporary memory requirements to an unreasonable degree.
if (residual_predicate_ != nullptr) {
- std::pair<std::vector<tuple_id>, std::vector<tuple_id>> filtered_matches;
+ PairOfTupleIdVector filtered_matches;
+
for (std::size_t i = 0; i < build_tids.size(); ++i) {
if (residual_predicate_->matchesForJoinedTuples(*build_accessor,
build_relation_id,
@@ -519,110 +624,51 @@ void HashInnerJoinWorkOrder::execute() {
*probe_accessor,
probe_relation_id,
probe_tids[i])) {
- filtered_matches.first.push_back(build_tids[i]);
- filtered_matches.second.push_back(probe_tids[i]);
+ filtered_matches.first.emplace_back(build_tids[i]);
+ filtered_matches.second.emplace_back(probe_tids[i]);
}
}
build_block_entry.second = std::move(filtered_matches);
}
- // TODO(chasseur): If all the output expressions are ScalarAttributes,
- // we could implement a similar fast-path to StorageBlock::selectSimple()
- // that avoids a copy.
- //
// TODO(chasseur): See TODO in NestedLoopsJoinOperator.cpp about limiting
// the size of materialized temporary results. In common usage, this
// probably won't be an issue for hash-joins, but in the worst case a hash
// join can still devolve into a cross-product.
- //
- // NOTE(chasseur): We could also create one big ColumnVectorsValueAccessor
- // and accumulate all the results across multiple block pairs into it
- // before inserting anything into output blocks, but this would require
- // some significant API extensions to the expressions system for a dubious
- // benefit (probably only a real performance win when there are very few
- // matching tuples in each individual inner block but very many inner
- // blocks with at least one match).
-
- // We now create ordered value accessors for both build and probe side,
- // using the joined tuple TIDs. Note that we have to use this Lambda-based
- // invocation method here because the accessors don't have a virtual
- // function that creates such an OrderedTupleIdSequenceAdapterValueAccessor.
- std::unique_ptr<ValueAccessor> ordered_build_accessor, ordered_probe_accessor;
- InvokeOnValueAccessorNotAdapter(
- build_accessor.get(),
- [&](auto *accessor) -> void { // NOLINT(build/c++11)
- ordered_build_accessor.reset(
- accessor->createSharedOrderedTupleIdSequenceAdapter(build_tids));
- });
-
- if (probe_accessor->isTupleIdSequenceAdapter()) {
- InvokeOnTupleIdSequenceAdapterValueAccessor(
- probe_accessor.get(),
- [&](auto *accessor) -> void { // NOLINT(build/c++11)
- ordered_probe_accessor.reset(
- accessor->createSharedOrderedTupleIdSequenceAdapter(probe_tids));
- });
- } else {
- InvokeOnValueAccessorNotAdapter(
- probe_accessor.get(),
- [&](auto *accessor) -> void { // NOLINT(build/c++11)
- ordered_probe_accessor.reset(
- accessor->createSharedOrderedTupleIdSequenceAdapter(probe_tids));
- });
- }
// We also need a temp value accessor to store results of any scalar expressions.
ColumnVectorsValueAccessor temp_result;
+ if (!non_trivial_expressions.empty()) {
+ // The getAllValuesForJoin function below needs joined tuple IDs as a
+ // vector of pair of (build-tuple-ID, probe-tuple-ID), and we have a pair
+ // of (build-tuple-IDs-vector, probe-tuple-IDs-vector). So we'll have to
+ // zip our two vectors together.
+ VectorOfTupleIdPair zipped_joined_tuple_ids;
+ for (std::size_t i = 0; i < build_tids.size(); ++i) {
+ zipped_joined_tuple_ids.emplace_back(build_tids[i], probe_tids[i]);
+ }
- // Create a map of ValueAccessors and what attributes we want to pick from them
- std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map(
- kNumValueAccessors, std::make_pair(nullptr, // A late binding ValueAccessor.
- vector<attribute_id>(selection_.size(), kInvalidCatalogId)));
-
- accessor_attribute_map[kBuildValueAccessorIndex].first = ordered_build_accessor.get();
- accessor_attribute_map[kProbeValueAccessorIndex].first = ordered_probe_accessor.get();
- accessor_attribute_map[kTempResultValueAccessorIndex].first = &temp_result;
-
- attribute_id dest_attr = 0;
- for (auto &selection_cit : selection_) {
- // If the Scalar (column) is not an attribute in build/probe blocks, then
- // insert it into a ColumnVectorsValueAccessor.
- if (selection_cit->getDataSource() != Scalar::ScalarDataSource::kAttribute) {
- // Current destination attribute maps to the column we'll create now.
- accessor_attribute_map[kTempResultValueAccessorIndex].second[dest_attr] = temp_result.getNumColumns();
-
- std::vector<std::pair<tuple_id, tuple_id>> zipped_joined_tuple_ids;
- if (temp_result.getNumColumns() == 0) {
- // The getAllValuesForJoin function below needs joined tuple IDs as
- // a vector of pair of (build-tuple-ID, probe-tuple-ID), and we have
- // a pair of (build-tuple-IDs-vector, probe-tuple-IDs-vector). So
- // we'll have to zip our two vectors together. We do this inside
- // the loop because most queries don't exercise this code since
- // they don't have scalar expressions with attributes from both
- // build and probe relations (other expressions would have been
- // pushed down to before the join).
- for (std::size_t i = 0; i < build_tids.size(); ++i) {
- zipped_joined_tuple_ids.emplace_back(build_tids[i], probe_tids[i]);
- }
- }
- temp_result.addColumn(
- selection_cit
- ->getAllValuesForJoin(build_relation_id, build_accessor.get(),
- probe_relation_id, probe_accessor.get(),
- zipped_joined_tuple_ids));
- } else {
- const CatalogAttribute &attr = static_cast<const ScalarAttribute *>(selection_cit.get())->getAttribute();
- const attribute_id attr_id = attr.getID();
- if (attr.getParent().getID() == build_relation_id) {
- accessor_attribute_map[kBuildValueAccessorIndex].second[dest_attr] = attr_id;
- } else {
- accessor_attribute_map[kProbeValueAccessorIndex].second[dest_attr] = attr_id;
- }
+ for (const Scalar *scalar : non_trivial_expressions) {
+ temp_result.addColumn(scalar->getAllValuesForJoin(build_relation_id,
+ build_accessor.get(),
+ probe_relation_id,
+ probe_accessor,
+ zipped_joined_tuple_ids));
}
- ++dest_attr;
}
+ // We now create ordered value accessors for both build and probe side,
+ // using the joined tuple IDs.
+ std::unique_ptr<ValueAccessor> ordered_build_accessor(
+ build_accessor->createSharedOrderedTupleIdSequenceAdapterVirtual(build_tids));
+ std::unique_ptr<ValueAccessor> ordered_probe_accessor(
+ probe_accessor->createSharedOrderedTupleIdSequenceAdapterVirtual(probe_tids));
+
+ accessor_attribute_map[kBuildIndex].first = ordered_build_accessor.get();
+ accessor_attribute_map[kProbeIndex].first = ordered_probe_accessor.get();
+ accessor_attribute_map[kTempIndex].first = &temp_result;
+
// NOTE(chasseur): calling the bulk-insert method of InsertDestination once
// for each pair of joined blocks incurs some extra overhead that could be
// avoided by keeping checked-out MutableBlockReferences across iterations
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/relational_operators/HashJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index acfe3d2..5e9c5d8 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -423,6 +423,10 @@ class HashInnerJoinWorkOrder : public WorkOrder {
}
private:
+ void executeWithoutCopyElision(ValueAccessor *probe_accesor);
+
+ void executeWithCopyElision(ValueAccessor *probe_accessor);
+
const CatalogRelationSchema &build_relation_;
const CatalogRelationSchema &probe_relation_;
const std::vector<attribute_id> join_key_attributes_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/storage/SplitRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/SplitRowStoreValueAccessor.hpp b/storage/SplitRowStoreValueAccessor.hpp
index 951a20a..46367b3 100644
--- a/storage/SplitRowStoreValueAccessor.hpp
+++ b/storage/SplitRowStoreValueAccessor.hpp
@@ -318,6 +318,11 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
return createSharedTupleIdSequenceAdapter(id_sequence);
}
+ ValueAccessor* createSharedOrderedTupleIdSequenceAdapterVirtual(
+ const OrderedTupleIdSequence &id_sequence) override {
+ return createSharedOrderedTupleIdSequenceAdapter(id_sequence);
+ }
+
const TupleIdSequence* getTupleIdSequenceVirtual() const override {
return getTupleIdSequence();
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/storage/ValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/ValueAccessor.hpp b/storage/ValueAccessor.hpp
index 654bbf9..f183efe 100644
--- a/storage/ValueAccessor.hpp
+++ b/storage/ValueAccessor.hpp
@@ -305,6 +305,21 @@ class ValueAccessor {
const TupleIdSequence &id_sequence) = 0;
/**
+ * @brief Create a new OrderedTupleIdSequenceAdapterValueAccessor that wraps
+ * this ValueAccessor.
+ * @warning The newly-created adapter does NOT take ownership of this
+ * ValueAccessor nor the provided OrderedTupleIdSequence. Both must
+ * remain valid so long as the adapter will be used.
+ *
+ * @param id_sequence An OrderedTupleIdSequence specifying some subset of the
+ * tuples for this ValueAccessor that the adapter will iterate over.
+ * @return A new OrderedTupleIdSequenceAdapterValueAccessor that will iterate
+ * over only the tuples specified in id_sequence.
+ **/
+ virtual ValueAccessor* createSharedOrderedTupleIdSequenceAdapterVirtual(
+ const OrderedTupleIdSequence &id_sequence) = 0;
+
+ /**
* @brief Get a TupleIdSequence indicating which positions this ValueAccessor
* is iterating over.
*
@@ -512,6 +527,11 @@ class TupleIdSequenceAdapterValueAccessor : public ValueAccessor {
return createSharedTupleIdSequenceAdapter(id_sequence);
}
+ ValueAccessor* createSharedOrderedTupleIdSequenceAdapterVirtual(
+ const OrderedTupleIdSequence &id_sequence) override {
+ return createSharedOrderedTupleIdSequenceAdapter(id_sequence);
+ }
+
const TupleIdSequence* getTupleIdSequenceVirtual() const override {
return getTupleIdSequence();
}
@@ -718,6 +738,11 @@ class OrderedTupleIdSequenceAdapterValueAccessor : public ValueAccessor {
return createSharedTupleIdSequenceAdapter(id_sequence);
}
+ ValueAccessor* createSharedOrderedTupleIdSequenceAdapterVirtual(
+ const OrderedTupleIdSequence &id_sequence) override {
+ return createSharedOrderedTupleIdSequenceAdapter(id_sequence);
+ }
+
const TupleIdSequence* getTupleIdSequenceVirtual() const override {
return getTupleIdSequence();
}
@@ -944,6 +969,11 @@ class PackedTupleStorageSubBlockValueAccessor : public ValueAccessor {
return createSharedTupleIdSequenceAdapter(id_sequence);
}
+ ValueAccessor* createSharedOrderedTupleIdSequenceAdapterVirtual(
+ const OrderedTupleIdSequence &id_sequence) override {
+ return createSharedOrderedTupleIdSequenceAdapter(id_sequence);
+ }
+
const TupleIdSequence* getTupleIdSequenceVirtual() const override {
return getTupleIdSequence();
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6d83b46a/types/containers/ColumnVectorsValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVectorsValueAccessor.hpp b/types/containers/ColumnVectorsValueAccessor.hpp
index fbbdc1b..6dc1124 100644
--- a/types/containers/ColumnVectorsValueAccessor.hpp
+++ b/types/containers/ColumnVectorsValueAccessor.hpp
@@ -290,6 +290,11 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
return createSharedTupleIdSequenceAdapter(id_sequence);
}
+ ValueAccessor* createSharedOrderedTupleIdSequenceAdapterVirtual(
+ const OrderedTupleIdSequence &id_sequence) override {
+ return createSharedOrderedTupleIdSequenceAdapter(id_sequence);
+ }
+
const TupleIdSequence* getTupleIdSequenceVirtual() const override {
return getTupleIdSequence();
}
[2/4] incubator-quickstep git commit: Minor refactor for
HashJoinInnerJoin.
Posted by zu...@apache.org.
Minor refactor for HashJoinInnerJoin.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/23e14b8e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/23e14b8e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/23e14b8e
Branch: refs/heads/push-down-disjunctive-predicate
Commit: 23e14b8e078f42a8d3e5f6c0c4885dee271d99aa
Parents: f2e7726
Author: Zuyu Zhang <zu...@apache.org>
Authored: Mon Jan 30 15:28:49 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Mon Jan 30 20:21:23 2017 -0800
----------------------------------------------------------------------
relational_operators/CMakeLists.txt | 1 +
relational_operators/HashJoinOperator.cpp | 42 ++++++++++++++------------
2 files changed, 23 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/23e14b8e/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index c1caaa3..b2e08cf 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -199,6 +199,7 @@ target_link_libraries(quickstep_relationaloperators_FinalizeAggregationOperator
target_link_libraries(quickstep_relationaloperators_HashJoinOperator
${GFLAGS_LIB_NAME}
glog
+ quickstep_catalog_CatalogAttribute
quickstep_catalog_CatalogRelation
quickstep_catalog_CatalogRelationSchema
quickstep_catalog_CatalogTypedefs
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/23e14b8e/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index fd3841f..7394554 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -25,6 +25,7 @@
#include <utility>
#include <vector>
+#include "catalog/CatalogAttribute.hpp"
#include "catalog/CatalogRelation.hpp"
#include "catalog/CatalogRelationSchema.hpp"
#include "catalog/CatalogTypedefs.hpp"
@@ -165,6 +166,12 @@ class OuterJoinTupleCollector {
TupleIdSequence *filter_;
};
+// For InnerJoin.
+constexpr std::size_t kNumValueAccessors = 3u;
+constexpr std::size_t kBuildValueAccessorIndex = 0,
+ kProbeValueAccessorIndex = 1u,
+ kTempResultValueAccessorIndex = 2u;
+
} // namespace
bool HashJoinOperator::getAllWorkOrders(
@@ -565,31 +572,27 @@ void HashInnerJoinWorkOrder::execute() {
});
}
-
// We also need a temp value accessor to store results of any scalar expressions.
ColumnVectorsValueAccessor temp_result;
// Create a map of ValueAccessors and what attributes we want to pick from them
- std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map;
- const std::vector<ValueAccessor *> accessors{
- ordered_build_accessor.get(), ordered_probe_accessor.get(), &temp_result};
- const unsigned int build_index = 0, probe_index = 1, temp_index = 2;
- for (auto &accessor : accessors) {
- accessor_attribute_map.push_back(std::make_pair(
- accessor,
- std::vector<attribute_id>(selection_.size(), kInvalidCatalogId)));
- }
+ std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map(
+ kNumValueAccessors, std::make_pair(nullptr, // A late binding ValueAccessor.
+ vector<attribute_id>(selection_.size(), kInvalidCatalogId)));
- attribute_id dest_attr = 0;
- std::vector<std::pair<tuple_id, tuple_id>> zipped_joined_tuple_ids;
+ accessor_attribute_map[kBuildValueAccessorIndex].first = ordered_build_accessor.get();
+ accessor_attribute_map[kProbeValueAccessorIndex].first = ordered_probe_accessor.get();
+ accessor_attribute_map[kTempResultValueAccessorIndex].first = &temp_result;
+ attribute_id dest_attr = 0;
for (auto &selection_cit : selection_) {
// If the Scalar (column) is not an attribute in build/probe blocks, then
// insert it into a ColumnVectorsValueAccessor.
if (selection_cit->getDataSource() != Scalar::ScalarDataSource::kAttribute) {
// Current destination attribute maps to the column we'll create now.
- accessor_attribute_map[temp_index].second[dest_attr] = temp_result.getNumColumns();
+ accessor_attribute_map[kTempResultValueAccessorIndex].second[dest_attr] = temp_result.getNumColumns();
+ std::vector<std::pair<tuple_id, tuple_id>> zipped_joined_tuple_ids;
if (temp_result.getNumColumns() == 0) {
// The getAllValuesForJoin function below needs joined tuple IDs as
// a vector of pair of (build-tuple-ID, probe-tuple-ID), and we have
@@ -599,9 +602,8 @@ void HashInnerJoinWorkOrder::execute() {
// they don't have scalar expressions with attributes from both
// build and probe relations (other expressions would have been
// pushed down to before the join).
- zipped_joined_tuple_ids.reserve(build_tids.size());
for (std::size_t i = 0; i < build_tids.size(); ++i) {
- zipped_joined_tuple_ids.push_back(std::make_pair(build_tids[i], probe_tids[i]));
+ zipped_joined_tuple_ids.emplace_back(build_tids[i], probe_tids[i]);
}
}
temp_result.addColumn(
@@ -610,12 +612,12 @@ void HashInnerJoinWorkOrder::execute() {
probe_relation_id, probe_accessor.get(),
zipped_joined_tuple_ids));
} else {
- auto scalar_attr = static_cast<const ScalarAttribute *>(selection_cit.get());
- const attribute_id attr_id = scalar_attr->getAttribute().getID();
- if (scalar_attr->getAttribute().getParent().getID() == build_relation_id) {
- accessor_attribute_map[build_index].second[dest_attr] = attr_id;
+ const CatalogAttribute &attr = static_cast<const ScalarAttribute *>(selection_cit.get())->getAttribute();
+ const attribute_id attr_id = attr.getID();
+ if (attr.getParent().getID() == build_relation_id) {
+ accessor_attribute_map[kBuildValueAccessorIndex].second[dest_attr] = attr_id;
} else {
- accessor_attribute_map[probe_index].second[dest_attr] = attr_id;
+ accessor_attribute_map[kProbeValueAccessorIndex].second[dest_attr] = attr_id;
}
}
++dest_attr;