You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/08/04 01:56:30 UTC
[01/13] incubator-quickstep git commit: Refactored getProto in
CatalogRelation. [Forced Update!]
Repository: incubator-quickstep
Updated Branches:
refs/heads/LIP-for-tpch 97d8dca85 -> 5e22b396c (forced update)
Refactored getProto in CatalogRelation.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/e53186e5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/e53186e5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/e53186e5
Branch: refs/heads/LIP-for-tpch
Commit: e53186e5547ac619a67aabdfacbb1abdbca78b60
Parents: aaecc76
Author: Zuyu Zhang <zu...@twitter.com>
Authored: Sat Jul 30 06:29:05 2016 -0700
Committer: Zuyu Zhang <zu...@twitter.com>
Committed: Sat Jul 30 06:29:05 2016 -0700
----------------------------------------------------------------------
catalog/CatalogRelation.cpp | 16 +---------------
1 file changed, 1 insertion(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/e53186e5/catalog/CatalogRelation.cpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelation.cpp b/catalog/CatalogRelation.cpp
index 01aebb5..682b6be 100644
--- a/catalog/CatalogRelation.cpp
+++ b/catalog/CatalogRelation.cpp
@@ -143,21 +143,7 @@ CatalogRelation::CatalogRelation(const serialization::CatalogRelationSchema &pro
}
serialization::CatalogRelationSchema CatalogRelation::getProto() const {
- serialization::CatalogRelationSchema proto;
-
- proto.set_relation_id(id_);
- proto.set_name(name_);
- proto.set_temporary(temporary_);
-
- for (PtrVector<CatalogAttribute, true>::const_iterator it = attr_vec_.begin();
- it != attr_vec_.end();
- ++it) {
- if (it.isNull()) {
- proto.add_attributes();
- } else {
- proto.add_attributes()->MergeFrom(it->getProto());
- }
- }
+ serialization::CatalogRelationSchema proto = CatalogRelationSchema::getProto();
proto.MutableExtension(serialization::CatalogRelation::default_layout)
->MergeFrom(getDefaultStorageBlockLayout().getDescription());
[11/13] incubator-quickstep git commit: Initial commit
Posted by ji...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/rules/AttachBloomFilters.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachBloomFilters.cpp b/query_optimizer/rules/AttachBloomFilters.cpp
new file mode 100644
index 0000000..03a42a0
--- /dev/null
+++ b/query_optimizer/rules/AttachBloomFilters.cpp
@@ -0,0 +1,308 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#include "query_optimizer/rules/AttachBloomFilters.hpp"
+
+#include <memory>
+#include <set>
+#include <unordered_set>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+// Entry point of the rule. `input` must be the TopLevelPlan root of the
+// physical plan (checked with DCHECK). The plan is processed in three passes:
+// producers are collected, consumers are matched against them, and finally
+// the plan is rebuilt with the recorded bloom filter configurations.
+P::PhysicalPtr AttachBloomFilters::apply(const P::PhysicalPtr &input) {
+  DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+
+  // The cost model is constructed from the shared subplans of the top-level
+  // plan; it is replaced on every call.
+  const auto top_level_plan =
+      std::static_pointer_cast<const P::TopLevelPlan>(input);
+  cost_model_.reset(
+      new cost::StarSchemaSimpleCostModel(top_level_plan->shared_subplans()));
+
+  // Pass 1: visitProducer() handles children before their parent and records
+  // the bloom filters available at each node. The root is at depth 0.
+  const int root_depth = 0;
+  visitProducer(input, root_depth);
+
+  // Pass 2: visitConsumer() handles a node before its children and records
+  // the chosen build/probe attachments.
+  visitConsumer(input);
+
+  // Pass 3: materialize the recorded attachments into a new plan.
+  return visitAndAttach(input);
+}
+
+// Fills producers_ for the subtree rooted at `node`. Children are handled
+// before `node` itself. `depth` is the distance of `node` from the node that
+// apply() started at and is stored in every BloomFilterInfo created for a
+// hash join at this level.
+void AttachBloomFilters::visitProducer(const P::PhysicalPtr &node, const int depth) {
+  // Children first, so that their producers_ entries exist below.
+  for (const P::PhysicalPtr &child : node->children()) {
+    visitProducer(child, depth + 1);
+  }
+
+  const auto node_type = node->getPhysicalType();
+
+  // A hash join whose build (right) side has an estimated selectivity below
+  // 1.0 registers one bloom filter per build-side join attribute. The info is
+  // appended to the entry of the build child, with this join as the source.
+  if (node_type == P::PhysicalType::kHashJoin) {
+    const P::HashJoinPtr hash_join =
+        std::static_pointer_cast<const P::HashJoin>(node);
+    const P::PhysicalPtr &build_side = hash_join->right();
+    const double build_selectivity = cost_model_->estimateSelectivity(build_side);
+    if (build_selectivity < 1.0) {
+      std::vector<BloomFilterInfo> &build_side_filters = producers_[build_side];
+      for (const auto &build_attr : hash_join->right_join_attributes()) {
+        build_side_filters.emplace_back(node, build_attr, depth, build_selectivity, false);
+      }
+    }
+  }
+
+  // Ids of the attributes that this node outputs.
+  std::unordered_set<E::ExprId> output_ids;
+  for (const auto &attr : node->getOutputAttributes()) {
+    output_ids.emplace(attr->id());
+  }
+
+  // Only aggregates, selections and hash joins inherit the bloom filters of
+  // their children, and only those on attributes that are still output here.
+  const bool inherits_from_children =
+      node_type == P::PhysicalType::kAggregate ||
+      node_type == P::PhysicalType::kSelection ||
+      node_type == P::PhysicalType::kHashJoin;
+
+  std::vector<BloomFilterInfo> inherited;
+  if (inherits_from_children) {
+    for (const P::PhysicalPtr &child : node->children()) {
+      for (const BloomFilterInfo &info : producers_[child]) {
+        if (output_ids.count(info.attribute->id()) != 0) {
+          inherited.emplace_back(
+              info.source, info.attribute, info.depth, info.selectivity, false);
+        }
+      }
+    }
+  }
+
+  // emplace() leaves an already existing entry for `node` untouched.
+  producers_.emplace(node, std::move(inherited));
+}
+
+// Decides, for the subtree rooted at `node`, where bloom filters are probed.
+// `node` is handled before its children. Three steps are performed per node:
+//   1. The filters that reached `node` (consumers_[node]) are handed down to
+//      every child that outputs the filtered attribute.
+//   2. For inner and left-semi hash joins, the filters available on the build
+//      (right) child are offered to the probe (left) child, re-expressed on
+//      the matching probe-side join attribute.
+//   3. For hash joins and aggregates, the best filter per attribute offered to
+//      the consumer child is recorded in attaches_ (build side on the filter's
+//      source node, probe side on `node`), provided the consumer child's
+//      estimated cardinality exceeds a fixed threshold.
+void AttachBloomFilters::visitConsumer(const P::PhysicalPtr &node) {
+  // Consumer children estimated at or below this cardinality get no filters.
+  constexpr std::size_t kMinConsumerCardinality = 10000000;
+
+  // Step 1: bloom filters from parent. operator[] creates an empty entry when
+  // nothing was pushed to this node.
+  const auto &parent_bloom_filters = consumers_[node];
+  if (!parent_bloom_filters.empty()) {
+    for (const auto &child : node->children()) {
+      std::unordered_set<E::ExprId> child_output_attribute_ids;
+      for (const auto &attr : child->getOutputAttributes()) {
+        child_output_attribute_ids.emplace(attr->id());
+      }
+
+      std::vector<BloomFilterInfo> child_bloom_filters;
+      for (const auto &info : parent_bloom_filters) {
+        if (child_output_attribute_ids.count(info.attribute->id()) != 0) {
+          child_bloom_filters.emplace_back(info.source,
+                                           info.attribute,
+                                           info.depth,
+                                           info.selectivity,
+                                           false,
+                                           info.source_attribute);
+        }
+      }
+      // emplace() does not overwrite an entry that already exists for `child`.
+      consumers_.emplace(child, std::move(child_bloom_filters));
+    }
+  }
+
+  const auto node_type = node->getPhysicalType();
+
+  // The child of `node` that bloom filters may be probed on, if any.
+  P::PhysicalPtr consumer_child = nullptr;
+
+  if (node_type == P::PhysicalType::kHashJoin) {
+    const P::HashJoinPtr hash_join =
+        std::static_pointer_cast<const P::HashJoin>(node);
+
+    // Step 2: bloom filters from build side to probe side via HashJoin.
+    const auto join_type = hash_join->join_type();
+    if (join_type == P::HashJoin::JoinType::kInnerJoin ||
+        join_type == P::HashJoin::JoinType::kLeftSemiJoin) {
+      const P::PhysicalPtr &producer_child = hash_join->right();
+
+      // Maps a build-side join attribute id to its probe-side counterpart.
+      std::unordered_map<E::ExprId, E::AttributeReferencePtr> join_attribute_pairs;
+      const auto &probe_join_attributes = hash_join->left_join_attributes();
+      const auto &build_join_attributes = hash_join->right_join_attributes();
+      for (std::size_t i = 0; i < probe_join_attributes.size(); ++i) {
+        join_attribute_pairs.emplace(build_join_attributes[i]->id(),
+                                     probe_join_attributes[i]);
+      }
+
+      auto &probe_side_filters = consumers_[hash_join->left()];
+      for (const auto &info : producers_[producer_child]) {
+        const auto pair_it = join_attribute_pairs.find(info.attribute->id());
+        if (pair_it != join_attribute_pairs.end()) {
+          // The filter is probed on the probe attribute but was built on
+          // info.attribute, which is kept as the source attribute.
+          probe_side_filters.emplace_back(info.source,
+                                          pair_it->second,
+                                          info.depth,
+                                          info.selectivity,
+                                          true,
+                                          info.attribute);
+        }
+      }
+    }
+
+    // The probe side is the consumer for every join type.
+    consumer_child = hash_join->left();
+  } else if (node_type == P::PhysicalType::kAggregate) {
+    consumer_child = std::static_pointer_cast<const P::Aggregate>(node)->input();
+  }
+
+  // Step 3: decide attaches.
+  if (consumer_child != nullptr) {
+    const auto &consumer_bloom_filters = consumers_[consumer_child];
+    if (cost_model_->estimateCardinality(consumer_child) > kMinConsumerCardinality &&
+        !consumer_bloom_filters.empty()) {
+      // Best candidate per attribute. The map is keyed on the attribute
+      // reference pointer, exactly as the candidates were recorded.
+      std::map<E::AttributeReferencePtr, const BloomFilterInfo*> best_filters;
+      for (const auto &info : consumer_bloom_filters) {
+        const auto inserted = best_filters.emplace(info.attribute, &info);
+        if (!inserted.second &&
+            BloomFilterInfo::isBetterThan(&info, inserted.first->second)) {
+          inserted.first->second = &info;
+        }
+      }
+
+      auto &probe_attaches = getBloomFilterConfig(node);
+      for (const auto &pair : best_filters) {
+        const BloomFilterInfo *best = pair.second;
+        auto &build_attaches = getBloomFilterConfig(best->source);
+        build_attaches.addBuildSideBloomFilter(best->source_attribute);
+        probe_attaches.addProbeSideBloomFilter(pair.first,
+                                               best->source_attribute,
+                                               best->source);
+      }
+    }
+  }
+
+  for (const auto &child : node->children()) {
+    visitConsumer(child);
+  }
+}
+
+// Rebuilds the plan rooted at `node`, children first. A hash join or aggregate
+// that has an entry in attaches_ is re-created with that bloom filter
+// configuration on top of its rebuilt children. Any other node is copied with
+// the new children if at least one child changed, and returned as-is otherwise.
+P::PhysicalPtr AttachBloomFilters::visitAndAttach(const physical::PhysicalPtr &node) {
+  std::vector<P::PhysicalPtr> new_children;
+  bool has_changed = false;
+  for (const auto &child : node->children()) {
+    P::PhysicalPtr new_child = visitAndAttach(child);
+    if (new_child != child) {
+      has_changed = true;
+    }
+    new_children.emplace_back(std::move(new_child));
+  }
+
+  const auto attach_it = attaches_.find(node);
+  if (attach_it != attaches_.end()) {
+    const auto node_type = node->getPhysicalType();
+
+    if (node_type == P::PhysicalType::kHashJoin) {
+      const P::HashJoinPtr hash_join =
+          std::static_pointer_cast<const P::HashJoin>(node);
+      return P::HashJoin::Create(
+          new_children[0],
+          new_children[1],
+          hash_join->left_join_attributes(),
+          hash_join->right_join_attributes(),
+          hash_join->residual_predicate(),
+          hash_join->project_expressions(),
+          hash_join->join_type(),
+          attach_it->second);
+    }
+
+    if (node_type == P::PhysicalType::kAggregate) {
+      const P::AggregatePtr aggregate =
+          std::static_pointer_cast<const P::Aggregate>(node);
+      // Use the rebuilt child rather than aggregate->input(); otherwise every
+      // attachment made in the subtree below this aggregate would be lost.
+      return P::Aggregate::Create(
+          new_children[0],
+          aggregate->grouping_expressions(),
+          aggregate->aggregate_expressions(),
+          aggregate->filter_predicate(),
+          attach_it->second);
+    }
+  }
+
+  if (has_changed) {
+    return node->copyWithNewChildren(new_children);
+  }
+
+  return node;
+}
+
+// Returns the bloom filter configuration recorded for `node` in attaches_.
+// When no entry exists yet, one is created first by constructing a
+// BloomFilterConfig from `node`.
+P::BloomFilterConfig& AttachBloomFilters::getBloomFilterConfig(const physical::PhysicalPtr &node) {
+  auto config_it = attaches_.find(node);
+  if (config_it == attaches_.end()) {
+    config_it = attaches_.emplace(node, node).first;
+  }
+  return config_it->second;
+}
+
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/rules/AttachBloomFilters.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachBloomFilters.hpp b/query_optimizer/rules/AttachBloomFilters.hpp
new file mode 100644
index 0000000..e4437f7
--- /dev/null
+++ b/query_optimizer/rules/AttachBloomFilters.hpp
@@ -0,0 +1,118 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_BLOOM_FILTERS_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_BLOOM_FILTERS_HPP_
+
+#include <algorithm>
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ * @{
+ */
+
+/**
+ * @brief Rule on physical plans that records which hash joins should build
+ *        bloom filters and which hash joins / aggregates should probe them,
+ *        and re-creates those nodes with the resulting BloomFilterConfig.
+ */
+class AttachBloomFilters : public Rule<physical::Physical> {
+ public:
+  AttachBloomFilters() {}
+
+  ~AttachBloomFilters() override {}
+
+  std::string getName() const override {
+    return "AttachBloomFilters";
+  }
+
+  /**
+   * @brief Applies the rule to a whole physical plan.
+   *
+   * @param input The root of the plan; the implementation DCHECKs that it is
+   *        a TopLevelPlan.
+   * @return The plan with bloom filter configurations attached.
+   */
+  physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+  /**
+   * @brief A candidate bloom filter: which node it comes from, on which
+   *        attribute it is applied and how selective it is estimated to be.
+   */
+  struct BloomFilterInfo {
+    /**
+     * @param source_attribute_in The attribute the filter is built on. When
+     *        omitted (nullptr), \p attribute_in is used for it as well.
+     */
+    BloomFilterInfo(const physical::PhysicalPtr &source_in,
+                    const expressions::AttributeReferencePtr &attribute_in,
+                    const int depth_in,
+                    const double selectivity_in,
+                    const bool from_sibling_in,
+                    const expressions::AttributeReferencePtr &source_attribute_in = nullptr)
+        : source(source_in),
+          attribute(attribute_in),
+          depth(depth_in),
+          selectivity(selectivity_in),
+          from_sibling(from_sibling_in),
+          source_attribute(source_attribute_in != nullptr ? source_attribute_in
+                                                          : attribute_in) {
+    }
+
+    /**
+     * @brief Orders two candidates: the one with the smaller selectivity
+     *        wins; on equal selectivity the one with the larger depth wins.
+     *
+     * @return True if \p a is strictly preferred over \p b.
+     */
+    static bool isBetterThan(const BloomFilterInfo *a,
+                             const BloomFilterInfo *b) {
+      if (a->selectivity != b->selectivity) {
+        return a->selectivity < b->selectivity;
+      }
+      return a->depth > b->depth;
+    }
+
+    // Plan node the filter originates from.
+    physical::PhysicalPtr source;
+    // Attribute the filter is applied on at the node holding this info.
+    expressions::AttributeReferencePtr attribute;
+    // Depth recorded for the source node when the info was first created.
+    int depth;
+    // Estimated selectivity recorded when the info was first created.
+    double selectivity;
+    // True when the info was handed from the build side of a hash join to
+    // its probe side.
+    bool from_sibling;
+    // Attribute the filter is built on at the source node.
+    expressions::AttributeReferencePtr source_attribute;
+  };
+
+  // Collects, children first, the bloom filters available at each node into
+  // producers_.
+  void visitProducer(const physical::PhysicalPtr &node, const int depth);
+
+  // Propagates bloom filters to consumers_ from the top down and records the
+  // selected attachments in attaches_.
+  void visitConsumer(const physical::PhysicalPtr &node);
+
+  // Re-creates the plan with the configurations stored in attaches_.
+  physical::PhysicalPtr visitAndAttach(const physical::PhysicalPtr &node);
+
+  // Returns the attaches_ entry for the node, creating it on first use.
+  physical::BloomFilterConfig &getBloomFilterConfig(const physical::PhysicalPtr &node);
+
+  // Cost model used for selectivity and cardinality estimates; re-created on
+  // every apply() call.
+  std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
+
+  // Per node: bloom filters available on the node's output.
+  std::map<physical::PhysicalPtr, std::vector<BloomFilterInfo>> producers_;
+  // Per node: bloom filters offered to the node for probing.
+  std::map<physical::PhysicalPtr, std::vector<BloomFilterInfo>> consumers_;
+  // Per node: the build/probe bloom filter configuration to attach.
+  std::map<physical::PhysicalPtr, physical::BloomFilterConfig> attaches_;
+
+  DISALLOW_COPY_AND_ASSIGN(AttachBloomFilters);
+};
+
+/** @} */
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif /* QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_BLOOM_FILTERS_HPP_ */
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 04a9814..6b248f4 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -18,6 +18,7 @@
add_subdirectory(tests)
# Declare micro-libs:
+add_library(quickstep_queryoptimizer_rules_AttachBloomFilters AttachBloomFilters.cpp AttachBloomFilters.hpp)
add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp)
add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp)
add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
@@ -36,6 +37,20 @@ add_library(quickstep_queryoptimizer_rules_UnnestSubqueries UnnestSubqueries.cpp
# Link dependencies:
+target_link_libraries(quickstep_queryoptimizer_rules_AttachBloomFilters
+ quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExprId
+ quickstep_queryoptimizer_expressions_NamedExpression
+ quickstep_queryoptimizer_expressions_PatternMatcher
+ quickstep_queryoptimizer_expressions_Predicate
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_queryoptimizer_physical_TopLevelPlan
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_rules_BottomUpRule
glog
quickstep_queryoptimizer_rules_Rule
@@ -127,6 +142,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOpti
quickstep_queryoptimizer_physical_PhysicalType
quickstep_queryoptimizer_physical_TopLevelPlan
quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_DisjointTreeForest
quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_rules_SwapProbeBuild
quickstep_queryoptimizer_costmodel_SimpleCostModel
@@ -187,6 +203,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_UpdateExpression
# Module all-in-one library:
add_library(quickstep_queryoptimizer_rules ../../empty_src.cpp OptimizerRulesModule.hpp)
target_link_libraries(quickstep_queryoptimizer_rules
+ quickstep_queryoptimizer_rules_AttachBloomFilters
quickstep_queryoptimizer_rules_BottomUpRule
quickstep_queryoptimizer_rules_CollapseProject
quickstep_queryoptimizer_rules_GenerateJoins
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
index 9770606..cfbb5d1 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
@@ -31,6 +31,7 @@
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/PhysicalType.hpp"
#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "utility/DisjointTreeForest.hpp"
#include "glog/logging.h"
@@ -72,6 +73,9 @@ P::PhysicalPtr StarSchemaHashJoinOrderOptimization::applyInternal(const P::Physi
JoinGroupInfo *join_group = nullptr;
if (parent_join_group == nullptr || !is_valid_cascading_hash_join) {
new_join_group.reset(new JoinGroupInfo());
+ for (const auto &attr : input->getReferencedAttributes()) {
+ new_join_group->referenced_attributes.emplace(attr->id());
+ }
join_group = new_join_group.get();
} else {
join_group = parent_join_group;
@@ -144,7 +148,10 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
i,
tables[i],
cost_model_->estimateCardinality(tables[i]),
- cost_model_->estimateSelectivity(tables[i]));
+ cost_model_->estimateSelectivity(tables[i]),
+ CountSharedAttributes(join_group.referenced_attributes,
+ tables[i]->getOutputAttributes()),
+ tables[i]->getPhysicalType() == physical::PhysicalType::kAggregate);
}
// Auxiliary mapping info.
@@ -161,9 +168,19 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
}
}
- // Create a join graph where tables are vertices, and add an edge between vertices
- // t1 and t2 for each join predicate t1.x = t2.y
- std::vector<std::unordered_set<std::size_t>> join_graph(table_info_storage.size());
+ std::set<TableInfo*> remaining_tables;
+ for (auto &table_info : table_info_storage) {
+ remaining_tables.emplace(&table_info);
+ }
+
+ DisjointTreeForest<E::ExprId> join_attribute_forest;
+ for (const auto &attr_id_pair : join_group.join_attribute_pairs) {
+ join_attribute_forest.makeSet(attr_id_pair.first);
+ join_attribute_forest.makeSet(attr_id_pair.second);
+ join_attribute_forest.merge(attr_id_pair.first, attr_id_pair.second);
+ }
+
+ std::map<std::size_t, std::map<std::size_t, E::ExprId>> join_attribute_groups;
for (const auto &attr_id_pair : join_group.join_attribute_pairs) {
DCHECK(attribute_id_to_table_info_index_map.find(attr_id_pair.first)
!= attribute_id_to_table_info_index_map.end());
@@ -176,128 +193,169 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
attribute_id_to_table_info_index_map[attr_id_pair.second];
DCHECK_NE(first_table_idx, second_table_idx);
- table_info_storage[first_table_idx].join_attribute_pairs.emplace(
- attr_id_pair.first, attr_id_pair.second);
- table_info_storage[second_table_idx].join_attribute_pairs.emplace(
- attr_id_pair.second, attr_id_pair.first);
-
- join_graph[first_table_idx].emplace(second_table_idx);
- join_graph[second_table_idx].emplace(first_table_idx);
- }
-
- std::set<TableInfo*, TableInfoPtrLessComparator> table_info_ordered_by_priority;
- for (std::size_t i = 0; i < table_info_storage.size(); ++i) {
- table_info_ordered_by_priority.emplace(&table_info_storage[i]);
+ DCHECK_EQ(join_attribute_forest.find(attr_id_pair.first),
+ join_attribute_forest.find(attr_id_pair.second));
+ const std::size_t attr_group_id = join_attribute_forest.find(attr_id_pair.first);
+ auto &attr_group = join_attribute_groups[attr_group_id];
+ attr_group.emplace(first_table_idx, attr_id_pair.first);
+ attr_group.emplace(second_table_idx, attr_id_pair.second);
}
- // Contruct hash join tree.
while (true) {
- TableInfo *first_table_info = *table_info_ordered_by_priority.begin();
- table_info_ordered_by_priority.erase(
- table_info_ordered_by_priority.begin());
- const std::size_t first_table_info_id = first_table_info->table_info_id;
-
- TableInfo *second_table_info = nullptr;
- std::set<TableInfo*, TableInfoPtrLessComparator>::iterator second_table_info_it;
- for (auto candidate_table_info_it = table_info_ordered_by_priority.begin();
- candidate_table_info_it != table_info_ordered_by_priority.end();
- ++candidate_table_info_it) {
- TableInfo *candidate_table_info = *candidate_table_info_it;
- const std::size_t candidate_table_info_id = candidate_table_info->table_info_id;
-
- if (join_graph[first_table_info_id].find(candidate_table_info_id)
- == join_graph[first_table_info_id].end() &&
- join_graph[candidate_table_info_id].find(first_table_info_id)
- == join_graph[candidate_table_info_id].end()) {
- continue;
- } else if (second_table_info == nullptr) {
- second_table_info = candidate_table_info;
- second_table_info_it = candidate_table_info_it;
- }
-
- bool is_likely_many_to_many_join = false;
- for (const auto join_attr_pair : first_table_info->join_attribute_pairs) {
- if (candidate_table_info->joined_attribute_set.find(join_attr_pair.second)
- != candidate_table_info->joined_attribute_set.end()) {
- is_likely_many_to_many_join = true;
- break;
- }
- }
- for (const auto join_attr_pair : candidate_table_info->join_attribute_pairs) {
- if (first_table_info->joined_attribute_set.find(join_attr_pair.second)
- != first_table_info->joined_attribute_set.end()) {
- is_likely_many_to_many_join = true;
- break;
+ // TODO(jianqiao): design better data structure to improve efficiency here.
+ std::unique_ptr<JoinPair> best_join = nullptr;
+ for (TableInfo *probe_table_info : remaining_tables) {
+ for (TableInfo *build_table_info : remaining_tables) {
+ if (probe_table_info != build_table_info) {
+ std::vector<E::AttributeReferencePtr> build_attrs;
+ const std::size_t probe_table_id = probe_table_info->table_info_id;
+ const std::size_t build_table_id = build_table_info->table_info_id;
+ for (const auto &attr_group_pair : join_attribute_groups) {
+ const auto &attr_group = attr_group_pair.second;
+ auto probe_it = attr_group.find(probe_table_id);
+ auto build_it = attr_group.find(build_table_id);
+ if (probe_it != attr_group.end() && build_it != attr_group.end()) {
+ build_attrs.emplace_back(
+ attribute_id_to_reference_map.at(build_it->second));
+ }
+ }
+ if (!build_attrs.empty()
+ && build_table_info->table->impliesUniqueAttributes(build_attrs)) {
+ std::unique_ptr<JoinPair> new_join(
+ new JoinPair(probe_table_info, build_table_info));
+ if (best_join == nullptr || new_join->isBetterThan(*best_join)) {
+// if (best_join != nullptr) {
+// std::cerr << "(" << best_join->probe->estimated_selectivity
+// << ", " << best_join->probe->estimated_cardinality << ")"
+// << " -- "
+// << "(" << best_join->build->estimated_selectivity
+// << ", " << best_join->build->estimated_cardinality << ")"
+// << "\n";
+// std::cerr << "REPLACED WITH\n";
+// }
+// std::cerr << "(" << new_join->probe->estimated_selectivity
+// << ", " << new_join->probe->estimated_cardinality << ")"
+// << " -- "
+// << "(" << new_join->build->estimated_selectivity
+// << ", " << new_join->build->estimated_cardinality << ")"
+// << "\n****\n";
+ best_join.reset(new_join.release());
+ }
+ }
}
}
- if (!is_likely_many_to_many_join) {
- second_table_info = candidate_table_info;
- second_table_info_it = candidate_table_info_it;
- break;
- }
}
- DCHECK(second_table_info != nullptr);
- table_info_ordered_by_priority.erase(second_table_info_it);
- const P::PhysicalPtr &left_child = first_table_info->table;
- const P::PhysicalPtr &right_child = second_table_info->table;
+ TableInfo *selected_probe_table_info = nullptr;
+ TableInfo *selected_build_table_info = nullptr;
+
+ if (best_join != nullptr) {
+ selected_probe_table_info = best_join->probe;
+ selected_build_table_info = best_join->build;
+ }
+
+ // TODO(jianqiao): Handle the case when there is no primary key-foreign key information available.
+ CHECK(selected_probe_table_info != nullptr);
+ CHECK(selected_build_table_info != nullptr);
+
+// std::cerr << selected_probe_table_info->estimated_selectivity
+// << " -- "
+// << selected_build_table_info->estimated_selectivity
+// << "\n";
+
+// std::cerr << selected_probe_table_info->estimated_num_output_attributes
+// << " -- "
+// << selected_build_table_info->estimated_num_output_attributes
+// << "\n";
+
+ remaining_tables.erase(selected_probe_table_info);
+ remaining_tables.erase(selected_build_table_info);
+
+ const P::PhysicalPtr &probe_child = selected_probe_table_info->table;
+ const P::PhysicalPtr &build_child = selected_build_table_info->table;
std::vector<E::NamedExpressionPtr> output_attributes;
- for (const E::AttributeReferencePtr &left_attr : left_child->getOutputAttributes()) {
- output_attributes.emplace_back(left_attr);
+ for (const E::AttributeReferencePtr &probe_attr : probe_child->getOutputAttributes()) {
+ output_attributes.emplace_back(probe_attr);
}
- for (const E::AttributeReferencePtr &right_attr : right_child->getOutputAttributes()) {
- output_attributes.emplace_back(right_attr);
+ for (const E::AttributeReferencePtr &build_attr : build_child->getOutputAttributes()) {
+ output_attributes.emplace_back(build_attr);
}
- std::vector<E::AttributeReferencePtr> left_join_attributes;
- std::vector<E::AttributeReferencePtr> right_join_attributes;
- std::unordered_set<expressions::ExprId> new_joined_attribute_set;
- for (const auto &join_attr_pair : first_table_info->join_attribute_pairs) {
- if (second_table_info->join_attribute_pairs.find(join_attr_pair.second)
- != second_table_info->join_attribute_pairs.end()) {
- left_join_attributes.emplace_back(
- attribute_id_to_reference_map[join_attr_pair.first]);
- right_join_attributes.emplace_back(
- attribute_id_to_reference_map[join_attr_pair.second]);
-
- new_joined_attribute_set.emplace(join_attr_pair.first);
- new_joined_attribute_set.emplace(join_attr_pair.second);
+ std::vector<E::AttributeReferencePtr> probe_attributes;
+ std::vector<E::AttributeReferencePtr> build_attributes;
+ const std::size_t probe_table_id = selected_probe_table_info->table_info_id;
+ const std::size_t build_table_id = selected_build_table_info->table_info_id;
+ for (const auto &attr_group_pair : join_attribute_groups) {
+ const auto &attr_group = attr_group_pair.second;
+ auto probe_it = attr_group.find(probe_table_id);
+ auto build_it = attr_group.find(build_table_id);
+ if (probe_it != attr_group.end() && build_it != attr_group.end()) {
+ probe_attributes.emplace_back(
+ attribute_id_to_reference_map.at(probe_it->second));
+ build_attributes.emplace_back(
+ attribute_id_to_reference_map.at(build_it->second));
}
}
- DCHECK_GE(left_join_attributes.size(), static_cast<std::size_t>(1));
- if (table_info_ordered_by_priority.size() > 0) {
+ if (remaining_tables.size() > 0) {
P::PhysicalPtr output =
- P::HashJoin::Create(left_child,
- right_child,
- left_join_attributes,
- right_join_attributes,
+ P::HashJoin::Create(probe_child,
+ build_child,
+ probe_attributes,
+ build_attributes,
nullptr,
output_attributes,
P::HashJoin::JoinType::kInnerJoin);
- second_table_info->table = output;
+// P::PhysicalPtr output;
+// if (selected_build_table_info->estimated_num_output_attributes >= 4 &&
+// selected_probe_table_info->estimated_num_output_attributes < 4) {
+// output = P::HashJoin::Create(build_child,
+// probe_child,
+// build_attributes,
+// probe_attributes,
+// nullptr,
+// output_attributes,
+// P::HashJoin::JoinType::kInnerJoin);
+// } else {
+// output = P::HashJoin::Create(probe_child,
+// build_child,
+// probe_attributes,
+// build_attributes,
+// nullptr,
+// output_attributes,
+// P::HashJoin::JoinType::kInnerJoin);
+// }
+
+ selected_probe_table_info->table = output;
// TODO(jianqiao): Cache the estimated cardinality for each plan in cost
// model to avoid duplicated estimation.
- second_table_info->estimated_cardinality = cost_model_->estimateCardinality(output);
-
- second_table_info->join_attribute_pairs.insert(first_table_info->join_attribute_pairs.begin(),
- first_table_info->join_attribute_pairs.end());
- second_table_info->joined_attribute_set.insert(first_table_info->joined_attribute_set.begin(),
- first_table_info->joined_attribute_set.end());
- second_table_info->joined_attribute_set.insert(new_joined_attribute_set.begin(),
- new_joined_attribute_set.end());
- table_info_ordered_by_priority.emplace(second_table_info);
-
- join_graph[second_table_info->table_info_id].insert(join_graph[first_table_info_id].begin(),
- join_graph[first_table_info_id].end());
-
+ selected_probe_table_info->estimated_cardinality = cost_model_->estimateCardinality(output);
+ selected_probe_table_info->estimated_selectivity = cost_model_->estimateSelectivity(output);
+
+ selected_probe_table_info->estimated_num_output_attributes =
+ CountSharedAttributes(join_group.referenced_attributes,
+ output->getOutputAttributes());
+ selected_probe_table_info->is_aggregation = false;
+
+ remaining_tables.emplace(selected_probe_table_info);
+
+ // Update join attribute groups.
+ for (auto &attr_group_pair : join_attribute_groups) {
+ auto &attr_group = attr_group_pair.second;
+ auto build_it = attr_group.find(build_table_id);
+ if (build_it != attr_group.end()) {
+ const E::ExprId attr_id = build_it->second;
+ attr_group.erase(build_it);
+ attr_group.emplace(probe_table_id, attr_id);
+ }
+ }
} else {
- return P::HashJoin::Create(left_child,
- right_child,
- left_join_attributes,
- right_join_attributes,
+ return P::HashJoin::Create(probe_child,
+ build_child,
+ probe_attributes,
+ build_attributes,
residual_predicate,
project_expressions,
P::HashJoin::JoinType::kInnerJoin);
@@ -305,5 +363,18 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
}
}
+// Returns how many attribute references in attr_set2 carry an ID that is
+// present in attr_set1. Every matching entry of attr_set2 is counted.
+std::size_t StarSchemaHashJoinOrderOptimization::CountSharedAttributes(
+    const std::unordered_set<expressions::ExprId> &attr_set1,
+    const std::vector<expressions::AttributeReferencePtr> &attr_set2) {
+  std::size_t num_shared = 0;
+  for (auto ref_it = attr_set2.begin(); ref_it != attr_set2.end(); ++ref_it) {
+    // count() on an unordered_set yields 0 or 1, so it doubles as the increment.
+    num_shared += attr_set1.count((*ref_it)->id());
+  }
+  return num_shared;
+}
+
+
} // namespace optimizer
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
index deddffd..33d95a5 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
@@ -62,6 +62,7 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
* @brief A group of tables to form a hash join tree.
*/
struct JoinGroupInfo {
+ std::unordered_set<expressions::ExprId> referenced_attributes;  // Attribute IDs matched against a join's output attributes (via CountSharedAttributes) to estimate its number of output attributes.
std::vector<physical::PhysicalPtr> tables;  // Physical plans of the tables in this group.
std::vector<std::pair<expressions::ExprId, expressions::ExprId>> join_attribute_pairs;  // Pairs of attribute IDs; NOTE(review): presumably each pair is one equi-join condition -- confirm.
};
@@ -70,49 +71,84 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
* @brief Auxiliary information of a table for the optimizer.
*/
struct TableInfo {
- TableInfo(const std::size_t in_table_info_id,
- const physical::PhysicalPtr &in_table,
- const std::size_t in_estimated_cardinality,
- const double in_estimated_selectivity)
- : table_info_id(in_table_info_id),
- table(in_table),
- estimated_cardinality(in_estimated_cardinality),
- estimated_selectivity(in_estimated_selectivity) {
+ TableInfo(const std::size_t table_info_id_in,
+ const physical::PhysicalPtr &table_in,
+ const std::size_t estimated_cardinality_in,
+ const double estimated_selectivity_in,
+ const std::size_t estimated_num_output_attributes_in,
+ const bool is_aggregation_in)
+ : table_info_id(table_info_id_in),
+ table(table_in),
+ estimated_cardinality(estimated_cardinality_in),
+ estimated_selectivity(estimated_selectivity_in),
+ estimated_num_output_attributes(estimated_num_output_attributes_in),
+ is_aggregation(is_aggregation_in) {
}
const std::size_t table_info_id;  // Key under which this table appears in the join attribute groups.
physical::PhysicalPtr table;  // Current plan for this table; replaced by the join output when it is the probe side of a chosen join.
std::size_t estimated_cardinality;  // Refreshed from the cost model's estimateCardinality() after each join.
double estimated_selectivity;  // Refreshed from the cost model's estimateSelectivity() after each join.
- std::unordered_multimap<expressions::ExprId, expressions::ExprId> join_attribute_pairs;
- std::unordered_set<expressions::ExprId> joined_attribute_set;
+ std::size_t estimated_num_output_attributes;  // After a join: CountSharedAttributes(referenced attributes of the group, output attributes of the plan).
+ bool is_aggregation;  // Reset to false once 'table' becomes a join output. NOTE(review): presumably true when 'table' is an aggregation plan -- confirm.
};
- /**
- * @brief Comparator that compares the join priorities between two tables.
- */
- struct TableInfoPtrLessComparator {
- inline bool operator() (const TableInfo *lhs, const TableInfo *rhs) {
- bool swapped = false;
- if (lhs->estimated_cardinality > rhs->estimated_cardinality) {
- std::swap(lhs, rhs);
- swapped = true;
+ struct JoinPair {  // NOTE(review): presumably a candidate (probe, build) pairing for one hash join -- confirm against generatePlan().
+ JoinPair(TableInfo *probe_in, TableInfo *build_in)
+ : probe(probe_in), build(build_in) {
+ }
+
+ inline bool isBetterThan(const JoinPair &rhs) const {  // True if this pair is preferred over rhs; the rules below are tried in order and the first difference decides.
+ const auto &lhs = *this;
+ const bool lhs_has_large_output =  // "Large output": build + probe estimated output attributes exceed 5.
+ lhs.build->estimated_num_output_attributes
+ + lhs.probe->estimated_num_output_attributes > 5;
+ const bool rhs_has_large_output =
+ rhs.build->estimated_num_output_attributes
+ + rhs.probe->estimated_num_output_attributes > 5;
+ if (lhs_has_large_output || rhs_has_large_output) {
+ if (lhs_has_large_output != rhs_has_large_output) {
+ return rhs_has_large_output;  // Rule 1: the pair without a large output wins.
+ }
+ double lhs_selectivity =
+ lhs.build->estimated_selectivity * lhs.probe->estimated_selectivity;
+ double rhs_selectivity =
+ rhs.build->estimated_selectivity * rhs.probe->estimated_selectivity;
+ if (lhs_selectivity != rhs_selectivity) {
+ return lhs_selectivity < rhs_selectivity;  // Rule 2 (both large): the smaller build*probe selectivity product wins.
+ }
}
- if (lhs->estimated_selectivity < rhs->estimated_selectivity) {
- return !swapped;
- } else if (lhs->estimated_cardinality < 1000u &&
- rhs->estimated_cardinality > 10000u &&
- lhs->estimated_selectivity < rhs->estimated_selectivity * 1.5) {
- return !swapped;
- } else if (lhs->estimated_selectivity > rhs->estimated_selectivity) {
- return swapped;
- } else if (lhs->estimated_cardinality != rhs->estimated_cardinality) {
- return !swapped;
+ const bool lhs_has_small_build =  // "Small build": not large-output and build cardinality below 0x100 (256).
+ !lhs_has_large_output && lhs.build->estimated_cardinality < 0x100;
+ const bool rhs_has_small_build =
+ !rhs_has_large_output && rhs.build->estimated_cardinality < 0x100;
+ if (lhs_has_small_build != rhs_has_small_build) {
+ return lhs_has_small_build;  // Rule 3: the pair with a small build side wins.
+ }
+
+ if (lhs.probe->is_aggregation != rhs.probe->is_aggregation) {
+ return lhs.probe->is_aggregation;  // Rule 4: the pair whose probe side is flagged is_aggregation wins.
+ }
+
+ if (lhs.probe->estimated_cardinality != rhs.probe->estimated_cardinality) {
+ return lhs.probe->estimated_cardinality < rhs.probe->estimated_cardinality;  // Rule 5: smaller probe cardinality wins.
+ }
+ if (lhs.build->estimated_selectivity != rhs.build->estimated_selectivity) {
+ return lhs.build->estimated_selectivity < rhs.build->estimated_selectivity;  // Rule 6: smaller build selectivity wins.
+ }
+ if (lhs.build->estimated_cardinality != rhs.build->estimated_cardinality) {
+ return lhs.build->estimated_cardinality < rhs.build->estimated_cardinality;  // Rule 7: smaller build cardinality wins.
+ }
+ if (lhs.probe->table != rhs.probe->table) {
+ return lhs.probe->table < rhs.probe->table;  // Final tie-break: compare the plan pointers, probe side first.
} else {
- return swapped ^ (lhs->table < rhs->table);
+ return lhs.build->table < rhs.build->table;  // Same probe plan: fall back to comparing the build plan pointers.
}
}
+
+ TableInfo *probe;  // Stored as passed to the constructor; nothing in this struct deletes it.
+ TableInfo *build;  // Stored as passed to the constructor; nothing in this struct deletes it.
};
physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input,
@@ -123,6 +159,10 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
const expressions::PredicatePtr &residual_predicate,
const std::vector<expressions::NamedExpressionPtr> &project_expressions);
+ static std::size_t CountSharedAttributes(
+ const std::unordered_set<expressions::ExprId> &attr_set1,
+ const std::vector<expressions::AttributeReferencePtr> &attr_set2);
+
std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
DISALLOW_COPY_AND_ASSIGN(StarSchemaHashJoinOrderOptimization);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/tests/ExecutionHeuristics_unittest.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/ExecutionHeuristics_unittest.cpp b/query_optimizer/tests/ExecutionHeuristics_unittest.cpp
index 815c13e..ac0adea 100644
--- a/query_optimizer/tests/ExecutionHeuristics_unittest.cpp
+++ b/query_optimizer/tests/ExecutionHeuristics_unittest.cpp
@@ -70,7 +70,8 @@ class ExecutionHeuristicsTest : public ::testing::Test {
probe_relation,
std::move(build_attribute_ids),
std::move(probe_attribute_ids),
- join_hash_table_id);
+ join_hash_table_id,
+ build_relation->estimateTupleCardinality());
}
QueryPlan::DAGNodeIndex createDummyBuildHashOperator(QueryPlan *query_plan,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index 667df1e..16c0d82 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -59,6 +59,11 @@ using std::vector;
namespace quickstep {
+DEFINE_int64(bloom_adapter_batch_size, 64,
+ "Number of tuples to probe in bulk in Bloom filter adapter.");  // storage/AggregationOperationState.cpp uses this as the initial batch size and doubles the batch after every probe round.
+DEFINE_bool(adapt_bloom_filters, true,
+ "Whether to adaptively adjust the ordering of bloom filters.");
+
namespace {
// Functor passed to HashTable::getAllFromValueAccessor() to collect matching
@@ -75,6 +80,11 @@ class MapBasedJoinedTupleCollector {
joined_tuples_[tref.block].emplace_back(tref.tuple, accessor.getCurrentPosition());
}
+ // Records one joined pair: the entry is filed under the build-side block,
+ // holding the build-side tuple ID first and the probe-side tuple ID second.
+ inline void operator()(const tuple_id probe_tid,
+                        const TupleReference &build_tref) {
+   auto &pairs_for_block = joined_tuples_[build_tref.block];
+   pairs_for_block.emplace_back(build_tref.tuple, probe_tid);
+ }
+
// Get a mutable pointer to the collected map of joined tuple ID pairs. The
// key is inner block_id, values are vectors of joined tuple ID pairs with
// tuple ID from the inner block on the left and the outer block on the
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/relational_operators/HashJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index 235bfe4..cf680f6 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -307,8 +307,9 @@ class HashInnerJoinWorkOrder : public WorkOrder {
const std::vector<std::unique_ptr<const Scalar>> &selection,
const JoinHashTable &hash_table,
InsertDestination *output_destination,
- StorageManager *storage_manager)
- : WorkOrder(query_id),
+ StorageManager *storage_manager,
+ const int op_index = -1)
+ : WorkOrder(query_id, op_index),
build_relation_(build_relation),
probe_relation_(probe_relation),
join_key_attributes_(join_key_attributes),
@@ -354,8 +355,9 @@ class HashInnerJoinWorkOrder : public WorkOrder {
const std::vector<std::unique_ptr<const Scalar>> &selection,
const JoinHashTable &hash_table,
InsertDestination *output_destination,
- StorageManager *storage_manager)
- : WorkOrder(query_id),
+ StorageManager *storage_manager,
+ const int op_index = -1)
+ : WorkOrder(query_id, op_index),
build_relation_(build_relation),
probe_relation_(probe_relation),
join_key_attributes_(std::move(join_key_attributes)),
@@ -435,8 +437,9 @@ class HashSemiJoinWorkOrder : public WorkOrder {
const std::vector<std::unique_ptr<const Scalar>> &selection,
const JoinHashTable &hash_table,
InsertDestination *output_destination,
- StorageManager *storage_manager)
- : WorkOrder(query_id),
+ StorageManager *storage_manager,
+ const int op_index = -1)
+ : WorkOrder(query_id, op_index),
build_relation_(build_relation),
probe_relation_(probe_relation),
join_key_attributes_(join_key_attributes),
@@ -482,8 +485,9 @@ class HashSemiJoinWorkOrder : public WorkOrder {
const std::vector<std::unique_ptr<const Scalar>> &selection,
const JoinHashTable &hash_table,
InsertDestination *output_destination,
- StorageManager *storage_manager)
- : WorkOrder(query_id),
+ StorageManager *storage_manager,
+ const int op_index = -1)
+ : WorkOrder(query_id, op_index),
build_relation_(build_relation),
probe_relation_(probe_relation),
join_key_attributes_(std::move(join_key_attributes)),
@@ -559,8 +563,9 @@ class HashAntiJoinWorkOrder : public WorkOrder {
const std::vector<std::unique_ptr<const Scalar>> &selection,
const JoinHashTable &hash_table,
InsertDestination *output_destination,
- StorageManager *storage_manager)
- : WorkOrder(query_id),
+ StorageManager *storage_manager,
+ const int op_index = -1)
+ : WorkOrder(query_id, op_index),
build_relation_(build_relation),
probe_relation_(probe_relation),
join_key_attributes_(join_key_attributes),
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/relational_operators/WorkOrder.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrder.hpp b/relational_operators/WorkOrder.hpp
index df195cc..4eb6b3a 100644
--- a/relational_operators/WorkOrder.hpp
+++ b/relational_operators/WorkOrder.hpp
@@ -299,16 +299,23 @@ class WorkOrder {
return query_id_;
}
+ /**
+  * @brief Get the operator index this WorkOrder was constructed with.
+  *
+  * @return The stored operator index; the constructor defaults it to -1
+  *         when no index is supplied.
+  **/
+ inline int getOperatorIndex() const {
+   // Returned by value: a top-level 'const' on the return type would be
+   // ignored by the compiler and only trigger -Wignored-qualifiers.
+   return op_index_;
+ }
+
protected:
/**
* @brief Constructor.
*
* @param query_id The ID of the query to which this WorkOrder belongs.
+ * @param op_index The value reported by getOperatorIndex(); defaults to -1.
+ *        NOTE(review): presumably the index of the operator that created
+ *        this WorkOrder, with -1 meaning "not set" -- confirm.
**/
- explicit WorkOrder(const std::size_t query_id)
- : query_id_(query_id) {}
+ explicit WorkOrder(const std::size_t query_id,
+ const int op_index = -1)
+ : query_id_(query_id),
+ op_index_(op_index) {}
const std::size_t query_id_;
+ const int op_index_;
// A vector of preferred NUMA node IDs where this workorder should be executed.
// These node IDs typically indicate the NUMA node IDs of the input(s) of the
// workorder. Derived classes should ensure that there are no duplicate entries
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/AggregationOperationState.cpp
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.cpp b/storage/AggregationOperationState.cpp
index 4878cf1..668164c 100644
--- a/storage/AggregationOperationState.cpp
+++ b/storage/AggregationOperationState.cpp
@@ -46,10 +46,13 @@
#include "storage/StorageBlock.hpp"
#include "storage/StorageBlockInfo.hpp"
#include "storage/StorageManager.hpp"
+#include "storage/ValueAccessor.hpp"
+#include "storage/ValueAccessorUtil.hpp"
#include "types/TypedValue.hpp"
#include "types/containers/ColumnVector.hpp"
#include "types/containers/ColumnVectorsValueAccessor.hpp"
#include "types/containers/Tuple.hpp"
+#include "utility/BloomFilterAdapter.hpp"
#include "glog/logging.h"
@@ -57,6 +60,8 @@ using std::unique_ptr;
namespace quickstep {
+DECLARE_int64(bloom_adapter_batch_size);
+
AggregationOperationState::AggregationOperationState(
const CatalogRelationSchema &input_relation,
const std::vector<const AggregateFunction*> &aggregate_functions,
@@ -64,12 +69,16 @@ AggregationOperationState::AggregationOperationState(
std::vector<bool> &&is_distinct,
std::vector<std::unique_ptr<const Scalar>> &&group_by,
const Predicate *predicate,
+ std::vector<const BloomFilter *> &&bloom_filters,
+ std::vector<attribute_id> &&bloom_filter_attribute_ids,
const std::size_t estimated_num_entries,
const HashTableImplType hash_table_impl_type,
const std::vector<HashTableImplType> &distinctify_hash_table_impl_types,
StorageManager *storage_manager)
: input_relation_(input_relation),
predicate_(predicate),
+ bloom_filters_(std::move(bloom_filters)),
+ bloom_filter_attribute_ids_(std::move(bloom_filter_attribute_ids)),
group_by_list_(std::move(group_by)),
arguments_(std::move(arguments)),
is_distinct_(std::move(is_distinct)),
@@ -183,7 +192,8 @@ AggregationOperationState::AggregationOperationState(
AggregationOperationState* AggregationOperationState::ReconstructFromProto(
const serialization::AggregationOperationState &proto,
const CatalogDatabaseLite &database,
- StorageManager *storage_manager) {
+ StorageManager *storage_manager,
+ const std::vector<std::unique_ptr<BloomFilter>> &bloom_filters) {
DCHECK(ProtoIsValid(proto, database));
// Rebuild constructor arguments from their representation in 'proto'.
@@ -232,12 +242,24 @@ AggregationOperationState* AggregationOperationState::ReconstructFromProto(
database));
}
+ // Resolve every serialized (bloom_filter_id, attr_id) entry into a pointer to
+ // the matching filter in 'bloom_filters' plus the attribute ID stored with it.
+ std::vector<const BloomFilter*> bloom_filter_vector;
+ std::vector<attribute_id> bloom_filter_attribute_ids;
+ bloom_filter_vector.reserve(proto.bloom_filters_size());
+ bloom_filter_attribute_ids.reserve(proto.bloom_filters_size());
+ for (int i = 0; i < proto.bloom_filters_size(); ++i) {
+   // Bind by reference: copying the protobuf message on every iteration is
+   // wasted work.
+   const auto &bloom_filter_proto = proto.bloom_filters(i);
+   // The ID is used as a raw index below, so check that it is in range.
+   DCHECK_LT(bloom_filter_proto.bloom_filter_id(), bloom_filters.size());
+   bloom_filter_vector.emplace_back(
+       bloom_filters[bloom_filter_proto.bloom_filter_id()].get());
+   bloom_filter_attribute_ids.emplace_back(bloom_filter_proto.attr_id());
+ }
+
+
return new AggregationOperationState(database.getRelationSchemaById(proto.relation_id()),
aggregate_functions,
std::move(arguments),
std::move(is_distinct),
std::move(group_by_expressions),
predicate.release(),
+ std::move(bloom_filter_vector),
+ std::move(bloom_filter_attribute_ids),
proto.estimated_num_entries(),
HashTableImplTypeFromProto(proto.hash_table_impl_type()),
distinctify_hash_table_impl_types,
@@ -340,6 +362,10 @@ void AggregationOperationState::aggregateBlockSingleState(const block_id input_b
// tuples so that it can be reused across multiple aggregates (i.e. we only
// pay the cost of evaluating the predicate once).
std::unique_ptr<TupleIdSequence> reuse_matches;
+ if (predicate_) {
+ reuse_matches.reset(block->getMatchesForPredicate(predicate_.get()));
+ }
+
for (std::size_t agg_idx = 0;
agg_idx < handles_.size();
++agg_idx) {
@@ -358,7 +384,6 @@ void AggregationOperationState::aggregateBlockSingleState(const block_id input_b
arguments_[agg_idx],
local_arguments_as_attributes,
{}, /* group_by */
- predicate_.get(),
distinctify_hashtables_[agg_idx].get(),
&reuse_matches,
nullptr /* reuse_group_by_vectors */);
@@ -369,7 +394,6 @@ void AggregationOperationState::aggregateBlockSingleState(const block_id input_b
block->aggregate(*handles_[agg_idx],
arguments_[agg_idx],
local_arguments_as_attributes,
- predicate_.get(),
&reuse_matches));
}
}
@@ -391,6 +415,72 @@ void AggregationOperationState::aggregateBlockHashTable(const block_id input_blo
// GROUP BY expressions once).
std::vector<std::unique_ptr<ColumnVector>> reuse_group_by_vectors;
+ if (predicate_) {
+ reuse_matches.reset(block->getMatchesForPredicate(predicate_.get()));
+ }
+
+ // Narrow the set of tuples to aggregate down to those that
+ // BloomFilterAdapter::bulkProbe() reports as hits, on top of any predicate
+ // matches already stored in 'reuse_matches'.
+ if (!bloom_filters_.empty()) {
+   const std::size_t num_tuples = block->getNumTuples();
+// std::cerr << "Before: " << num_tuples << " -- "
+// << (reuse_matches ? reuse_matches->numTuples() : num_tuples)
+// << "\n";
+   // Iterate over the predicate matches when a predicate was evaluated,
+   // otherwise over every tuple of the block.
+   std::unique_ptr<ValueAccessor> base_accessor;
+   if (reuse_matches) {
+     base_accessor.reset(
+         block->getTupleStorageSubBlock().createValueAccessor(reuse_matches.get()));
+   } else {
+     base_accessor.reset(
+         block->getTupleStorageSubBlock().createValueAccessor());
+   }
+   InvokeOnAnyValueAccessor(
+       base_accessor.get(),
+       [&](auto *accessor) -> void {  // NOLINT(build/c++11)
+     // Bitmap over the whole block; bits are set for tuples that hit.
+     std::unique_ptr<TupleIdSequence> filtered(new TupleIdSequence(num_tuples));
+
+     // Byte length of each probed attribute, sampled at absolute position 0.
+     // NOTE(review): this presumes fixed-length attributes and a block that
+     // has a tuple at position 0 -- confirm.
+     std::vector<std::size_t> attr_size_vector;
+     attr_size_vector.reserve(bloom_filter_attribute_ids_.size());
+     for (const auto &attr : bloom_filter_attribute_ids_) {
+       auto val_and_size =
+           accessor->template getUntypedValueAndByteLengthAtAbsolutePosition<false>(0, attr);
+       attr_size_vector.emplace_back(val_and_size.second);
+     }
+
+     std::unique_ptr<BloomFilterAdapter> bloom_filter_adapter(
+         new BloomFilterAdapter(
+             bloom_filters_, bloom_filter_attribute_ids_, attr_size_vector));
+
+     // A non-positive flag value would yield a batch size of 0 and the loop
+     // below would never make progress, so fall back to 1 in that case.
+     std::uint32_t batch_size_try =
+         FLAGS_bloom_adapter_batch_size > 0
+             ? static_cast<std::uint32_t>(FLAGS_bloom_adapter_batch_size)
+             : 1u;
+     std::uint32_t num_tuples_left = accessor->getNumTuples();
+
+     // Start EMPTY and only reserve capacity. Constructing the vector with a
+     // size would pre-fill it with zero tuple IDs, and the push_back() calls
+     // below would append after them, so the first batch would probe the
+     // zero entries instead of the collected positions.
+     std::vector<tuple_id> batch;
+     batch.reserve(num_tuples_left);
+
+     do {
+       const std::uint32_t batch_size =
+           batch_size_try < num_tuples_left ? batch_size_try : num_tuples_left;
+       for (std::size_t i = 0; i < batch_size; ++i) {
+         accessor->next();
+         batch.push_back(accessor->getCurrentPosition());
+       }
+
+       // The loop below relies on bulkProbe() leaving the tuple IDs that hit
+       // in batch[0, num_hits).
+       const std::size_t num_hits =
+           bloom_filter_adapter->bulkProbe<true>(accessor, batch, batch_size);
+       for (std::size_t t = 0; t < num_hits; ++t) {
+         filtered->set(batch[t], true);
+       }
+
+       batch.clear();
+       num_tuples_left -= batch_size;
+       // Grow the batch geometrically from one round to the next.
+       batch_size_try = batch_size * 2;
+     } while (num_tuples_left > 0);
+
+     if (reuse_matches) {
+       reuse_matches->intersectWith(*filtered);
+     } else {
+       reuse_matches.reset(filtered.release());
+     }
+   });
+// std::cerr << "After: " << reuse_matches->numTuples() << "\n";
+ }
+
for (std::size_t agg_idx = 0;
agg_idx < handles_.size();
++agg_idx) {
@@ -402,7 +492,6 @@ void AggregationOperationState::aggregateBlockHashTable(const block_id input_blo
arguments_[agg_idx],
nullptr, /* arguments_as_attributes */
group_by_list_,
- predicate_.get(),
distinctify_hashtables_[agg_idx].get(),
&reuse_matches,
&reuse_group_by_vectors);
@@ -416,7 +505,6 @@ void AggregationOperationState::aggregateBlockHashTable(const block_id input_blo
block->aggregateGroupBy(*handles_[agg_idx],
arguments_[agg_idx],
group_by_list_,
- predicate_.get(),
agg_hash_table,
&reuse_matches,
&reuse_group_by_vectors);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/AggregationOperationState.hpp
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.hpp b/storage/AggregationOperationState.hpp
index 0199749..5db7325 100644
--- a/storage/AggregationOperationState.hpp
+++ b/storage/AggregationOperationState.hpp
@@ -33,6 +33,7 @@
#include "storage/HashTableBase.hpp"
#include "storage/HashTablePool.hpp"
#include "storage/StorageBlockInfo.hpp"
+#include "utility/BloomFilter.hpp"
#include "utility/Macros.hpp"
namespace quickstep {
@@ -108,6 +109,8 @@ class AggregationOperationState {
std::vector<bool> &&is_distinct,
std::vector<std::unique_ptr<const Scalar>> &&group_by,
const Predicate *predicate,
+ std::vector<const BloomFilter *> &&bloom_filters,
+ std::vector<attribute_id> &&bloom_filter_attribute_ids,
const std::size_t estimated_num_entries,
const HashTableImplType hash_table_impl_type,
const std::vector<HashTableImplType> &distinctify_hash_table_impl_types,
@@ -131,7 +134,8 @@ class AggregationOperationState {
static AggregationOperationState* ReconstructFromProto(
const serialization::AggregationOperationState &proto,
const CatalogDatabaseLite &database,
- StorageManager *storage_manager);
+ StorageManager *storage_manager,
+ const std::vector<std::unique_ptr<BloomFilter>> &bloom_filters);
/**
* @brief Check whether a serialization::AggregationOperationState is
@@ -181,6 +185,10 @@ class AggregationOperationState {
// filter predicate (if any), and the list of GROUP BY expressions (if any).
const CatalogRelationSchema &input_relation_;
std::unique_ptr<const Predicate> predicate_;
+
+ std::vector<const BloomFilter*> bloom_filters_;
+ std::vector<attribute_id> bloom_filter_attribute_ids_;
+
std::vector<std::unique_ptr<const Scalar>> group_by_list_;
// Each individual aggregate in this operation has an AggregationHandle and
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/AggregationOperationState.proto
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.proto b/storage/AggregationOperationState.proto
index bf78e3a..165148e 100644
--- a/storage/AggregationOperationState.proto
+++ b/storage/AggregationOperationState.proto
@@ -42,4 +42,10 @@ message AggregationOperationState {
// Each DISTINCT aggregation has its distinctify hash table impl type.
repeated HashTableImplType distinctify_hash_table_impl_types = 7;
+
+ message BloomFilter {  // One Bloom filter used by the aggregation, paired with an attribute ID.
+ required uint32 bloom_filter_id = 1;  // Index into the Bloom filter vector passed to AggregationOperationState::ReconstructFromProto().
+ required uint32 attr_id = 2;  // Attribute ID stored alongside the filter; it is handed to the BloomFilterAdapter together with the filter.
+ }
+ repeated BloomFilter bloom_filters = 8;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/BasicColumnStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/BasicColumnStoreValueAccessor.hpp b/storage/BasicColumnStoreValueAccessor.hpp
index 759e187..7907fd5 100644
--- a/storage/BasicColumnStoreValueAccessor.hpp
+++ b/storage/BasicColumnStoreValueAccessor.hpp
@@ -18,6 +18,8 @@
#ifndef QUICKSTEP_STORAGE_BASIC_COLUMN_STORE_VALUE_ACCESSOR_HPP_
#define QUICKSTEP_STORAGE_BASIC_COLUMN_STORE_VALUE_ACCESSOR_HPP_
+#include <cstddef>
+#include <utility>
#include <vector>
#include "catalog/CatalogRelationSchema.hpp"
@@ -43,7 +45,8 @@ class BasicColumnStoreValueAccessorHelper {
: relation_(relation),
num_tuples_(num_tuples),
column_stripes_(column_stripes),
- column_null_bitmaps_(column_null_bitmaps) {
+ column_null_bitmaps_(column_null_bitmaps),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()) {
}
inline tuple_id numPackedTuples() const {
@@ -61,9 +64,23 @@ class BasicColumnStoreValueAccessorHelper {
return nullptr;
}
- // TODO(chasseur): Consider cacheing the byte lengths of attributes.
- return static_cast<const char*>(column_stripes_[attr])
- + (tuple * relation_.getAttributeById(attr)->getType().maximumByteLength());
+ return static_cast<const char*>(column_stripes_[attr]) + (tuple * attr_max_lengths_[attr]);
+ }
+
+ // Returns a pointer to the value of 'attr' in 'tuple' together with the
+ // attribute's maximum byte length, or (nullptr, 0) when check_null is set
+ // and the value is NULL.
+ template <bool check_null>
+ inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple,
+     const attribute_id attr) const {
+   DEBUG_ASSERT(tuple < num_tuples_);
+   DEBUG_ASSERT(relation_.hasAttributeWithId(attr));
+   if (check_null) {
+     // Only attributes that have a null bitmap can hold a NULL.
+     const bool has_null_bitmap = !column_null_bitmaps_.elementIsNull(attr);
+     if (has_null_bitmap && column_null_bitmaps_[attr].getBit(tuple)) {
+       return std::make_pair(nullptr, 0);
+     }
+   }
+
+   // Values in a column stripe are laid out back to back at the attribute's
+   // maximum byte length.
+   const std::size_t value_length = attr_max_lengths_[attr];
+   const char *stripe_base = static_cast<const char*>(column_stripes_[attr]);
+   return std::make_pair(stripe_base + (tuple * value_length), value_length);
}
inline TypedValue getAttributeValueTyped(const tuple_id tuple,
@@ -80,6 +97,7 @@ class BasicColumnStoreValueAccessorHelper {
const tuple_id num_tuples_;
const std::vector<void*> &column_stripes_;
const PtrVector<BitVector<false>, true> &column_null_bitmaps_;
+ const std::vector<std::size_t> &attr_max_lengths_;
DISALLOW_COPY_AND_ASSIGN(BasicColumnStoreValueAccessorHelper);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/BloomFilterIndexSubBlock.cpp
----------------------------------------------------------------------
diff --git a/storage/BloomFilterIndexSubBlock.cpp b/storage/BloomFilterIndexSubBlock.cpp
index e806217..a40f69f 100644
--- a/storage/BloomFilterIndexSubBlock.cpp
+++ b/storage/BloomFilterIndexSubBlock.cpp
@@ -55,7 +55,6 @@ BloomFilterIndexSubBlock::BloomFilterIndexSubBlock(const TupleStorageSubBlock &t
sub_block_memory_size),
is_initialized_(false),
is_consistent_(false),
- random_seed_(kBloomFilterSeed),
bit_array_size_in_bytes_(description.GetExtension(
BloomFilterIndexSubBlockDescription::bloom_filter_size)) {
CHECK(DescriptionIsValid(relation_, description_))
@@ -74,8 +73,7 @@ BloomFilterIndexSubBlock::BloomFilterIndexSubBlock(const TupleStorageSubBlock &t
const std::uint32_t salt_count = description.GetExtension(BloomFilterIndexSubBlockDescription::number_of_hashes);
// Initialize the bloom_filter_ data structure to operate on bit_array.
- bloom_filter_.reset(new BloomFilter(random_seed_,
- salt_count,
+ bloom_filter_.reset(new BloomFilter(salt_count,
bit_array_size_in_bytes_,
bit_array_.get(),
is_bloom_filter_initialized));
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/BloomFilterIndexSubBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/BloomFilterIndexSubBlock.hpp b/storage/BloomFilterIndexSubBlock.hpp
index 4925673..8c81156 100644
--- a/storage/BloomFilterIndexSubBlock.hpp
+++ b/storage/BloomFilterIndexSubBlock.hpp
@@ -65,11 +65,6 @@ class BloomFilterIndexSubBlock : public IndexSubBlock {
kSelectivityNone
};
- /**
- * @brief A random seed to initialize the bloom filter hash functions.
- **/
- static const std::uint64_t kBloomFilterSeed = 0xA5A5A5A55A5A5A5AULL;
-
BloomFilterIndexSubBlock(const TupleStorageSubBlock &tuple_store,
const IndexSubBlockDescription &description,
const bool new_block,
@@ -179,7 +174,6 @@ class BloomFilterIndexSubBlock : public IndexSubBlock {
private:
bool is_initialized_;
bool is_consistent_;
- const std::uint64_t random_seed_;
const std::uint64_t bit_array_size_in_bytes_;
std::vector<attribute_id> indexed_attribute_ids_;
std::unique_ptr<unsigned char> bit_array_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt
index 582effd..777a888 100644
--- a/storage/CMakeLists.txt
+++ b/storage/CMakeLists.txt
@@ -677,6 +677,8 @@ target_link_libraries(quickstep_storage_HashTable
quickstep_types_Type
quickstep_types_TypedValue
quickstep_utility_BloomFilter
+ quickstep_utility_BloomFilterAdapter
+ quickstep_utility_EventProfiler
quickstep_utility_HashPair
quickstep_utility_Macros)
target_link_libraries(quickstep_storage_HashTableBase
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/CompressedColumnStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/CompressedColumnStoreValueAccessor.hpp b/storage/CompressedColumnStoreValueAccessor.hpp
index 64eb315..984dea3 100644
--- a/storage/CompressedColumnStoreValueAccessor.hpp
+++ b/storage/CompressedColumnStoreValueAccessor.hpp
@@ -52,6 +52,7 @@ class CompressedColumnStoreValueAccessorHelper {
const PtrVector<BitVector<false>, true> &uncompressed_column_null_bitmaps)
: relation_(relation),
num_tuples_(num_tuples),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()),
compression_info_(compression_info),
dictionary_coded_attributes_(dictionary_coded_attributes),
truncated_attributes_(truncated_attributes),
@@ -84,6 +85,26 @@ class CompressedColumnStoreValueAccessorHelper {
}
}
+ // Returns a pointer to the value of 'attr' in 'tuple' together with its byte
+ // length, decoding dictionary-coded and truncated attributes on the way.
+ template <bool check_null>
+ inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple,
+     const attribute_id attr) const {
+   // Dictionary-coded: let the dictionary map the stored code to a value.
+   if (dictionary_coded_attributes_[attr]) {
+     return dictionaries_.atUnchecked(attr).getUntypedValueAndByteLengthForCode<check_null>(
+         getCode(tuple, attr));
+   }
+
+   // Stored as-is: point straight at the value.
+   if (!truncated_attributes_[attr]) {
+     return std::make_pair(getAttributePtr<check_null>(tuple, attr),
+                           attr_max_lengths_[attr]);
+   }
+
+   // Truncated: widen the code into a member buffer and return its address.
+   // The buffer is overwritten by the next truncated lookup of the same width.
+   if (truncated_attribute_is_int_[attr]) {
+     int_buffer_ = getCode(tuple, attr);
+     return std::make_pair(&int_buffer_, sizeof(int_buffer_));
+   }
+   long_buffer_ = getCode(tuple, attr);
+   return std::make_pair(&long_buffer_, sizeof(long_buffer_));
+ }
+
inline TypedValue getAttributeValueTyped(const tuple_id tuple,
const attribute_id attr) const {
if (dictionary_coded_attributes_[attr]) {
@@ -138,6 +159,7 @@ class CompressedColumnStoreValueAccessorHelper {
const CatalogRelationSchema &relation_;
const tuple_id num_tuples_;
+ const std::vector<std::size_t> &attr_max_lengths_;
const CompressedBlockInfo &compression_info_;
const std::vector<bool> &dictionary_coded_attributes_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/CompressedPackedRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/CompressedPackedRowStoreValueAccessor.hpp b/storage/CompressedPackedRowStoreValueAccessor.hpp
index 024b0ec..7058aec 100644
--- a/storage/CompressedPackedRowStoreValueAccessor.hpp
+++ b/storage/CompressedPackedRowStoreValueAccessor.hpp
@@ -58,6 +58,7 @@ class CompressedPackedRowStoreValueAccessorHelper {
num_tuples_(num_tuples),
tuple_length_bytes_(tuple_length_bytes),
attribute_offsets_(attribute_offsets),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()),
compression_info_(compression_info),
dictionary_coded_attributes_(dictionary_coded_attributes),
truncated_attributes_(truncated_attributes),
@@ -92,6 +93,26 @@ class CompressedPackedRowStoreValueAccessorHelper {
}
}
+ template <bool check_null>  // check_null is forwarded to the dictionary lookup and to getAttributePtr().
+ inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple,  // Returns {untyped value pointer, byte length} for attribute 'attr' of tuple 'tuple'.
+ const attribute_id attr) const {
+ if (dictionary_coded_attributes_[attr]) {  // Dictionary-coded: value and length both come from the dictionary entry for the stored code.
+ return dictionaries_.atUnchecked(attr).getUntypedValueAndByteLengthForCode<check_null>(
+ getCode(tuple, attr));
+ } else if (truncated_attributes_[attr]) {  // Truncated: the code from getCode() is copied into a member buffer and that buffer's address is returned.
+ if (truncated_attribute_is_int_[attr]) {
+ int_buffer_ = getCode(tuple, attr);  // NOTE(review): written from a const method, so int_buffer_ is presumably declared mutable - confirm.
+ return std::make_pair(&int_buffer_, sizeof(int_buffer_));  // Result points at int_buffer_, so it is only meaningful until int_buffer_ is assigned again.
+ } else {
+ long_buffer_ = getCode(tuple, attr);  // Same scheme, using long_buffer_ instead.
+ return std::make_pair(&long_buffer_, sizeof(long_buffer_));
+ }
+ } else {  // Uncompressed: getAttributePtr()'s pointer plus the length recorded in attr_max_lengths_ for this attribute.
+ return std::make_pair(getAttributePtr<check_null>(tuple, attr),
+ attr_max_lengths_[attr]);
+ }
+ }
+
inline TypedValue getAttributeValueTyped(const tuple_id tuple,
const attribute_id attr) const {
if (dictionary_coded_attributes_[attr]) {
@@ -150,6 +171,7 @@ class CompressedPackedRowStoreValueAccessorHelper {
const tuple_id num_tuples_;
const std::size_t tuple_length_bytes_;
const std::vector<std::size_t> &attribute_offsets_;
+ const std::vector<std::size_t> &attr_max_lengths_;
const CompressedBlockInfo &compression_info_;
const std::vector<bool> &dictionary_coded_attributes_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/HashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index be31fd9..6e3dc96 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -23,6 +23,7 @@
#include <atomic>
#include <cstddef>
#include <cstdlib>
+#include <memory>
#include <type_traits>
#include <vector>
@@ -39,11 +40,14 @@
#include "types/Type.hpp"
#include "types/TypedValue.hpp"
#include "utility/BloomFilter.hpp"
+#include "utility/BloomFilterAdapter.hpp"
#include "utility/HashPair.hpp"
#include "utility/Macros.hpp"
namespace quickstep {
+DECLARE_int64(bloom_adapter_batch_size);
+
/** \addtogroup Storage
* @{
*/
@@ -1016,8 +1020,12 @@ class HashTable : public HashTableBase<resizable,
*
* @param bloom_filter The pointer to the bloom filter.
**/
- inline void setBuildSideBloomFilter(BloomFilter *bloom_filter) {
- build_bloom_filter_ = bloom_filter;
+ inline void addBuildSideBloomFilter(BloomFilter *bloom_filter) {  // Registers one more build-side bloom filter (several may be added).
+ build_bloom_filters_.emplace_back(bloom_filter);  // Stores the raw pointer only. Entry i is later filled from build_attribute_ids_[i], so pair each call with addBuildSideAttributeId().
+ }
+
+ inline void addBuildSideAttributeId(const attribute_id build_attribute_id) {  // Registers the attribute whose values populate the build-side filter at the same index.
+ build_attribute_ids_.push_back(build_attribute_id);  // Keep call order in step with addBuildSideBloomFilter(): the two vectors are indexed together.
}
/**
@@ -1042,8 +1050,8 @@ class HashTable : public HashTableBase<resizable,
* @param probe_attribute_ids The vector of attribute ids to use for probing
* the bloom filter.
**/
- inline void addProbeSideAttributeIds(std::vector<attribute_id> &&probe_attribute_ids) {
- probe_attribute_ids_.push_back(probe_attribute_ids);
+ inline void addProbeSideAttributeId(const attribute_id probe_attribute_id) {  // Registers the single attribute used to probe one probe-side bloom filter.
+ probe_attribute_ids_.push_back(probe_attribute_id);  // One id per filter; handed to BloomFilterAdapter together with probe_bloom_filters_ - presumably matched by index, confirm.
}
protected:
@@ -1329,9 +1337,10 @@ class HashTable : public HashTableBase<resizable,
// Data structures used for bloom filter optimized semi-joins.
bool has_build_side_bloom_filter_ = false;
bool has_probe_side_bloom_filter_ = false;
- BloomFilter *build_bloom_filter_;
+ std::vector<BloomFilter *> build_bloom_filters_;
+ std::vector<attribute_id> build_attribute_ids_;
std::vector<const BloomFilter*> probe_bloom_filters_;
- std::vector<std::vector<attribute_id>> probe_attribute_ids_;
+ std::vector<attribute_id> probe_attribute_ids_;
DISALLOW_COPY_AND_ASSIGN(HashTable);
};
@@ -1477,12 +1486,26 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al
&prealloc_state);
}
}
- std::unique_ptr<BloomFilter> thread_local_bloom_filter;
+
if (has_build_side_bloom_filter_) {
- thread_local_bloom_filter.reset(new BloomFilter(build_bloom_filter_->getRandomSeed(),
- build_bloom_filter_->getNumberOfHashes(),
- build_bloom_filter_->getBitArraySize()));
+ for (std::size_t i = 0; i < build_bloom_filters_.size(); ++i) {  // One full pass over the accessor per registered build-side filter.
+ auto *build_bloom_filter = build_bloom_filters_[i];
+ std::unique_ptr<BloomFilter> thread_local_bloom_filter(  // Fill a private filter sized like the shared one, then merge it once at the end.
+ new BloomFilter(build_bloom_filter->getNumberOfHashes(),
+ build_bloom_filter->getBitArraySize()));
+ const auto &build_attr = build_attribute_ids_[i];  // Filter i is populated from attribute build_attribute_ids_[i].
+ const std::size_t attr_size =  // NOTE(review): length is read once at position 0 and reused for every tuple - assumes a non-empty accessor and a fixed-width attribute; confirm.
+ accessor->template getUntypedValueAndByteLengthAtAbsolutePosition<false>(0, build_attr).second;
+ while (accessor->next()) {  // Assumes iteration is positioned before the first tuple on entry - TODO confirm.
+ thread_local_bloom_filter->insertUnSafe(
+ static_cast<const std::uint8_t *>(accessor->getUntypedValue(build_attr)),  // NOTE(review): pointer is used without a null check - confirm build_attr is never NULL here.
+ attr_size);
+ }
+ build_bloom_filter->bitwiseOr(thread_local_bloom_filter.get());  // Publish the privately collected bits into the shared filter.
+ accessor->beginIteration();  // Restart iteration so a later pass over the accessor is not left at the end.
+ }
}
+
if (resizable) {
while (result == HashTablePutResult::kOutOfSpace) {
{
@@ -1498,11 +1521,6 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al
variable_size,
(*functor)(*accessor),
using_prealloc ? &prealloc_state : nullptr);
- // Insert into bloom filter, if enabled.
- if (has_build_side_bloom_filter_) {
- thread_local_bloom_filter->insertUnSafe(static_cast<const std::uint8_t *>(key.getDataPtr()),
- key.getDataSize());
- }
if (result == HashTablePutResult::kDuplicateKey) {
DEBUG_ASSERT(!using_prealloc);
return result;
@@ -1528,20 +1546,11 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al
variable_size,
(*functor)(*accessor),
using_prealloc ? &prealloc_state : nullptr);
- // Insert into bloom filter, if enabled.
- if (has_build_side_bloom_filter_) {
- thread_local_bloom_filter->insertUnSafe(static_cast<const std::uint8_t *>(key.getDataPtr()),
- key.getDataSize());
- }
if (result != HashTablePutResult::kOK) {
return result;
}
}
}
- // Update the build side bloom filter with thread local copy, if available.
- if (has_build_side_bloom_filter_) {
- build_bloom_filter_->bitwiseOr(thread_local_bloom_filter.get());
- }
return HashTablePutResult::kOK;
});
@@ -1607,6 +1616,26 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al
&prealloc_state);
}
}
+
+ if (has_build_side_bloom_filter_) {  // Populate every registered build-side filter from this accessor.
+ for (std::size_t i = 0; i < build_bloom_filters_.size(); ++i) {  // One full pass over the accessor per filter.
+ auto *build_bloom_filter = build_bloom_filters_[i];
+ std::unique_ptr<BloomFilter> thread_local_bloom_filter(  // Fill a private filter sized like the shared one, then merge it once at the end.
+ new BloomFilter(build_bloom_filter->getNumberOfHashes(),
+ build_bloom_filter->getBitArraySize()));
+ const auto &build_attr = build_attribute_ids_[i];  // Filter i is populated from attribute build_attribute_ids_[i].
+ const std::size_t attr_size =  // NOTE(review): length is read once at position 0 and reused for every tuple - assumes a non-empty accessor and a fixed-width attribute; confirm.
+ accessor->template getUntypedValueAndByteLengthAtAbsolutePosition<false>(0, build_attr).second;
+ while (accessor->next()) {  // Assumes iteration is positioned before the first tuple on entry - TODO confirm.
+ thread_local_bloom_filter->insertUnSafe(
+ static_cast<const std::uint8_t *>(accessor->getUntypedValue(build_attr)),  // NOTE(review): pointer is used without a null check - confirm build_attr is never NULL here.
+ attr_size);
+ }
+ build_bloom_filter->bitwiseOr(thread_local_bloom_filter.get());  // Publish the privately collected bits into the shared filter.
+ accessor->beginIteration();  // Restart iteration so a later pass over the accessor is not left at the end.
+ }
+ }
+
if (resizable) {
while (result == HashTablePutResult::kOutOfSpace) {
{
@@ -2229,6 +2258,7 @@ inline std::size_t HashTable<ValueT, resizable, serializable, force_key_copy, al
}
}
+
template <typename ValueT,
bool resizable,
bool serializable,
@@ -2246,42 +2276,85 @@ void HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_
InvokeOnAnyValueAccessor(
accessor,
[&](auto *accessor) -> void { // NOLINT(build/c++11)
- while (accessor->next()) {
- // Probe any bloom filters, if enabled.
- if (has_probe_side_bloom_filter_) {
- DCHECK_EQ(probe_bloom_filters_.size(), probe_attribute_ids_.size());
- // Check if the key is contained in the BloomFilters or not.
- bool bloom_miss = false;
- for (std::size_t i = 0; i < probe_bloom_filters_.size() && !bloom_miss; ++i) {
- const BloomFilter *bloom_filter = probe_bloom_filters_[i];
- for (const attribute_id &attr_id : probe_attribute_ids_[i]) {
- TypedValue bloom_key = accessor->getTypedValue(attr_id);
- if (!bloom_filter->contains(static_cast<const std::uint8_t*>(bloom_key.getDataPtr()),
- bloom_key.getDataSize())) {
- bloom_miss = true;
+ std::unique_ptr<BloomFilterAdapter> bloom_filter_adapter;
+ if (has_probe_side_bloom_filter_) {
+ // Find (and cache) the size of each attribute in the probe lists.
+ // NOTE(nav): This code uses the accessor to get the size,
+ // and hence only works if there's at least one tuple.
+ std::vector<std::size_t> attr_size_vector;
+ attr_size_vector.reserve(probe_attribute_ids_.size());
+ for (const auto &probe_attr : probe_attribute_ids_) {
+ auto val_and_size =
+ accessor->template getUntypedValueAndByteLengthAtAbsolutePosition<false>(0, probe_attr);
+// std::cerr << "BF attr size = " << val_and_size.second << "\n";
+ attr_size_vector.emplace_back(val_and_size.second);
+ }
+
+ bloom_filter_adapter.reset(new BloomFilterAdapter(
+ probe_bloom_filters_, probe_attribute_ids_, attr_size_vector));
+
+ // We want to have large batch sizes for cache efficiency while probing,
+ // but small batch sizes to ensure that the adaptation logic kicks in
+ // (and does so early). We use exponentially increasing batch sizes to
+ // achieve a balance between the two.
+ // num_tuples_left caps each batch and the capacity reserved for it. The vector
+ // must start empty, because tuple positions are appended with push_back().
+ std::uint32_t batch_size_try = FLAGS_bloom_adapter_batch_size;
+ std::uint32_t num_tuples_left = accessor->getNumTuples();
+ std::vector<tuple_id> batch;
+ batch.reserve(num_tuples_left);
+
+ do {
+ std::uint32_t batch_size =
+ batch_size_try < num_tuples_left ? batch_size_try : num_tuples_left;
+ for (std::size_t i = 0; i < batch_size; ++i) {
+ accessor->next();
+ batch.push_back(accessor->getCurrentPosition());
+ }
+
+ std::size_t num_hits =
+ bloom_filter_adapter->bulkProbe<true>(accessor, batch, batch_size);
+
+ for (std::size_t t = 0; t < num_hits; ++t){
+ tuple_id probe_tid = batch[t];
+ TypedValue key = accessor->getTypedValueAtAbsolutePosition(key_attr_id, probe_tid);
+ if (check_for_null_keys && key.isNull()) {
+ continue;
+ }
+ const std::size_t true_hash = use_scalar_literal_hash_template ? key.getHashScalarLiteral()
+ : key.getHash();
+ const std::size_t adjusted_hash = adjust_hashes_template ? this->AdjustHash(true_hash)
+ : true_hash;
+ std::size_t entry_num = 0;
+ const ValueT *value;
+ while (this->getNextEntryForKey(key, adjusted_hash, &value, &entry_num)) {
+ (*functor)(probe_tid, *value);
+ if (!allow_duplicate_keys)
break;
- }
}
}
- if (bloom_miss) {
- continue; // On a bloom filter miss, probing the hash table can be skipped.
- }
- }
+ batch.clear();
+ num_tuples_left -= batch_size;
+ batch_size_try = batch_size * 2;
+ } while (!accessor->iterationFinished());
+ }
- TypedValue key = accessor->getTypedValue(key_attr_id);
- if (check_for_null_keys && key.isNull()) {
- continue;
- }
- const std::size_t true_hash = use_scalar_literal_hash_template ? key.getHashScalarLiteral()
- : key.getHash();
- const std::size_t adjusted_hash = adjust_hashes_template ? this->AdjustHash(true_hash)
- : true_hash;
- std::size_t entry_num = 0;
- const ValueT *value;
- while (this->getNextEntryForKey(key, adjusted_hash, &value, &entry_num)) {
- (*functor)(*accessor, *value);
- if (!allow_duplicate_keys) {
- break;
+ else { // no Bloom filters to probe
+ while(accessor->next()) {
+ TypedValue key = accessor->getTypedValue(key_attr_id);
+ if (check_for_null_keys && key.isNull()) {
+ continue;
+ }
+ const std::size_t true_hash = use_scalar_literal_hash_template ? key.getHashScalarLiteral()
+ : key.getHash();
+ const std::size_t adjusted_hash = adjust_hashes_template ? this->AdjustHash(true_hash)
+ : true_hash;
+ std::size_t entry_num = 0;
+ const ValueT *value;
+ while (this->getNextEntryForKey(key, adjusted_hash, &value, &entry_num)) {
+ (*functor)(*accessor, *value);
+ if (!allow_duplicate_keys)
+ break;
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/HashTable.proto
----------------------------------------------------------------------
diff --git a/storage/HashTable.proto b/storage/HashTable.proto
index 7f00f29..6eabf60 100644
--- a/storage/HashTable.proto
+++ b/storage/HashTable.proto
@@ -34,10 +34,10 @@ message HashTable {
required HashTableImplType hash_table_impl_type = 1;
repeated Type key_types = 2;
required uint64 estimated_num_entries = 3;
- repeated uint32 build_side_bloom_filter_id = 4;
- message ProbeSideBloomFilter {
- required uint32 probe_side_bloom_filter_id = 1;
- repeated uint32 probe_side_attr_ids = 2;
+ message BloomFilter {
+ required uint32 bloom_filter_id = 1;
+ required uint32 attr_id = 2;
}
- repeated ProbeSideBloomFilter probe_side_bloom_filters = 6;
+ repeated BloomFilter probe_side_bloom_filters = 4;
+ repeated BloomFilter build_side_bloom_filters = 5;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/HashTableFactory.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTableFactory.hpp b/storage/HashTableFactory.hpp
index 34baaeb..fbb3d41 100644
--- a/storage/HashTableFactory.hpp
+++ b/storage/HashTableFactory.hpp
@@ -318,9 +318,15 @@ class HashTableFactory {
// individual implementations of the hash table constructors.
// Check if there are any build side bloom filter defined on the hash table.
- if (proto.build_side_bloom_filter_id_size() > 0) {
+ if (proto.build_side_bloom_filters_size() > 0) {
hash_table->enableBuildSideBloomFilter();
- hash_table->setBuildSideBloomFilter(bloom_filters[proto.build_side_bloom_filter_id(0)].get());
+ for (int j = 0; j < proto.build_side_bloom_filters_size(); ++j) {
+ const auto build_side_bloom_filter = proto.build_side_bloom_filters(j);
+ hash_table->addBuildSideBloomFilter(
+ bloom_filters[build_side_bloom_filter.bloom_filter_id()].get());
+
+ hash_table->addBuildSideAttributeId(build_side_bloom_filter.attr_id());
+ }
}
// Check if there are any probe side bloom filters defined on the hash table.
@@ -330,15 +336,10 @@ class HashTableFactory {
for (int j = 0; j < proto.probe_side_bloom_filters_size(); ++j) {
// Add the pointer to the probe bloom filter within the list of probe bloom filters to use.
const auto probe_side_bloom_filter = proto.probe_side_bloom_filters(j);
- hash_table->addProbeSideBloomFilter(bloom_filters[probe_side_bloom_filter.probe_side_bloom_filter_id()].get());
-
- // Add the attribute ids corresponding to this probe bloom filter.
- std::vector<attribute_id> probe_attribute_ids;
- for (int k = 0; k < probe_side_bloom_filter.probe_side_attr_ids_size(); ++k) {
- const attribute_id probe_attribute_id = probe_side_bloom_filter.probe_side_attr_ids(k);
- probe_attribute_ids.push_back(probe_attribute_id);
- }
- hash_table->addProbeSideAttributeIds(std::move(probe_attribute_ids));
+ hash_table->addProbeSideBloomFilter(
+ bloom_filters[probe_side_bloom_filter.bloom_filter_id()].get());
+
+ hash_table->addProbeSideAttributeId(probe_side_bloom_filter.attr_id());
}
}
[04/13] incubator-quickstep git commit: Cleaned up the messages w/ a
dummy payload.
Posted by ji...@apache.org.
Cleaned up the messages w/ a dummy payload.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/ccea2ff8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/ccea2ff8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/ccea2ff8
Branch: refs/heads/LIP-for-tpch
Commit: ccea2ff83ea73e950d52c152cc422a9e93cf6aad
Parents: 52a32a3
Author: Zuyu Zhang <zu...@twitter.com>
Authored: Fri Jul 29 23:52:33 2016 -0700
Committer: Zuyu Zhang <zu...@twitter.com>
Committed: Mon Aug 1 10:11:19 2016 -0700
----------------------------------------------------------------------
query_execution/ForemanSingleNode.cpp | 4 +-
query_execution/QueryExecutionMessages.proto | 4 -
query_execution/QueryExecutionUtil.hpp | 5 +-
query_execution/README.md | 110 +++++++++----------
query_execution/WorkerMessage.hpp | 13 +--
query_execution/tests/BlockLocator_unittest.cpp | 11 +-
storage/tests/DataExchange_unittest.cpp | 11 +-
7 files changed, 60 insertions(+), 98 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ccea2ff8/query_execution/ForemanSingleNode.cpp
----------------------------------------------------------------------
diff --git a/query_execution/ForemanSingleNode.cpp b/query_execution/ForemanSingleNode.cpp
index cda02a7..d2b56ae 100644
--- a/query_execution/ForemanSingleNode.cpp
+++ b/query_execution/ForemanSingleNode.cpp
@@ -167,9 +167,7 @@ void ForemanSingleNode::run() {
if (!policy_enforcer_->hasQueries()) {
// Signal the main thread that there are no queries to be executed.
// Currently the message doesn't have any real content.
- const int dummy_payload = 0;
- TaggedMessage completion_tagged_message(
- &dummy_payload, sizeof(dummy_payload), kWorkloadCompletionMessage);
+ TaggedMessage completion_tagged_message(kWorkloadCompletionMessage);
const tmb::MessageBus::SendStatus send_status =
QueryExecutionUtil::SendTMBMessage(
bus_,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ccea2ff8/query_execution/QueryExecutionMessages.proto
----------------------------------------------------------------------
diff --git a/query_execution/QueryExecutionMessages.proto b/query_execution/QueryExecutionMessages.proto
index 308d736..f2219f6 100644
--- a/query_execution/QueryExecutionMessages.proto
+++ b/query_execution/QueryExecutionMessages.proto
@@ -20,10 +20,6 @@ import "catalog/Catalog.proto";
import "query_execution/QueryContext.proto";
import "relational_operators/WorkOrder.proto";
-// Used for any messages that do not carry payloads.
-message EmptyMessage {
-}
-
// Note: There are different types of completion messages for normal work orders
// rebuild work orders. This can be potentially helpful when we want to collect
// different statistics for executing different types of work orders.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ccea2ff8/query_execution/QueryExecutionUtil.hpp
----------------------------------------------------------------------
diff --git a/query_execution/QueryExecutionUtil.hpp b/query_execution/QueryExecutionUtil.hpp
index 6ea4a29..5994f22 100644
--- a/query_execution/QueryExecutionUtil.hpp
+++ b/query_execution/QueryExecutionUtil.hpp
@@ -123,10 +123,7 @@ class QueryExecutionUtil {
style.Broadcast(true);
Address address;
address.All(true);
- std::unique_ptr<WorkerMessage> poison_message(WorkerMessage::PoisonMessage());
- TaggedMessage poison_tagged_message(poison_message.get(),
- sizeof(*poison_message),
- kPoisonMessage);
+ TaggedMessage poison_tagged_message(kPoisonMessage);
const tmb::MessageBus::SendStatus send_status = bus->Send(
sender_id, address, style, std::move(poison_tagged_message));
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ccea2ff8/query_execution/README.md
----------------------------------------------------------------------
diff --git a/query_execution/README.md b/query_execution/README.md
index 22ad91d..12e0f57 100644
--- a/query_execution/README.md
+++ b/query_execution/README.md
@@ -3,19 +3,19 @@
## Types of threads
There are two kinds of threads in Quickstep - Foreman and Worker. The foreman
thread controls the query execution progress, finds schedulable work (called as
-WorkOrder) and assigns (or schedules) it for execution to the Worker threads.
+WorkOrder) and assigns (or schedules) it for execution to the Worker threads.
The Worker threads receive the WorkOrders and execute them. After execution they
-send a completion message (or response message) back to Foreman.
+send a completion message (or response message) back to Foreman.
## High level functionality of Foreman
-Foreman requests all the RelationalOperators in the physical query plan
+Foreman requests all the RelationalOperators in the physical query plan
represented as a DAG to give any schedulable work (in the form of WorkOrders).
While doing so, Foreman has to respect dependencies between operators. There are
-two kinds of dependencies between operators - pipeline breaking (or blocking)
+two kinds of dependencies between operators - pipeline breaking (or blocking)
and pipeline non-breaking (or non-blocking). In the first case, the output of
the producer operator can't be pipelined to the consumer operator. In the second
case, the Foreman will facilitate the pipelining of the intermediate output
-produced by the producer operator to the consumer operator.
+produced by the producer operator to the consumer operator.
## Messages in execution engine
@@ -26,110 +26,110 @@ of the message.
Foreman -> Worker : WorkerMessage which consists of the following things
- A pointer to the WorkOrder to be executed. The WorkOrder could be a normal
WorkOrder or a rebuild WorkOrder. A normal WorkOrder involves the invocation of
-WorkOrder::execute() method which is overriden by all of the RelationalOperator
-classes. A rebuild WorkOrder has one StorageBlock as input and calls a
+WorkOrder::execute() method which is overridden by all of the RelationalOperator
+classes. A rebuild WorkOrder has one StorageBlock as input and calls a
rebuild() method on the block. More details about rebuild() can be found in the
-storage module.
+storage module.
- The index of the relational operator in the query plan DAG that produced the
-WorkOrder.
-
-Main thread -> Worker : WorkerMessage of type PoisonMessage. This message is
-used to terminate the Worker thread, typically when shutting down the Quickstep
-process.
+WorkOrder.
### ForemanMessage
-Multiple senders are possible for this message. There are multiple types of
+Multiple senders are possible for this message. There are multiple types of
ForemanMessages, each of which indicates the purpose of the message.
-Worker -> Foreman : ForemanMessage of types WorkOrderCompletion and
-RebuildCompletion are sent after a Worker finishes executing a respective type
-of WorkOrder. This message helps the Foreman track the progress of individual
-operators as well as the whole query.
+Worker -> Foreman : ForemanMessage of types WorkOrderCompletion and
+RebuildCompletion are sent after a Worker finishes executing a respective type
+of WorkOrder. This message helps the Foreman track the progress of individual
+operators as well as the whole query.
Some relational operators and InsertDestination -> Foreman : ForemanMessage of
-types DataPipeline and WorkOrdersAvailable. InsertDestination first determines
+types DataPipeline and WorkOrdersAvailable. InsertDestination first determines
when an output block of a relational operator gets full. Once a block is full,
-it streams the unique block ID of the filled block along with the index of the
-relational operator that produced the block to Foreman with the message type
+it streams the unique block ID of the filled block along with the index of the
+relational operator that produced the block to Foreman with the message type
DataPipeline. Some operators which modify the block in place also send similar
-messages to Foreman.
+messages to Foreman.
### FeedbackMessage
This message is sent from Workers to the Foreman during a WorkOrder execution.
In certain operators, e.g. TextScan (used for bulk loading data from text files)
-and Sort, there is a communication between the relational operator and its
-WorkOrders. In such cases, when a WorkOrder is under execution on a Worker
+and Sort, there is a communication between the relational operator and its
+WorkOrders. In such cases, when a WorkOrder is under execution on a Worker
thread, a FeedbackMessage is sent from the WorkOrder via the Worker to Foreman.
Foreman relays this message to the relational operator that produced the sender
-WorkOrder. The relational operator uses this message to update its internal
-state to potentially generate newer WorkOrders.
+WorkOrder. The relational operator uses this message to update its internal
+state to potentially generate newer WorkOrders.
+
+### PoisonMessage
+This message is used to terminate a thread (i.e., Foreman and Worker), typically
+when shutting down the Quickstep process.
## How does the Foreman react after receiving various messages?
### WorkOrder completion message
* Update the book-keeping of pending WorkOrders per Worker and per operator.
* Fetch new WorkOrders if available for the operator of whose WorkOrder was
-just executed.
+just executed.
* Update the state of an operator - the possible options are:
- Normal WorkOrders are still under execution
- All normal WorkOrders have finished execution and rebuild WorkOrders are yet
- to be generated.
+ to be generated.
- All normal WorkOrders have finished execution, rebuild WorkOrders have been
- generated and issued to Workers.
+ generated and issued to Workers.
- All normal and rebuild WorkOrders have been executed AND all the dependency
- operators for the given operator have finished execution, therefore the given
- operator has finished its execution.
-* Fetch the WorkOrders from the dependents of the given operator.
+ operators for the given operator have finished execution, therefore the given
+ operator has finished its execution.
+* Fetch the WorkOrders from the dependents of the given operator.
### Rebuild WorkOrder completion message
* Update the book-keeping of pending WorkOrders per Worker and per operator.
* If all the rebuild WorkOrders have finished their execution, try to fetch the
WorkOrders of the dependent operators of the operator whose rebuild WorkOrder
-was just executed.
+was just executed.
### Data pipeline message
-* Find the consumer operators (i.e. operators which have a non
-pipeline-breaking link) of the producer operator.
-* Stream the block ID to the eligible consumer operators.
-* Fetch new WorkOrders from these consumer operators which may have become
-available because of the streaming of data.
+* Find the consumer operators (i.e. operators which have a non
+pipeline-breaking link) of the producer operator.
+* Stream the block ID to the eligible consumer operators.
+* Fetch new WorkOrders from these consumer operators which may have become
+available because of the streaming of data.
### WorkOrder available message
* Fetch new WorkOrders that may have become available.
### Feedback message
-* Relay the feedback message to a specified relational operator. The recipient
-operator is specified in the header of the message.
+* Relay the feedback message to a specified relational operator. The recipient
+operator is specified in the header of the message.
## Example
-We look at a sample query to better describe the flow of messages -
+We look at a sample query to better describe the flow of messages -
SELECT R.a, S.b from R, S where R.a = S.a and R.c < 20;
-This is an equi-join query which can be implemented using a hash join. We assume
-that S is a larger relation and the build relation is the output of the
+This is an equi-join query which can be implemented using a hash join. We assume
+that S is a larger relation and the build relation is the output of the
selection on R.
The query execution plan involves the following operators:
-* SelectOperator to filter R based on predicate R.c < 20 (We call the output as
-R')
+* SelectOperator to filter R based on predicate R.c < 20 (We call the output as
+R')
* BuildHashOperator to construct a hash table on R'
* HashJoinOperator to probe the hash table, where the probe relation is S
* DestroyHashTableOperator to destroy the hash table after the join is done
-* Multiple DropTableOperators to destroy the temporaray relations produced as
-output.
+* Multiple DropTableOperators to destroy the temporary relations produced as
+output.
R has two blocks with IDs as 1 and 2. S has two blocks with IDs as 3 and 4.
-We assume that the SelectOperator produces one filled block and one partially
-filled block as output. Note that in the query plan DAG, the link between
-SelectOperator and BuildHashOperator allows streaming of data. The
-HashJoinOperator's WorkOrder can't be generated unless all of the
+We assume that the SelectOperator produces one filled block and one partially
+filled block as output. Note that in the query plan DAG, the link between
+SelectOperator and BuildHashOperator allows streaming of data. The
+HashJoinOperator's WorkOrder can't be generated unless all of the
BuildHashOperator's WorkOrders have finished their execution. The execution is
-assumed to be performed by a single Worker thread.
+assumed to be performed by a single Worker thread.
-The following table describes the message exchange that happens during the
-query excution. We primarily focus on three operators - Select, BuildHash and
-HashJoin (probe).
+The following table describes the message exchange that happens during the
+query execution. We primarily focus on three operators - Select, BuildHash and
+HashJoin (probe).
| Sender | Receiver | Message | Message Description |
|:-----------------:|----------|---------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ccea2ff8/query_execution/WorkerMessage.hpp
----------------------------------------------------------------------
diff --git a/query_execution/WorkerMessage.hpp b/query_execution/WorkerMessage.hpp
index 560c1ba..a0434de 100644
--- a/query_execution/WorkerMessage.hpp
+++ b/query_execution/WorkerMessage.hpp
@@ -35,7 +35,6 @@ class WorkerMessage {
enum class WorkerMessageType {
kRebuildWorkOrder = 0,
kWorkOrder,
- kPoison
};
/**
@@ -70,15 +69,6 @@ class WorkerMessage {
}
/**
- * @brief A static factory method for generating a poison message.
- *
- * @return The constructed PoisonMessage.
- **/
- static WorkerMessage* PoisonMessage() {
- return new WorkerMessage(nullptr, 0, WorkerMessageType::kPoison);
- }
-
- /**
* @brief Destructor.
**/
~WorkerMessage() {
@@ -128,8 +118,7 @@ class WorkerMessage {
/**
* @brief Constructor.
*
- * @param work_unit The work order to be executed by the worker. A NULL
- * workorder indicates a poison message.
+ * @param work_unit The work order to be executed by the worker.
* @param relational_op_index The index of the relational operator in the
* query plan DAG that generated the given WorkOrder.
* @param type Type of the WorkerMessage.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ccea2ff8/query_execution/tests/BlockLocator_unittest.cpp
----------------------------------------------------------------------
diff --git a/query_execution/tests/BlockLocator_unittest.cpp b/query_execution/tests/BlockLocator_unittest.cpp
index fe7b86b..fd25e9e 100644
--- a/query_execution/tests/BlockLocator_unittest.cpp
+++ b/query_execution/tests/BlockLocator_unittest.cpp
@@ -90,16 +90,7 @@ class BlockLocatorTest : public ::testing::Test {
virtual void TearDown() {
storage_manager_.reset();
- serialization::EmptyMessage proto;
-
- const int proto_length = proto.ByteSize();
- char *proto_bytes = static_cast<char*>(malloc(proto_length));
- CHECK(proto.SerializeToArray(proto_bytes, proto_length));
-
- TaggedMessage message(static_cast<const void*>(proto_bytes),
- proto_length,
- kPoisonMessage);
- free(proto_bytes);
+ TaggedMessage message(kPoisonMessage);
LOG(INFO) << "Worker (id '" << worker_client_id_
<< "') sent PoisonMessage (typed '" << kPoisonMessage
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ccea2ff8/storage/tests/DataExchange_unittest.cpp
----------------------------------------------------------------------
diff --git a/storage/tests/DataExchange_unittest.cpp b/storage/tests/DataExchange_unittest.cpp
index 38d12f6..4bad17b 100644
--- a/storage/tests/DataExchange_unittest.cpp
+++ b/storage/tests/DataExchange_unittest.cpp
@@ -105,16 +105,7 @@ class DataExchangeTest : public ::testing::Test {
data_exchanger_expected_.shutdown();
storage_manager_expected_.reset();
- serialization::EmptyMessage proto;
-
- const int proto_length = proto.ByteSize();
- char *proto_bytes = static_cast<char*>(malloc(proto_length));
- CHECK(proto.SerializeToArray(proto_bytes, proto_length));
-
- TaggedMessage message(static_cast<const void*>(proto_bytes),
- proto_length,
- kPoisonMessage);
- free(proto_bytes);
+ TaggedMessage message(kPoisonMessage);
LOG(INFO) << "Worker (id '" << worker_client_id_
<< "') sent PoisonMessage (typed '" << kPoisonMessage
[05/13] incubator-quickstep git commit: TMB: Added Support for an
Empty, but Typed TaggedMessage.
Posted by ji...@apache.org.
TMB: Added Support for an Empty, but Typed TaggedMessage.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/52a32a37
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/52a32a37
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/52a32a37
Branch: refs/heads/LIP-for-tpch
Commit: 52a32a372612a3e8f5ab268886d4b9cbb546c205
Parents: 260b862
Author: Zuyu Zhang <zu...@twitter.com>
Authored: Fri Jul 29 19:57:49 2016 -0700
Committer: Zuyu Zhang <zu...@twitter.com>
Committed: Mon Aug 1 10:11:19 2016 -0700
----------------------------------------------------------------------
third_party/tmb/include/tmb/tagged_message.h | 9 +++++++++
1 file changed, 9 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/52a32a37/third_party/tmb/include/tmb/tagged_message.h
----------------------------------------------------------------------
diff --git a/third_party/tmb/include/tmb/tagged_message.h b/third_party/tmb/include/tmb/tagged_message.h
index 49dcee7..f3a77ee 100644
--- a/third_party/tmb/include/tmb/tagged_message.h
+++ b/third_party/tmb/include/tmb/tagged_message.h
@@ -63,6 +63,15 @@ class TaggedMessage {
}
/**
+ * @brief Constructor which creates an empty, but typed message.
+ **/
+ explicit TaggedMessage(const message_type_id message_type)
+ : payload_inline_(true),
+ message_type_(message_type) {
+ payload_.in_line.size = 0;  // Zero-length payload: only the message type is set.
+ }
+
+ /**
* @brief Constructor.
*
* @param msg A pointer to the message contents in memory, which will be
[06/13] incubator-quickstep git commit: Implemented hashjoin
optimization class and removed the logic from ExecutionGenerator.
Posted by ji...@apache.org.
Implemented hashjoin optimization class and removed the logic from ExecutionGenerator.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/a61b99e9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/a61b99e9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/a61b99e9
Branch: refs/heads/LIP-for-tpch
Commit: a61b99e9e1fcbbe84c60d63b8277cbb67e518030
Parents: ccea2ff
Author: Hakan Memisoglu <ha...@gmail.com>
Authored: Mon Aug 1 16:39:07 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Wed Aug 3 03:10:58 2016 -0500
----------------------------------------------------------------------
query_optimizer/CMakeLists.txt | 1 +
query_optimizer/ExecutionGenerator.cpp | 19 +-----
query_optimizer/PhysicalGenerator.cpp | 2 +
query_optimizer/physical/HashJoin.hpp | 1 +
query_optimizer/rules/BottomUpRule.hpp | 10 ++++
query_optimizer/rules/CMakeLists.txt | 12 ++++
query_optimizer/rules/SwapProbeBuild.cpp | 62 ++++++++++++++++++++
query_optimizer/rules/SwapProbeBuild.hpp | 48 +++++++++++++++
.../tests/physical_generator/Select.test | 36 ++++++------
9 files changed, 156 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a61b99e9/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index a56b714..c55881f 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -199,6 +199,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_physical_Physical
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
+ quickstep_queryoptimizer_rules_SwapProbeBuild
quickstep_queryoptimizer_strategy_Aggregate
quickstep_queryoptimizer_strategy_Join
quickstep_queryoptimizer_strategy_OneToOne
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a61b99e9/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 88103df..fb24489 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -604,6 +604,8 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
const CatalogRelation *referenced_stored_probe_relation = nullptr;
const CatalogRelation *referenced_stored_build_relation = nullptr;
+ std::size_t build_cardinality = cost_model_->estimateCardinality(build_physical);
+
bool any_probe_attributes_nullable = false;
bool any_build_attributes_nullable = false;
@@ -671,23 +673,6 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
key_types.push_back(&left_attribute_type);
}
- std::size_t probe_cardinality = cost_model_->estimateCardinality(probe_physical);
- std::size_t build_cardinality = cost_model_->estimateCardinality(build_physical);
- // For inner join, we may swap the probe table and the build table.
- if (physical_plan->join_type() == P::HashJoin::JoinType::kInnerJoin) {
- // Choose the smaller table as the inner build table,
- // and the other one as the outer probe table.
- if (probe_cardinality < build_cardinality) {
- // Switch the probe and build physical nodes.
- std::swap(probe_physical, build_physical);
- std::swap(probe_cardinality, build_cardinality);
- std::swap(probe_attribute_ids, build_attribute_ids);
- std::swap(any_probe_attributes_nullable, any_build_attributes_nullable);
- std::swap(probe_original_attribute_ids, build_original_attribute_ids);
- std::swap(referenced_stored_probe_relation, referenced_stored_build_relation);
- }
- }
-
// Convert the residual predicate proto.
QueryContext::predicate_id residual_predicate_index = QueryContext::kInvalidPredicateId;
if (physical_plan->residual_predicate()) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a61b99e9/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 75a7bc9..897b212 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -28,6 +28,7 @@
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
#include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
+#include "query_optimizer/rules/SwapProbeBuild.hpp"
#include "query_optimizer/strategy/Aggregate.hpp"
#include "query_optimizer/strategy/Join.hpp"
#include "query_optimizer/strategy/OneToOne.hpp"
@@ -98,6 +99,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
}
rules.emplace_back(new PruneColumns());
+ rules.emplace_back(new SwapProbeBuild());
for (std::unique_ptr<Rule<P::Physical>> &rule : rules) {
physical_plan_ = rule->apply(physical_plan_);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a61b99e9/query_optimizer/physical/HashJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.hpp b/query_optimizer/physical/HashJoin.hpp
index b904b5f..988b139 100644
--- a/query_optimizer/physical/HashJoin.hpp
+++ b/query_optimizer/physical/HashJoin.hpp
@@ -20,6 +20,7 @@
#ifndef QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_HASHJOIN_HPP_
#define QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_HASHJOIN_HPP_
+#include <cstddef>
#include <memory>
#include <string>
#include <type_traits>
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a61b99e9/query_optimizer/rules/BottomUpRule.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/BottomUpRule.hpp b/query_optimizer/rules/BottomUpRule.hpp
index 0acc199..f98dadf 100644
--- a/query_optimizer/rules/BottomUpRule.hpp
+++ b/query_optimizer/rules/BottomUpRule.hpp
@@ -54,6 +54,7 @@ class BottomUpRule : public Rule<TreeType> {
TreeNodePtr apply(const TreeNodePtr &tree) override {
DCHECK(tree != nullptr);
+ init(tree);
std::vector<std::shared_ptr<const TreeType>> new_children;
bool has_changed_children = false;
for (const std::shared_ptr<const TreeType> &child : tree->children()) {
@@ -80,6 +81,15 @@ class BottomUpRule : public Rule<TreeType> {
*/
virtual TreeNodePtr applyToNode(const TreeNodePtr &node) = 0;
+ /**
+ * @brief Override this method to implement the initialization code
+ * for the rule. apply() calls it with the tree it was given.
+ *
+ * @param input The input tree.
+ */
+ virtual void init(const TreeNodePtr &input) {  // Default: nothing to initialize.
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(BottomUpRule);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a61b99e9/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 1990174..04a9814 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -29,6 +29,7 @@ add_library(quickstep_queryoptimizer_rules_RuleHelper RuleHelper.cpp RuleHelper.
add_library(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
StarSchemaHashJoinOrderOptimization.cpp
StarSchemaHashJoinOrderOptimization.hpp)
+add_library(quickstep_queryoptimizer_rules_SwapProbeBuild SwapProbeBuild.cpp SwapProbeBuild.hpp)
add_library(quickstep_queryoptimizer_rules_TopDownRule ../../empty_src.cpp TopDownRule.hpp)
add_library(quickstep_queryoptimizer_rules_UpdateExpression UpdateExpression.cpp UpdateExpression.hpp)
add_library(quickstep_queryoptimizer_rules_UnnestSubqueries UnnestSubqueries.cpp UnnestSubqueries.hpp)
@@ -127,6 +128,16 @@ target_link_libraries(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOpti
quickstep_queryoptimizer_physical_TopLevelPlan
quickstep_queryoptimizer_rules_Rule
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_SwapProbeBuild
+ quickstep_queryoptimizer_costmodel_SimpleCostModel
+ quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_TopLevelPlan
+ quickstep_queryoptimizer_rules_BottomUpRule
+ quickstep_queryoptimizer_rules_Rule
+ quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_rules_TopDownRule
quickstep_queryoptimizer_rules_Rule
quickstep_utility_Macros)
@@ -185,6 +196,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
quickstep_queryoptimizer_rules_Rule
quickstep_queryoptimizer_rules_RuleHelper
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
+ quickstep_queryoptimizer_rules_SwapProbeBuild
quickstep_queryoptimizer_rules_TopDownRule
quickstep_queryoptimizer_rules_UpdateExpression
quickstep_queryoptimizer_rules_UnnestSubqueries)
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a61b99e9/query_optimizer/rules/SwapProbeBuild.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/SwapProbeBuild.cpp b/query_optimizer/rules/SwapProbeBuild.cpp
new file mode 100644
index 0000000..4b4b38f
--- /dev/null
+++ b/query_optimizer/rules/SwapProbeBuild.cpp
@@ -0,0 +1,62 @@
+#include "query_optimizer/rules/SwapProbeBuild.hpp"
+
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+
+
+namespace quickstep {
+namespace optimizer {
+
+P::PhysicalPtr SwapProbeBuild::applyToNode(const P::PhysicalPtr &input) {
+ P::HashJoinPtr hash_join;
+
+ if (P::SomeHashJoin::MatchesWithConditionalCast(input, &hash_join)
+ && hash_join->join_type() == P::HashJoin::JoinType::kInnerJoin) {  // Only inner hash joins are swapped.
+ const P::PhysicalPtr &left = hash_join->left();  // Bound by reference: read-only, no shared_ptr copy.
+ const P::PhysicalPtr &right = hash_join->right();
+
+ const std::size_t left_cardinality = cost_model_->estimateCardinality(left);
+ const std::size_t right_cardinality = cost_model_->estimateCardinality(right);
+
+ if (right_cardinality > left_cardinality) {  // Rebuild the join with the larger-estimate side on the left.
+ const std::vector<E::AttributeReferencePtr> &left_join_attributes = hash_join->left_join_attributes();
+ const std::vector<E::AttributeReferencePtr> &right_join_attributes = hash_join->right_join_attributes();
+
+ P::PhysicalPtr output = P::HashJoin::Create(right,
+ left,
+ right_join_attributes,
+ left_join_attributes,
+ hash_join->residual_predicate(),
+ hash_join->project_expressions(),
+ hash_join->join_type());
+ LOG_APPLYING_RULE(input, output);
+ return output;
+ }
+ }
+
+ LOG_IGNORING_RULE(input);
+ return input;  // Not an inner hash join, or the sides are already in the desired order.
+}
+
+void SwapProbeBuild::init(const P::PhysicalPtr &input) {
+ if (cost_model_ == nullptr) {  // Create the cost model only if none exists yet.
+ P::TopLevelPlanPtr top_level;
+ if (P::SomeTopLevelPlan::MatchesWithConditionalCast(input, &top_level)) {
+ cost_model_.reset(new C::SimpleCostModel(top_level->shared_subplans()));  // Input is a TopLevelPlan.
+ } else {
+ std::vector<P::PhysicalPtr> plans = {input};  // Otherwise pass the input itself as the only plan.
+ cost_model_.reset(new C::SimpleCostModel(plans));
+ }
+ }
+}
+
+} // namespace optimizer
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a61b99e9/query_optimizer/rules/SwapProbeBuild.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/SwapProbeBuild.hpp b/query_optimizer/rules/SwapProbeBuild.hpp
new file mode 100644
index 0000000..da8fdf9
--- /dev/null
+++ b/query_optimizer/rules/SwapProbeBuild.hpp
@@ -0,0 +1,48 @@
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_SWAP_PROBE_BUILD_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_SWAP_PROBE_BUILD_HPP_
+
+#include <memory>
+#include <string>
+
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "query_optimizer/rules/BottomUpRule.hpp"
+#include "query_optimizer/cost_model/SimpleCostModel.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ * @{
+ */
+
+namespace P = ::quickstep::optimizer::physical;
+namespace E = ::quickstep::optimizer::expressions;
+namespace C = ::quickstep::optimizer::cost;
+
+/**
+ * @brief Rule that applies to a physical plan to arrange probe and
+ * build side based on the cardinalities.
+ */
+class SwapProbeBuild : public BottomUpRule<P::Physical> {
+ public:
+ SwapProbeBuild() {  // cost_model_ starts out null; init() creates it.
+ }
+
+ std::string getName() const override { return "SwapProbeBuild"; }
+
+ protected:
+ P::PhysicalPtr applyToNode(const P::PhysicalPtr &input) override;  // Swapped HashJoin, or input unchanged.
+ void init(const P::PhysicalPtr &input) override;  // Creates cost_model_ if it does not exist yet.
+
+ private:
+ std::unique_ptr<C::SimpleCostModel> cost_model_;  // Used by applyToNode() to estimate cardinalities.
+
+ DISALLOW_COPY_AND_ASSIGN(SwapProbeBuild);
+};
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a61b99e9/query_optimizer/tests/physical_generator/Select.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/physical_generator/Select.test b/query_optimizer/tests/physical_generator/Select.test
index d99916c..76c5683 100644
--- a/query_optimizer/tests/physical_generator/Select.test
+++ b/query_optimizer/tests/physical_generator/Select.test
@@ -2232,10 +2232,7 @@ TopLevelPlan
[Physical Plan]
TopLevelPlan
+-plan=HashJoin
-| +-left=TableReference[relation=b]
-| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
-| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
-| +-right=Aggregate
+| +-left=Aggregate
| | +-input=TableReference[relation=c]
| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
| | | +-AttributeReference[id=3,name=y,relation=c,type=Int]
@@ -2245,6 +2242,9 @@ TopLevelPlan
| | +-Alias[id=4,name=,alias=$aggregate0,relation=$aggregate,type=Long NULL]
| | +-AggregateFunction[function=SUM]
| | +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| +-right=TableReference[relation=b]
+| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
| +-project_expressions=
| | +-Alias[id=5,name=,alias=(x+SubqueryExpression),relation=,type=Long NULL]
| | +-Add
@@ -2252,9 +2252,9 @@ TopLevelPlan
| | +-AttributeReference[id=4,name=,alias=$aggregate0,relation=$aggregate,
| | type=Long NULL]
| +-left_join_attributes=
-| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | +-AttributeReference[id=2,name=x,relation=c,type=Int]
| +-right_join_attributes=
-| +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| +-AttributeReference[id=0,name=w,relation=b,type=Int]
+-output_attributes=
+-AttributeReference[id=5,name=,alias=(x+SubqueryExpression),relation=,
type=Long NULL]
@@ -2369,10 +2369,7 @@ TopLevelPlan
[Physical Plan]
TopLevelPlan
+-plan=HashJoin
-| +-left=TableReference[relation=b]
-| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
-| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
-| +-right=Aggregate
+| +-left=Aggregate
| | +-input=TableReference[relation=c]
| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
| | | +-AttributeReference[id=3,name=y,relation=c,type=Int]
@@ -2382,6 +2379,9 @@ TopLevelPlan
| | +-Alias[id=4,name=,alias=$aggregate0,relation=$aggregate,type=Long NULL]
| | +-AggregateFunction[function=SUM]
| | +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| +-right=TableReference[relation=b]
+| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
| +-residual_predicate=Greater
| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
| | +-AttributeReference[id=4,name=,alias=$aggregate0,relation=$aggregate,
@@ -2390,9 +2390,9 @@ TopLevelPlan
| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
| +-left_join_attributes=
-| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | +-AttributeReference[id=2,name=x,relation=c,type=Int]
| +-right_join_attributes=
-| +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| +-AttributeReference[id=0,name=w,relation=b,type=Int]
+-output_attributes=
+-AttributeReference[id=0,name=w,relation=b,type=Int]
+-AttributeReference[id=1,name=x,relation=b,type=Int]
@@ -2476,10 +2476,7 @@ TopLevelPlan
[Physical Plan]
TopLevelPlan
+-plan=HashJoin
-| +-left=TableReference[relation=b]
-| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
-| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
-| +-right=NestedLoopsJoin
+| +-left=NestedLoopsJoin
| | +-left=Aggregate
| | | +-input=TableReference[relation=c]
| | | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
@@ -2518,6 +2515,9 @@ TopLevelPlan
| | | type=Long NULL]
| | +-AttributeReference[id=9,name=,alias=$aggregate0,relation=$aggregate,
| | type=Long NULL]
+| +-right=TableReference[relation=b]
+| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
| +-project_expressions=
| | +-Alias[id=11,name=,alias=(x+SubqueryExpression),relation=,type=Long NULL]
| | +-Add
@@ -2525,9 +2525,9 @@ TopLevelPlan
| | +-AttributeReference[id=10,name=,alias=(SUM(y)+SubqueryExpression),
| | relation=,type=Long NULL]
| +-left_join_attributes=
-| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | +-AttributeReference[id=2,name=x,relation=c,type=Int]
| +-right_join_attributes=
-| +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| +-AttributeReference[id=0,name=w,relation=b,type=Int]
+-output_attributes=
+-AttributeReference[id=11,name=,alias=(x+SubqueryExpression),relation=,
type=Long NULL]
[09/13] incubator-quickstep git commit: Add visualization for
execution plan DAGs combined with profiling stats
Posted by ji...@apache.org.
Add visualization for execution plan DAGs combined with profiling stats
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/1b07eaae
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/1b07eaae
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/1b07eaae
Branch: refs/heads/LIP-for-tpch
Commit: 1b07eaae6f3a1b591960a331190dd4d7634426bf
Parents: 8cd5a56
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Tue Aug 2 16:57:47 2016 -0500
Committer: Harshad Deshmukh <hb...@apache.org>
Committed: Wed Aug 3 17:37:31 2016 -0500
----------------------------------------------------------------------
CMakeLists.txt | 1 +
cli/QuickstepCli.cpp | 19 +-
query_execution/ForemanSingleNode.cpp | 16 +-
query_execution/ForemanSingleNode.hpp | 11 +
query_execution/PolicyEnforcerBase.cpp | 16 +-
query_execution/PolicyEnforcerBase.hpp | 15 +-
query_execution/QueryExecutionMessages.proto | 10 +-
query_execution/QueryExecutionTypedefs.hpp | 15 ++
query_execution/Worker.cpp | 15 +-
.../tests/QueryManagerSingleNode_unittest.cpp | 5 +
relational_operators/AggregationOperator.hpp | 11 +
relational_operators/BuildHashOperator.hpp | 13 ++
relational_operators/CreateIndexOperator.hpp | 4 +
relational_operators/CreateTableOperator.hpp | 5 +
relational_operators/DeleteOperator.hpp | 5 +
relational_operators/DestroyHashOperator.hpp | 6 +
relational_operators/DropTableOperator.hpp | 5 +
.../FinalizeAggregationOperator.hpp | 5 +
relational_operators/HashJoinOperator.hpp | 24 ++
relational_operators/InsertOperator.hpp | 5 +
.../NestedLoopsJoinOperator.hpp | 5 +
relational_operators/RelationalOperator.hpp | 17 ++
relational_operators/SampleOperator.hpp | 5 +
relational_operators/SaveBlocksOperator.hpp | 5 +
relational_operators/SelectOperator.hpp | 9 +
relational_operators/SortMergeRunOperator.hpp | 5 +
.../SortRunGenerationOperator.hpp | 5 +
relational_operators/TableGeneratorOperator.hpp | 5 +
relational_operators/TextScanOperator.hpp | 4 +
relational_operators/UpdateOperator.hpp | 5 +
.../WindowAggregationOperator.hpp | 5 +
utility/CMakeLists.txt | 14 ++
utility/ExecutionDAGVisualizer.cpp | 230 +++++++++++++++++++
utility/ExecutionDAGVisualizer.hpp | 112 +++++++++
34 files changed, 600 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0bbde61..3192713 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -770,6 +770,7 @@ target_link_libraries(quickstep_cli_shell
quickstep_queryoptimizer_QueryProcessor
quickstep_storage_PreloaderThread
quickstep_threading_ThreadIDBasedMap
+ quickstep_utility_ExecutionDAGVisualizer
quickstep_utility_Macros
quickstep_utility_PtrVector
quickstep_utility_SqlError
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/cli/QuickstepCli.cpp
----------------------------------------------------------------------
diff --git a/cli/QuickstepCli.cpp b/cli/QuickstepCli.cpp
index 68a3599..154c689 100644
--- a/cli/QuickstepCli.cpp
+++ b/cli/QuickstepCli.cpp
@@ -75,6 +75,7 @@ typedef quickstep::LineReaderDumb LineReaderImpl;
#include "storage/PreloaderThread.hpp"
#include "threading/ThreadIDBasedMap.hpp"
+#include "utility/ExecutionDAGVisualizer.hpp"
#include "utility/Macros.hpp"
#include "utility/PtrVector.hpp"
#include "utility/SqlError.hpp"
@@ -185,6 +186,10 @@ DEFINE_string(profile_file_name, "",
// To put things in perspective, the first run is, in my experiments, about 5-10
// times more expensive than the average run. That means the query needs to be
// run at least a hundred times to make the impact of the first run small (< 5 %).
+DEFINE_bool(visualize_execution_dag, false,
+ "If true, visualize the execution plan DAG into a graph in DOT "
+ "format (DOT is a plain text graph description language) which is "
+ "then printed via stderr.");
} // namespace quickstep
@@ -361,7 +366,7 @@ int main(int argc, char* argv[]) {
query_processor->getStorageManager(),
-1, // Don't pin the Foreman thread.
num_numa_nodes_system,
- quickstep::FLAGS_profile_and_report_workorder_perf);
+ quickstep::FLAGS_profile_and_report_workorder_perf || quickstep::FLAGS_visualize_execution_dag);
// Start the worker threads.
for (Worker &worker : workers) {
@@ -434,6 +439,12 @@ int main(int argc, char* argv[]) {
}
DCHECK(query_handle->getQueryPlanMutable() != nullptr);
+ std::unique_ptr<quickstep::ExecutionDAGVisualizer> dag_visualizer;
+ if (quickstep::FLAGS_visualize_execution_dag) {
+ dag_visualizer.reset(
+ new quickstep::ExecutionDAGVisualizer(*query_handle->getQueryPlanMutable()));
+ }
+
start = std::chrono::steady_clock::now();
QueryExecutionUtil::ConstructAndSendAdmitRequestMessage(
main_thread_client_id,
@@ -471,6 +482,12 @@ int main(int argc, char* argv[]) {
foreman.printWorkOrderProfilingResults(query_handle->query_id(),
stdout);
}
+ if (quickstep::FLAGS_visualize_execution_dag) {
+ const auto &profiling_stats =
+ foreman.getWorkOrderProfilingResults(query_handle->query_id());
+ dag_visualizer->bindProfilingStats(profiling_stats);
+ std::cerr << "\n" << dag_visualizer->toDOT() << "\n";
+ }
} catch (const std::exception &e) {
fprintf(stderr, "QUERY EXECUTION ERROR: %s\n", e.what());
break;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/query_execution/ForemanSingleNode.cpp
----------------------------------------------------------------------
diff --git a/query_execution/ForemanSingleNode.cpp b/query_execution/ForemanSingleNode.cpp
index d2b56ae..f935a0b 100644
--- a/query_execution/ForemanSingleNode.cpp
+++ b/query_execution/ForemanSingleNode.cpp
@@ -236,22 +236,26 @@ void ForemanSingleNode::sendWorkerMessage(const size_t worker_thread_index,
<< worker_directory_->getClientID(worker_thread_index);
}
+const std::vector<WorkOrderTimeEntry>& ForemanSingleNode
+ ::getWorkOrderProfilingResults(const std::size_t query_id) const {
+ return policy_enforcer_->getProfilingResults(query_id);
+}
+
void ForemanSingleNode::printWorkOrderProfilingResults(const std::size_t query_id,
std::FILE *out) const {
- const std::vector<
- std::tuple<std::size_t, std::size_t, std::size_t>>
- &recorded_times = policy_enforcer_->getProfilingResults(query_id);
+ const std::vector<WorkOrderTimeEntry> &recorded_times =
+ policy_enforcer_->getProfilingResults(query_id);
fputs("Query ID,Worker ID,NUMA Socket,Operator ID,Time (microseconds)\n", out);
for (auto workorder_entry : recorded_times) {
// Note: Index of the "worker thread index" in the tuple is 0.
- const std::size_t worker_id = std::get<0>(workorder_entry);
+ const std::size_t worker_id = workorder_entry.worker_id;
fprintf(out,
"%lu,%lu,%d,%lu,%lu\n",
query_id,
worker_id,
worker_directory_->getNUMANode(worker_id),
- std::get<1>(workorder_entry), // Operator ID.
- std::get<2>(workorder_entry)); // Time.
+ workorder_entry.operator_id, // Operator ID.
+ workorder_entry.end_time - workorder_entry.start_time); // Time.
}
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/query_execution/ForemanSingleNode.hpp
----------------------------------------------------------------------
diff --git a/query_execution/ForemanSingleNode.hpp b/query_execution/ForemanSingleNode.hpp
index caef5e0..d999095 100644
--- a/query_execution/ForemanSingleNode.hpp
+++ b/query_execution/ForemanSingleNode.hpp
@@ -76,6 +76,17 @@ class ForemanSingleNode final : public ForemanBase {
~ForemanSingleNode() override {}
+
+ /**
+ * @brief Get the results of profiling individual work orders for a given
+ * query.
+ *
+ * @param query_id The ID of the query whose profiling results are requested.
+ * @return A vector of WorkOrderTimeEntry records, each being a single profiling entry.
+ **/
+ const std::vector<WorkOrderTimeEntry>& getWorkOrderProfilingResults(
+ const std::size_t query_id) const;
+
/**
* @brief Print the results of profiling individual work orders for a given
* query.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/query_execution/PolicyEnforcerBase.cpp
----------------------------------------------------------------------
diff --git a/query_execution/PolicyEnforcerBase.cpp b/query_execution/PolicyEnforcerBase.cpp
index d16a502..3371d6d 100644
--- a/query_execution/PolicyEnforcerBase.cpp
+++ b/query_execution/PolicyEnforcerBase.cpp
@@ -28,6 +28,7 @@
#include "catalog/PartitionScheme.hpp"
#include "query_execution/QueryExecutionMessages.pb.h"
#include "query_execution/QueryExecutionState.hpp"
+#include "query_execution/QueryExecutionTypedefs.hpp"
#include "query_execution/QueryManagerBase.hpp"
#include "relational_operators/WorkOrder.hpp"
#include "storage/StorageBlockInfo.hpp"
@@ -165,13 +166,14 @@ bool PolicyEnforcerBase::admitQueries(
void PolicyEnforcerBase::recordTimeForWorkOrder(
const serialization::NormalWorkOrderCompletionMessage &proto) {
const std::size_t query_id = proto.query_id();
- if (workorder_time_recorder_.find(query_id) == workorder_time_recorder_.end()) {
- workorder_time_recorder_[query_id];
- }
- workorder_time_recorder_[query_id].emplace_back(
- proto.worker_thread_index(),
- proto.operator_index(),
- proto.execution_time_in_microseconds());
+ std::vector<WorkOrderTimeEntry> &workorder_time_entries
+ = workorder_time_recorder_[query_id];  // operator[] creates the per-query vector on first use.
+ workorder_time_entries.emplace_back();
+ WorkOrderTimeEntry &entry = workorder_time_entries.back();  // Fill the new record in place.
+ entry.worker_id = proto.worker_thread_index();
+ entry.operator_id = proto.operator_index();
+ entry.start_time = proto.execution_start_time();  // Epoch time in microseconds.
+ entry.end_time = proto.execution_end_time();  // Epoch time in microseconds.
}
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/query_execution/PolicyEnforcerBase.hpp
----------------------------------------------------------------------
diff --git a/query_execution/PolicyEnforcerBase.hpp b/query_execution/PolicyEnforcerBase.hpp
index 0482ebc..15bc118 100644
--- a/query_execution/PolicyEnforcerBase.hpp
+++ b/query_execution/PolicyEnforcerBase.hpp
@@ -126,8 +126,8 @@ class PolicyEnforcerBase {
*
* @return A vector of tuples, each being a single profiling entry.
**/
- inline const std::vector<std::tuple<std::size_t, std::size_t, std::size_t>>&
- getProfilingResults(const std::size_t query_id) const {
+ inline const std::vector<WorkOrderTimeEntry>& getProfilingResults(
+ const std::size_t query_id) const {
DCHECK(profile_individual_workorders_);
DCHECK(workorder_time_recorder_.find(query_id) !=
workorder_time_recorder_.end());
@@ -158,16 +158,7 @@ class PolicyEnforcerBase {
// The queries which haven't been admitted yet.
std::queue<QueryHandle*> waiting_queries_;
- // Key = Query ID.
- // Value = A tuple indicating a record of executing a work order.
- // Within a tuple ...
- // 1st element: Logical worker ID.
- // 2nd element: Operator ID.
- // 3rd element: Time in microseconds to execute the work order.
- std::unordered_map<
- std::size_t,
- std::vector<std::tuple<std::size_t, std::size_t, std::size_t>>>
- workorder_time_recorder_;
+ WorkOrderTimeRecorder workorder_time_recorder_;
private:
/**
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/query_execution/QueryExecutionMessages.proto
----------------------------------------------------------------------
diff --git a/query_execution/QueryExecutionMessages.proto b/query_execution/QueryExecutionMessages.proto
index f2219f6..5a089d2 100644
--- a/query_execution/QueryExecutionMessages.proto
+++ b/query_execution/QueryExecutionMessages.proto
@@ -38,7 +38,10 @@ message NormalWorkOrderCompletionMessage {
required uint64 operator_index = 1;
required uint64 worker_thread_index = 2;
required uint64 query_id = 3;
- optional uint64 execution_time_in_microseconds = 4;
+
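+ // Start/end timestamps of the execution, in microseconds since the epoch of
+ // the worker's clock (Worker.cpp uses std::chrono::steady_clock, so these are
+ // not wall-clock times; only their differences are meaningful).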
+ optional uint64 execution_start_time = 4;
+ optional uint64 execution_end_time = 5;
}
// A message sent upon completion of a rebuild WorkOrder execution.
@@ -46,7 +49,10 @@ message RebuildWorkOrderCompletionMessage {
required uint64 operator_index = 1;
required uint64 worker_thread_index = 2;
required uint64 query_id = 3;
- optional uint64 execution_time_in_microseconds = 4;
+
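+ // Start/end timestamps of the execution, in microseconds since the epoch of
+ // the worker's clock (Worker.cpp uses std::chrono::steady_clock, so these are
+ // not wall-clock times; only their differences are meaningful).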
+ optional uint64 execution_start_time = 4;
+ optional uint64 execution_end_time = 5;
}
message CatalogRelationNewBlockMessage {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/query_execution/QueryExecutionTypedefs.hpp
----------------------------------------------------------------------
diff --git a/query_execution/QueryExecutionTypedefs.hpp b/query_execution/QueryExecutionTypedefs.hpp
index b67209f..4bbab59 100644
--- a/query_execution/QueryExecutionTypedefs.hpp
+++ b/query_execution/QueryExecutionTypedefs.hpp
@@ -18,6 +18,9 @@
#ifndef QUICKSTEP_QUERY_EXECUTION_QUERY_EXECUTION_TYPEDEFS_HPP_
#define QUICKSTEP_QUERY_EXECUTION_QUERY_EXECUTION_TYPEDEFS_HPP_
+#include <unordered_map>
+#include <vector>
+
#include "query_optimizer/QueryOptimizerConfig.h" // For QUICKSTEP_DISTRIBUTED
#include "threading/ThreadIDBasedMap.hpp"
@@ -98,6 +101,18 @@ enum QueryExecutionMessageType : message_type_id {
#endif
};
+// WorkOrder profiling data structures.
+// Profiling record for an individual work order.
+// The timestamps are copied from the execution_start_time/execution_end_time
+// fields of the work order completion message; Worker.cpp derives them from
+// std::chrono::steady_clock, so they are relative to that clock's epoch.
+struct WorkOrderTimeEntry {
+ std::size_t worker_id;  // ID of the worker that executed the work order.
+ std::size_t operator_id;  // Index of the operator the work order belongs to.
+ std::size_t start_time; // Execution start, in microseconds since the clock's epoch.
+ std::size_t end_time; // Execution end, in microseconds since the clock's epoch.
+};
+// Key = query ID.
+// Value = vector of work order profiling records.
+typedef std::unordered_map<std::size_t, std::vector<WorkOrderTimeEntry>> WorkOrderTimeRecorder;
+
/** @} */
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/query_execution/Worker.cpp
----------------------------------------------------------------------
diff --git a/query_execution/Worker.cpp b/query_execution/Worker.cpp
index 6ba27f1..a582132 100644
--- a/query_execution/Worker.cpp
+++ b/query_execution/Worker.cpp
@@ -120,14 +120,21 @@ void Worker::executeWorkOrderHelper(const TaggedMessage &tagged_message,
worker_message.getWorkOrder()->execute();
end = std::chrono::steady_clock::now();
delete worker_message.getWorkOrder();
- const uint64_t execution_time_microseconds =
- std::chrono::duration_cast<std::chrono::microseconds>(end - start)
- .count();
+
+ // Convert the measured timestamps to microseconds since the clock's epoch
+ // (steady_clock here, so the values are not wall-clock times).
+ const uint64_t execution_start_time =
+ std::chrono::duration_cast<std::chrono::microseconds>(
+ start.time_since_epoch()).count();
+ const uint64_t execution_end_time =
+ std::chrono::duration_cast<std::chrono::microseconds>(
+ end.time_since_epoch()).count();
+
// Construct the proto message.
proto->set_operator_index(worker_message.getRelationalOpIndex());
proto->set_query_id(query_id_for_workorder);
proto->set_worker_thread_index(worker_thread_index_);
- proto->set_execution_time_in_microseconds(execution_time_microseconds);
+ proto->set_execution_start_time(execution_start_time);
+ proto->set_execution_end_time(execution_end_time);
}
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/query_execution/tests/QueryManagerSingleNode_unittest.cpp
----------------------------------------------------------------------
diff --git a/query_execution/tests/QueryManagerSingleNode_unittest.cpp b/query_execution/tests/QueryManagerSingleNode_unittest.cpp
index 39ca58c..09ae6ba 100644
--- a/query_execution/tests/QueryManagerSingleNode_unittest.cpp
+++ b/query_execution/tests/QueryManagerSingleNode_unittest.cpp
@@ -17,6 +17,7 @@
#include <climits>
#include <memory>
+#include <string>
#include <utility>
#include <vector>
@@ -104,6 +105,10 @@ class MockOperator: public RelationalOperator {
num_calls_donefeedingblocks_(0) {
}
+ // Name reported for this mock operator; an override is required because
+ // RelationalOperator::getName() is pure virtual.
+ std::string getName() const override {
+ return "MockOperator";
+ }
+
#define MOCK_OP_LOG(x) VLOG(x) << "Op[" << op_index_ << "]: " << __func__ << ": "
// The methods below are used to check whether QueryManager calls the Relational
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/AggregationOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/AggregationOperator.hpp b/relational_operators/AggregationOperator.hpp
index 4bcbcf6..5bbf2f9 100644
--- a/relational_operators/AggregationOperator.hpp
+++ b/relational_operators/AggregationOperator.hpp
@@ -18,6 +18,7 @@
#ifndef QUICKSTEP_RELATIONAL_OPERATORS_AGGREGATION_OPERATOR_HPP_
#define QUICKSTEP_RELATIONAL_OPERATORS_AGGREGATION_OPERATOR_HPP_
+#include <string>
#include <vector>
#include "catalog/CatalogRelation.hpp"
@@ -68,6 +69,7 @@ class AggregationOperator : public RelationalOperator {
bool input_relation_is_stored,
const QueryContext::aggregation_state_id aggr_state_index)
: RelationalOperator(query_id),
+ input_relation_(input_relation),
input_relation_is_stored_(input_relation_is_stored),
input_relation_block_ids_(input_relation_is_stored ? input_relation.getBlocksSnapshot()
: std::vector<block_id>()),
@@ -77,6 +79,14 @@ class AggregationOperator : public RelationalOperator {
~AggregationOperator() override {}
+ // Operator name. ExecutionDAGVisualizer compares against this exact string
+ // to decide whether it may query input_relation() on the operator.
+ std::string getName() const override {
+ return "AggregationOperator";
+ }
+
+ /**
+ * @return The input relation this operator was constructed with.
+ **/
+ const CatalogRelation& input_relation() const {
+ return input_relation_;
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
@@ -103,6 +113,7 @@ class AggregationOperator : public RelationalOperator {
**/
serialization::WorkOrder* createWorkOrderProto(const block_id block);
+ const CatalogRelation &input_relation_;
const bool input_relation_is_stored_;
std::vector<block_id> input_relation_block_ids_;
const QueryContext::aggregation_state_id aggr_state_index_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/BuildHashOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/BuildHashOperator.hpp b/relational_operators/BuildHashOperator.hpp
index 464bbf8..41346c8 100644
--- a/relational_operators/BuildHashOperator.hpp
+++ b/relational_operators/BuildHashOperator.hpp
@@ -18,6 +18,7 @@
#ifndef QUICKSTEP_RELATIONAL_OPERATORS_BUILD_HASH_OPERATOR_HPP_
#define QUICKSTEP_RELATIONAL_OPERATORS_BUILD_HASH_OPERATOR_HPP_
+#include <string>
#include <utility>
#include <vector>
@@ -93,6 +94,14 @@ class BuildHashOperator : public RelationalOperator {
~BuildHashOperator() override {}
+ /**
+ * @return The input relation of this operator.
+ **/
+ const CatalogRelation& input_relation() const {
+ return input_relation_;
+ }
+
+ // Operator name. ExecutionDAGVisualizer compares against this exact string
+ // to decide whether it may query input_relation() on the operator.
+ std::string getName() const override {
+ return "BuildHashOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
@@ -196,6 +205,10 @@ class BuildHashWorkOrder : public WorkOrder {
~BuildHashWorkOrder() override {}
+ /**
+ * @return The schema of the input relation of this work order.
+ **/
+ const CatalogRelationSchema& input_relation() const {
+ return input_relation_;
+ }
+
void execute() override;
private:
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/CreateIndexOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/CreateIndexOperator.hpp b/relational_operators/CreateIndexOperator.hpp
index 18ca656..4e05448 100644
--- a/relational_operators/CreateIndexOperator.hpp
+++ b/relational_operators/CreateIndexOperator.hpp
@@ -69,6 +69,10 @@ class CreateIndexOperator : public RelationalOperator {
~CreateIndexOperator() override {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "CreateIndexOperator";
+ }
+
/**
* @note No WorkOrder generated for this operator.
**/
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/CreateTableOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/CreateTableOperator.hpp b/relational_operators/CreateTableOperator.hpp
index 6d91142..7786cef 100644
--- a/relational_operators/CreateTableOperator.hpp
+++ b/relational_operators/CreateTableOperator.hpp
@@ -19,6 +19,7 @@
#define QUICKSTEP_RELATIONAL_OPERATORS_CREATE_TABLE_OPERATOR_HPP_
#include <cstddef>
+#include <string>
#include <memory>
#include "catalog/CatalogRelation.hpp"
@@ -66,6 +67,10 @@ class CreateTableOperator : public RelationalOperator {
~CreateTableOperator() override {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "CreateTableOperator";
+ }
+
/**
* @note No WorkOrder generated for this operator.
**/
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/DeleteOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/DeleteOperator.hpp b/relational_operators/DeleteOperator.hpp
index 74da8c1..6bb2075 100644
--- a/relational_operators/DeleteOperator.hpp
+++ b/relational_operators/DeleteOperator.hpp
@@ -19,6 +19,7 @@
#define QUICKSTEP_RELATIONAL_OPERATORS_DELETE_OPERATOR_HPP_
#include <cstddef>
+#include <string>
#include <vector>
#include "catalog/CatalogRelation.hpp"
@@ -81,6 +82,10 @@ class DeleteOperator : public RelationalOperator {
~DeleteOperator() override {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "DeleteOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/DestroyHashOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/DestroyHashOperator.hpp b/relational_operators/DestroyHashOperator.hpp
index 181386f..fc48ef9 100644
--- a/relational_operators/DestroyHashOperator.hpp
+++ b/relational_operators/DestroyHashOperator.hpp
@@ -18,6 +18,8 @@
#ifndef QUICKSTEP_RELATIONAL_OPERATORS_DESTROY_HASH_OPERATOR_HPP_
#define QUICKSTEP_RELATIONAL_OPERATORS_DESTROY_HASH_OPERATOR_HPP_
+#include <string>
+
#include "query_execution/QueryContext.hpp"
#include "relational_operators/RelationalOperator.hpp"
#include "relational_operators/WorkOrder.hpp"
@@ -58,6 +60,10 @@ class DestroyHashOperator : public RelationalOperator {
~DestroyHashOperator() override {}
+ // Operator name. ExecutionDAGVisualizer omits operators with this exact
+ // name from the rendered graph.
+ std::string getName() const override {
+ return "DestroyHashOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/DropTableOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/DropTableOperator.hpp b/relational_operators/DropTableOperator.hpp
index 6c7fca3..ab3344d 100644
--- a/relational_operators/DropTableOperator.hpp
+++ b/relational_operators/DropTableOperator.hpp
@@ -19,6 +19,7 @@
#define QUICKSTEP_RELATIONAL_OPERATORS_DROP_TABLE_OPERATOR_HPP_
#include <cstddef>
+#include <string>
#include <utility>
#include <vector>
@@ -74,6 +75,10 @@ class DropTableOperator : public RelationalOperator {
~DropTableOperator() override {}
+ // Operator name. ExecutionDAGVisualizer omits operators with this exact
+ // name from the rendered graph.
+ std::string getName() const override {
+ return "DropTableOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/FinalizeAggregationOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/FinalizeAggregationOperator.hpp b/relational_operators/FinalizeAggregationOperator.hpp
index 158a637..af11bc3 100644
--- a/relational_operators/FinalizeAggregationOperator.hpp
+++ b/relational_operators/FinalizeAggregationOperator.hpp
@@ -19,6 +19,7 @@
#define QUICKSTEP_RELATIONAL_OPERATORS_FINALIZE_AGGREGATION_OPERATOR_HPP_
#include <cstddef>
+#include <string>
#include <memory>
#include "catalog/CatalogRelation.hpp"
@@ -74,6 +75,10 @@ class FinalizeAggregationOperator : public RelationalOperator {
~FinalizeAggregationOperator() override {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "FinalizeAggregationOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/HashJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index 5d3d7da..235bfe4 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -22,6 +22,7 @@
#include <cstddef>
#include <memory>
+#include <string>
#include <utility>
#include <vector>
@@ -157,6 +158,29 @@ class HashJoinOperator : public RelationalOperator {
~HashJoinOperator() override {}
+ // Returns "HashJoinOperator" for inner joins and the same name with the join
+ // type appended in parentheses for left semi, left anti and left outer
+ // joins. Any other join type falls through the switch to LOG(FATAL).
+ // Note that consumers comparing against the plain "HashJoinOperator" string
+ // therefore only recognize inner joins.
+ std::string getName() const override {
+ switch (join_type_) {
+ case JoinType::kInnerJoin:
+ return "HashJoinOperator";
+ case JoinType::kLeftSemiJoin:
+ return "HashJoinOperator(LeftSemi)";
+ case JoinType::kLeftAntiJoin:
+ return "HashJoinOperator(LeftAnti)";
+ case JoinType::kLeftOuterJoin:
+ return "HashJoinOperator(LeftOuter)";
+ default: break;
+ }
+ LOG(FATAL) << "Unknown join type in HashJoinOperator::getName()";
+ }
+
+ /**
+ * @return The build-side relation of this join.
+ **/
+ const CatalogRelation& build_relation() const {
+ return build_relation_;
+ }
+
+ /**
+ * @return The probe-side relation of this join.
+ **/
+ const CatalogRelation& probe_relation() const {
+ return probe_relation_;
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/InsertOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/InsertOperator.hpp b/relational_operators/InsertOperator.hpp
index 78f5199..bf9c56a 100644
--- a/relational_operators/InsertOperator.hpp
+++ b/relational_operators/InsertOperator.hpp
@@ -19,6 +19,7 @@
#define QUICKSTEP_RELATIONAL_OPERATORS_INSERT_OPERATOR_HPP_
#include <cstddef>
+#include <string>
#include <memory>
#include "catalog/CatalogRelation.hpp"
@@ -73,6 +74,10 @@ class InsertOperator : public RelationalOperator {
~InsertOperator() override {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "InsertOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/NestedLoopsJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/NestedLoopsJoinOperator.hpp b/relational_operators/NestedLoopsJoinOperator.hpp
index 992e76d..041b8e9 100644
--- a/relational_operators/NestedLoopsJoinOperator.hpp
+++ b/relational_operators/NestedLoopsJoinOperator.hpp
@@ -20,6 +20,7 @@
#include <cstddef>
#include <memory>
+#include <string>
#include <vector>
#include "catalog/CatalogRelation.hpp"
@@ -116,6 +117,10 @@ class NestedLoopsJoinOperator : public RelationalOperator {
~NestedLoopsJoinOperator() override {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "NestedLoopsJoinOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/RelationalOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/RelationalOperator.hpp b/relational_operators/RelationalOperator.hpp
index 116727b..b8d1bd0 100644
--- a/relational_operators/RelationalOperator.hpp
+++ b/relational_operators/RelationalOperator.hpp
@@ -19,6 +19,7 @@
#define QUICKSTEP_RELATIONAL_OPERATORS_RELATIONAL_OPERATOR_HPP_
#include <cstddef>
+#include <string>
#include <vector>
#include "catalog/CatalogTypedefs.hpp"
@@ -55,6 +56,13 @@ class RelationalOperator {
virtual ~RelationalOperator() {}
/**
+ * @brief Get the name of this relational operator.
+ *
+ * @note Consumers such as ExecutionDAGVisualizer compare the returned name
+ * against string literals, so changing the name an operator returns
+ * affects them.
+ *
+ * @return The name of this relational operator.
+ */
+ virtual std::string getName() const = 0;
+
+ /**
* @brief Generate all the next WorkOrders for this RelationalOperator.
*
* @note If a RelationalOperator has blocking dependencies, it should not
@@ -226,6 +234,15 @@ class RelationalOperator {
op_index_ = operator_index;
}
+ /**
+ * @brief Get the index of this operator in the query plan DAG.
+ *
+ * @return The index of this operator in the query plan DAG.
+ */
+ std::size_t getOperatorIndex() const {
+ return op_index_;
+ }
+
protected:
/**
* @brief Constructor
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/SampleOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/SampleOperator.hpp b/relational_operators/SampleOperator.hpp
index f8fe5f6..400a83f 100644
--- a/relational_operators/SampleOperator.hpp
+++ b/relational_operators/SampleOperator.hpp
@@ -20,6 +20,7 @@
#include <cstddef>
#include <memory>
+#include <string>
#include <vector>
#include "catalog/CatalogRelation.hpp"
@@ -93,6 +94,10 @@ class SampleOperator : public RelationalOperator {
~SampleOperator() override {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "SampleOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/SaveBlocksOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/SaveBlocksOperator.hpp b/relational_operators/SaveBlocksOperator.hpp
index 50032b6..d56ee2c 100644
--- a/relational_operators/SaveBlocksOperator.hpp
+++ b/relational_operators/SaveBlocksOperator.hpp
@@ -19,6 +19,7 @@
#define QUICKSTEP_RELATIONAL_OPERATORS_SAVE_BLOCKS_OPERATOR_HPP_
#include <cstddef>
+#include <string>
#include <vector>
#include "catalog/CatalogTypedefs.hpp"
@@ -64,6 +65,10 @@ class SaveBlocksOperator : public RelationalOperator {
~SaveBlocksOperator() override {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "SaveBlocksOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/SelectOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/SelectOperator.hpp b/relational_operators/SelectOperator.hpp
index 0c10686..764dfa3 100644
--- a/relational_operators/SelectOperator.hpp
+++ b/relational_operators/SelectOperator.hpp
@@ -19,6 +19,7 @@
#define QUICKSTEP_RELATIONAL_OPERATORS_SELECT_OPERATOR_HPP_
#include <memory>
+#include <string>
#include <utility>
#include <vector>
@@ -189,6 +190,14 @@ class SelectOperator : public RelationalOperator {
~SelectOperator() override {}
+ // Operator name. ExecutionDAGVisualizer compares against this exact string
+ // to decide whether it may query input_relation() on the operator.
+ std::string getName() const override {
+ return "SelectOperator";
+ }
+
+ /**
+ * @return The input relation of this operator.
+ **/
+ const CatalogRelation& input_relation() const {
+ return input_relation_;
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/SortMergeRunOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/SortMergeRunOperator.hpp b/relational_operators/SortMergeRunOperator.hpp
index 177836f..531e269 100644
--- a/relational_operators/SortMergeRunOperator.hpp
+++ b/relational_operators/SortMergeRunOperator.hpp
@@ -19,6 +19,7 @@
#define QUICKSTEP_RELATIONAL_OPERATORS_SORT_MERGE_RUN_OPERATOR_HPP_
#include <cstddef>
+#include <string>
#include <utility>
#include <vector>
@@ -129,6 +130,10 @@ class SortMergeRunOperator : public RelationalOperator {
**/
~SortMergeRunOperator() {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "SortMergeRunOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/SortRunGenerationOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/SortRunGenerationOperator.hpp b/relational_operators/SortRunGenerationOperator.hpp
index 96a3ce1..d43b90b 100644
--- a/relational_operators/SortRunGenerationOperator.hpp
+++ b/relational_operators/SortRunGenerationOperator.hpp
@@ -18,6 +18,7 @@
#ifndef QUICKSTEP_RELATIONAL_OPERATORS_SORT_RUN_GENERATION_OPERATOR_HPP_
#define QUICKSTEP_RELATIONAL_OPERATORS_SORT_RUN_GENERATION_OPERATOR_HPP_
+#include <string>
#include <vector>
#include "catalog/CatalogRelation.hpp"
@@ -109,6 +110,10 @@ class SortRunGenerationOperator : public RelationalOperator {
~SortRunGenerationOperator() {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "SortRunGenerationOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/TableGeneratorOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/TableGeneratorOperator.hpp b/relational_operators/TableGeneratorOperator.hpp
index 1b791a6..ad3a9ff 100644
--- a/relational_operators/TableGeneratorOperator.hpp
+++ b/relational_operators/TableGeneratorOperator.hpp
@@ -19,6 +19,7 @@
#ifndef QUICKSTEP_RELATIONAL_OPERATORS_TABLE_GENERATOR_OPERATOR_HPP_
#define QUICKSTEP_RELATIONAL_OPERATORS_TABLE_GENERATOR_OPERATOR_HPP_
+#include <string>
#include <vector>
#include "catalog/CatalogRelation.hpp"
@@ -76,6 +77,10 @@ class TableGeneratorOperator : public RelationalOperator {
~TableGeneratorOperator() override {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "TableGeneratorOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/TextScanOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/TextScanOperator.hpp b/relational_operators/TextScanOperator.hpp
index 1a62ded..6890d7d 100644
--- a/relational_operators/TextScanOperator.hpp
+++ b/relational_operators/TextScanOperator.hpp
@@ -134,6 +134,10 @@ class TextScanOperator : public RelationalOperator {
~TextScanOperator() override {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "TextScanOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/UpdateOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/UpdateOperator.hpp b/relational_operators/UpdateOperator.hpp
index 4471a17..a443b5d 100644
--- a/relational_operators/UpdateOperator.hpp
+++ b/relational_operators/UpdateOperator.hpp
@@ -20,6 +20,7 @@
#include <cstddef>
#include <memory>
+#include <string>
#include <unordered_map>
#include <vector>
@@ -94,6 +95,10 @@ class UpdateOperator : public RelationalOperator {
~UpdateOperator() override {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "UpdateOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/relational_operators/WindowAggregationOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/WindowAggregationOperator.hpp b/relational_operators/WindowAggregationOperator.hpp
index bd83248..05632cc 100644
--- a/relational_operators/WindowAggregationOperator.hpp
+++ b/relational_operators/WindowAggregationOperator.hpp
@@ -20,6 +20,7 @@
#ifndef QUICKSTEP_RELATIONAL_OPERATORS_WINDOW_AGGREGATION_OPERATOR_HPP_
#define QUICKSTEP_RELATIONAL_OPERATORS_WINDOW_AGGREGATION_OPERATOR_HPP_
+#include <string>
#include <vector>
#include "catalog/CatalogRelation.hpp"
@@ -78,6 +79,10 @@ class WindowAggregationOperator : public RelationalOperator {
~WindowAggregationOperator() override {}
+ // Operator name, e.g. used in node labels by ExecutionDAGVisualizer.
+ std::string getName() const override {
+ return "WindowAggregationOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index 2d3db8f..803b909 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -167,6 +167,9 @@ add_library(quickstep_utility_Cast ../empty_src.cpp Cast.hpp)
add_library(quickstep_utility_CheckSnprintf ../empty_src.cpp CheckSnprintf.hpp)
add_library(quickstep_utility_DAG ../empty_src.cpp DAG.hpp)
add_library(quickstep_utility_EqualsAnyConstant ../empty_src.cpp EqualsAnyConstant.hpp)
+add_library(quickstep_utility_ExecutionDAGVisualizer
+ ExecutionDAGVisualizer.cpp
+ ExecutionDAGVisualizer.hpp)
add_library(quickstep_utility_Glob Glob.cpp Glob.hpp)
add_library(quickstep_utility_HashPair ../empty_src.cpp HashPair.hpp)
add_library(quickstep_utility_Macros ../empty_src.cpp Macros.hpp)
@@ -225,6 +228,16 @@ target_link_libraries(quickstep_utility_CheckSnprintf
target_link_libraries(quickstep_utility_DAG
glog
quickstep_utility_Macros)
+# ExecutionDAGVisualizer.cpp includes glog/logging.h (DCHECK), hence glog.
+target_link_libraries(quickstep_utility_ExecutionDAGVisualizer
+ glog
+ quickstep_catalog_CatalogRelationSchema
+ quickstep_queryexecution_QueryExecutionTypedefs
+ quickstep_queryoptimizer_QueryPlan
+ quickstep_relationaloperators_AggregationOperator
+ quickstep_relationaloperators_BuildHashOperator
+ quickstep_relationaloperators_HashJoinOperator
+ quickstep_relationaloperators_SelectOperator
+ quickstep_utility_Macros
+ quickstep_utility_StringUtil)
target_link_libraries(quickstep_utility_Glob
glog)
target_link_libraries(quickstep_utility_MemStream
@@ -303,6 +316,7 @@ target_link_libraries(quickstep_utility
quickstep_utility_CheckSnprintf
quickstep_utility_DAG
quickstep_utility_EqualsAnyConstant
+ quickstep_utility_ExecutionDAGVisualizer
quickstep_utility_Glob
quickstep_utility_HashPair
quickstep_utility_Macros
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/utility/ExecutionDAGVisualizer.cpp
----------------------------------------------------------------------
diff --git a/utility/ExecutionDAGVisualizer.cpp b/utility/ExecutionDAGVisualizer.cpp
new file mode 100644
index 0000000..0c0bbb1
--- /dev/null
+++ b/utility/ExecutionDAGVisualizer.cpp
@@ -0,0 +1,230 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin\u2014Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#include "utility/ExecutionDAGVisualizer.hpp"
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <iomanip>
+#include <limits>
+#include <set>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "catalog/CatalogRelationSchema.hpp"
+#include "query_execution/QueryExecutionTypedefs.hpp"
+#include "query_optimizer/QueryPlan.hpp"
+#include "relational_operators/AggregationOperator.hpp"
+#include "relational_operators/BuildHashOperator.hpp"
+#include "relational_operators/HashJoinOperator.hpp"
+#include "relational_operators/SelectOperator.hpp"
+#include "utility/StringUtil.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+// Builds the graph description from the query plan DAG: one NodeInfo per
+// displayed operator (labelled with its operator index, its name and any
+// stored input relations) and one edge per dependency link whose two
+// endpoints are both displayed.
+ExecutionDAGVisualizer::ExecutionDAGVisualizer(const QueryPlan &plan) {
+  // Do not display these relational operators in the graph.
+  const std::set<std::string> no_display_op_names =
+      { "DestroyHashOperator", "DropTableOperator" };
+
+  // HashJoinOperator::getName() returns "HashJoinOperator" only for inner
+  // joins and appends the join type otherwise (e.g.
+  // "HashJoinOperator(LeftSemi)"), so hash joins must be recognized by name
+  // prefix; an exact comparison would miss every non-inner join.
+  const std::string hash_join_name_prefix("HashJoinOperator");
+
+  const auto &dag = plan.getQueryPlanDAG();
+  num_nodes_ = dag.size();
+
+  // Collect DAG vertices info.
+  std::vector<bool> display_ops(num_nodes_, false);
+  for (std::size_t node_index = 0; node_index < num_nodes_; ++node_index) {
+    const auto &node = dag.getNodePayload(node_index);
+    const std::string relop_name = node.getName();
+    if (no_display_op_names.find(relop_name) != no_display_op_names.end()) {
+      continue;
+    }
+
+    display_ops[node_index] = true;
+    NodeInfo &node_info = nodes_[node_index];
+    node_info.id = node_index;
+    node_info.labels.emplace_back(
+        "[" + std::to_string(node.getOperatorIndex()) + "] " + relop_name);
+
+    // (description, relation) pairs for the operator types that expose their
+    // input relation; the downcast is selected by the operator's name.
+    std::vector<std::pair<std::string, const CatalogRelationSchema*>> input_relations;
+    if (relop_name == "AggregationOperator") {
+      const AggregationOperator &aggregation_op =
+          static_cast<const AggregationOperator&>(node);
+      input_relations.emplace_back("input", &aggregation_op.input_relation());
+    } else if (relop_name == "BuildHashOperator") {
+      const BuildHashOperator &build_hash_op =
+          static_cast<const BuildHashOperator&>(node);
+      input_relations.emplace_back("input", &build_hash_op.input_relation());
+    } else if (relop_name.compare(0,
+                                  hash_join_name_prefix.size(),
+                                  hash_join_name_prefix) == 0) {
+      const HashJoinOperator &hash_join_op =
+          static_cast<const HashJoinOperator&>(node);
+      input_relations.emplace_back("probe side", &hash_join_op.probe_relation());
+    } else if (relop_name == "SelectOperator") {
+      const SelectOperator &select_op =
+          static_cast<const SelectOperator&>(node);
+      input_relations.emplace_back("input", &select_op.input_relation());
+    }
+
+    // Only stored (non-temporary) relations are worth naming in the label.
+    for (const auto &rel_pair : input_relations) {
+      if (!rel_pair.second->isTemporary()) {
+        node_info.labels.emplace_back(
+            rel_pair.first + " stored relation [" +
+            rel_pair.second->getName() + "]");
+      }
+    }
+  }
+
+  // Collect DAG edges info. An edge is kept only if both of its endpoints are
+  // displayed.
+  for (std::size_t node_index = 0; node_index < num_nodes_; ++node_index) {
+    if (!display_ops[node_index]) {
+      continue;
+    }
+    for (const auto &link : dag.getDependents(node_index)) {
+      if (display_ops[link.first]) {
+        edges_.emplace_back();
+        edges_.back().src_node_id = node_index;
+        edges_.back().dst_node_id = link.first;
+        edges_.back().is_pipeline_breaker = link.second;
+      }
+    }
+  }
+}
+
+// Summarizes the per-work-order timing records by operator and decorates each
+// displayed node with a fill color plus "span", "total" and (when the span is
+// non-zero) "effective concurrency" labels.
+//
+// Timestamps are divided by 1000 before being labelled "ms", so they are
+// presumably expressed in microseconds (NOTE(review): confirm against the
+// producer of WorkOrderTimeEntry).
+//
+// Operators without any recorded work order are left undecorated, and every
+// ratio is guarded against a zero denominator, so no label or color is ever
+// derived from an unsigned wrap-around or from a NaN/inf value.
+void ExecutionDAGVisualizer::bindProfilingStats(
+    const std::vector<WorkOrderTimeEntry> &execution_time_records) {
+  // Nothing was profiled: keep the plain plan graph untouched.
+  if (execution_time_records.empty()) {
+    return;
+  }
+
+  std::vector<std::size_t> time_start(num_nodes_, std::numeric_limits<std::size_t>::max());
+  std::vector<std::size_t> time_end(num_nodes_, 0);
+  std::vector<std::size_t> time_elapsed(num_nodes_, 0);
+  // Marks the operators that executed at least one work order. The sentinel
+  // values in time_start / time_end are only meaningful for those operators.
+  std::vector<bool> has_records(num_nodes_, false);
+  std::size_t overall_start_time = std::numeric_limits<std::size_t>::max();
+  std::size_t overall_end_time = 0;
+  for (const auto &entry : execution_time_records) {
+    const std::size_t relop_index = entry.operator_id;
+    DCHECK_LT(relop_index, num_nodes_);
+
+    const std::size_t workorder_start_time = entry.start_time;
+    const std::size_t workorder_end_time = entry.end_time;
+    overall_start_time = std::min(overall_start_time, workorder_start_time);
+    overall_end_time = std::max(overall_end_time, workorder_end_time);
+
+    has_records[relop_index] = true;
+    time_start[relop_index] =
+        std::min(time_start[relop_index], workorder_start_time);
+    time_end[relop_index] =
+        std::max(time_end[relop_index], workorder_end_time);
+    time_elapsed[relop_index] += (workorder_end_time - workorder_start_time);
+  }
+
+  double total_time_elapsed = 0;
+  for (const std::size_t elapsed : time_elapsed) {
+    total_time_elapsed += elapsed;
+  }
+  const double overall_span =
+      overall_end_time > overall_start_time
+          ? static_cast<double>(overall_end_time - overall_start_time)
+          : 0.0;
+
+  // Percentages stay at 0 whenever their denominator is 0.
+  std::vector<double> time_percentage(num_nodes_, 0);
+  std::vector<double> span_percentage(num_nodes_, 0);
+  double max_percentage = 0;
+  for (std::size_t i = 0; i < num_nodes_; ++i) {
+    if (!has_records[i]) {
+      continue;
+    }
+    if (total_time_elapsed > 0) {
+      time_percentage[i] = time_elapsed[i] / total_time_elapsed * 100;
+    }
+    if (overall_span > 0 && time_end[i] > time_start[i]) {
+      span_percentage[i] = (time_end[i] - time_start[i]) / overall_span * 100;
+    }
+    max_percentage = std::max(max_percentage, time_percentage[i] + span_percentage[i]);
+  }
+
+  // nodes_ is an ordered map keyed by node index, so this visits the displayed
+  // nodes in increasing index order with a single lookup per node.
+  for (auto &node_pair : nodes_) {
+    const std::size_t node_index = node_pair.first;
+    if (node_index >= num_nodes_ || !has_records[node_index]) {
+      continue;
+    }
+
+    const std::size_t relop_start_time = time_start[node_index];
+    const std::size_t relop_end_time = time_end[node_index];
+    const std::size_t relop_elapsed_time = time_elapsed[node_index];
+    NodeInfo &node_info = node_pair.second;
+
+    // Relative weight of this operator in [0, 1]; used for both the hue and
+    // the saturation of the fill color.
+    const double hue =
+        max_percentage > 0
+            ? (time_percentage[node_index] + span_percentage[node_index]) / max_percentage
+            : 0.0;
+    node_info.color = std::to_string(hue) + " " + std::to_string(hue) + " 1.0";
+
+    if (overall_start_time == 0) {
+      node_info.labels.emplace_back(
+          "span: " +
+          std::to_string((relop_end_time - relop_start_time) / 1000) + "ms");
+    } else {
+      node_info.labels.emplace_back(
+          "span: [" +
+          std::to_string((relop_start_time - overall_start_time) / 1000) + "ms, " +
+          std::to_string((relop_end_time - overall_start_time) / 1000) + "ms] (" +
+          FormatDigits(span_percentage[node_index], 2) + "%)");
+    }
+
+    node_info.labels.emplace_back(
+        "total: " +
+        std::to_string(relop_elapsed_time / 1000) + "ms (" +
+        FormatDigits(time_percentage[node_index], 2) + "%)");
+
+    // Concurrency is elapsed work time over wall-clock span; it is undefined
+    // for a zero-length span, so the label is omitted in that case.
+    if (relop_end_time > relop_start_time) {
+      const double concurrency =
+          static_cast<double>(relop_elapsed_time) / (relop_end_time - relop_start_time);
+      node_info.labels.emplace_back(
+          "effective concurrency: " + FormatDigits(concurrency, 2));
+    }
+  }
+}
+
+// Renders the collected nodes and edges as a DOT "digraph" description.
+std::string ExecutionDAGVisualizer::toDOT() {
+  std::ostringstream dot;
+
+  // Graph header with the default node and edge attributes.
+  dot << "digraph g {\n"
+      << " rankdir=BT\n"
+      << " node [penwidth=2]\n"
+      << " edge [fontsize=16 fontcolor=gray penwidth=2]\n\n";
+
+  // One statement per displayed node; label and fill color are optional.
+  for (const auto &entry : nodes_) {
+    const NodeInfo &node = entry.second;
+    dot << " " << node.id << " [ ";
+    if (!node.labels.empty()) {
+      dot << "label=\"" << EscapeSpecialChars(JoinToString(node.labels, " ")) << "\" ";
+    }
+    if (!node.color.empty()) {
+      dot << "style=filled fillcolor=\"" << node.color << "\" ";
+    }
+    dot << "]\n";
+  }
+  dot << "\n";
+
+  // One statement per edge; pipeline-breaking edges are drawn dashed.
+  for (const EdgeInfo &edge : edges_) {
+    dot << " " << edge.src_node_id << " -> " << edge.dst_node_id << " [ ";
+    if (edge.is_pipeline_breaker) {
+      dot << "style=dashed ";
+    }
+    if (!edge.labels.empty()) {
+      dot << "label=\"" << EscapeSpecialChars(JoinToString(edge.labels, " ")) << "\" ";
+    }
+    dot << "]\n";
+  }
+  dot << "}\n";
+
+  return dot.str();
+}
+
+// Formats 'value' in fixed-point notation with 'num_digits' digits after the
+// decimal point.
+std::string ExecutionDAGVisualizer::FormatDigits(const double value,
+                                                 const int num_digits) {
+  std::ostringstream formatted;
+  formatted.setf(std::ios_base::fixed, std::ios_base::floatfield);
+  formatted.precision(num_digits);
+  formatted << value;
+  return formatted.str();
+}
+
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b07eaae/utility/ExecutionDAGVisualizer.hpp
----------------------------------------------------------------------
diff --git a/utility/ExecutionDAGVisualizer.hpp b/utility/ExecutionDAGVisualizer.hpp
new file mode 100644
index 0000000..5c9e434
--- /dev/null
+++ b/utility/ExecutionDAGVisualizer.hpp
@@ -0,0 +1,112 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_EXECUTION_DAG_VISUALIZER_HPP_
+#define QUICKSTEP_UTILITY_EXECUTION_DAG_VISUALIZER_HPP_
+
+#include <cstddef>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+
+class QueryPlan;
+struct WorkOrderTimeEntry;
+
+/** \addtogroup Utility
+ * @{
+ */
+
+/**
+ * @brief A visualizer that converts an execution plan DAG into a graph in
+ * DOT format. Note that DOT is a plain text graph description language.
+ *
+ * @note This utility tool can be further extended to be more generic.
+ */
+class ExecutionDAGVisualizer {
+ public:
+ /**
+ * @brief Constructor. Collects the node and edge information of the given
+ * execution plan.
+ *
+ * @param plan The execution plan to be visualized.
+ */
+ explicit ExecutionDAGVisualizer(const QueryPlan &plan);
+
+ /**
+ * @brief Destructor
+ */
+ ~ExecutionDAGVisualizer() {}
+
+ /**
+ * @brief Summarize the execution timing stats and bind the stats to the
+ * corresponding relational operators in the execution plan. The stats
+ * are attached to the nodes as extra labels and a fill color.
+ *
+ * @param execution_time_records The profiled timing records of execution.
+ */
+ void bindProfilingStats(
+ const std::vector<WorkOrderTimeEntry> &execution_time_records);
+
+ /**
+ * @brief Get the string representation of the visualized execution plan
+ * in DOT format (DOT is a plain text graph description language).
+ *
+ * @return The execution plan graph in DOT format.
+ */
+ std::string toDOT();
+
+ private:
+ /**
+ * @brief Format a floating-point value to its string representation in
+ * fixed-point notation with the specified number of decimal digits.
+ *
+ * @param value The value to format.
+ * @param num_digits The number of digits after the decimal point.
+ * @return The formatted string.
+ */
+ static std::string FormatDigits(const double value,
+ const int num_digits);
+
+ /**
+ * @brief Information of a graph node.
+ */
+ struct NodeInfo {
+ std::size_t id; // Identifier printed for this node in the DOT output.
+ std::vector<std::string> labels; // Label fragments, joined into one DOT label.
+ std::string color; // DOT fillcolor value; empty means no fill.
+ };
+
+ /**
+ * @brief Information of a graph edge.
+ */
+ struct EdgeInfo {
+ std::size_t src_node_id; // Node the edge starts from.
+ std::size_t dst_node_id; // Node the edge points to.
+ std::vector<std::string> labels; // Label fragments, joined into one DOT label.
+ bool is_pipeline_breaker; // Rendered with a dashed line when true.
+ };
+
+ std::size_t num_nodes_; // Upper bound (exclusive) of the node indices in the plan DAG.
+ std::map<std::size_t, NodeInfo> nodes_; // Displayed nodes, keyed by node index.
+ std::vector<EdgeInfo> edges_; // Edges between displayed nodes.
+
+ DISALLOW_COPY_AND_ASSIGN(ExecutionDAGVisualizer);
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif /* QUICKSTEP_UTILITY_EXECUTION_DAG_VISUALIZER_HPP_ */
[10/13] incubator-quickstep git commit: Initial commit
Posted by ji...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/PackedRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/PackedRowStoreValueAccessor.hpp b/storage/PackedRowStoreValueAccessor.hpp
index 03a975e..cbd273e 100644
--- a/storage/PackedRowStoreValueAccessor.hpp
+++ b/storage/PackedRowStoreValueAccessor.hpp
@@ -18,6 +18,8 @@
#ifndef QUICKSTEP_STORAGE_PACKED_ROW_STORE_VALUE_ACCESSOR_HPP_
#define QUICKSTEP_STORAGE_PACKED_ROW_STORE_VALUE_ACCESSOR_HPP_
+#include <utility>
+
#include "catalog/CatalogRelationSchema.hpp"
#include "catalog/CatalogTypedefs.hpp"
#include "storage/StorageBlockInfo.hpp"
@@ -40,7 +42,8 @@ class PackedRowStoreValueAccessorHelper {
: relation_(relation),
num_tuples_(num_tuples),
tuple_storage_(tuple_storage),
- null_bitmap_(null_bitmap) {
+ null_bitmap_(null_bitmap),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()) {
}
inline tuple_id numPackedTuples() const {
@@ -65,6 +68,25 @@ class PackedRowStoreValueAccessorHelper {
+ relation_.getFixedLengthAttributeOffset(attr); // Attribute offset within tuple.
}
+  // Returns a pointer to the value of attribute 'attr' in tuple 'tuple',
+  // paired with the attribute's entry in attr_max_lengths_. When check_null
+  // is set and the null bitmap marks the value as NULL, returns
+  // (nullptr, 0) instead.
+  template <bool check_null>
+  inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple,
+                                                                            const attribute_id attr) const {
+    DEBUG_ASSERT(tuple < num_tuples_);
+    DEBUG_ASSERT(relation_.hasAttributeWithId(attr));
+
+    if (check_null) {
+      const int null_slot = relation_.getNullableAttributeIndex(attr);
+      const bool value_is_null =
+          (null_slot != -1)
+          && null_bitmap_->getBit(tuple * relation_.numNullableAttributes() + null_slot);
+      if (value_is_null) {
+        return std::make_pair(nullptr, 0);
+      }
+    }
+
+    // Tuples are laid out back to back, each getFixedByteLength() bytes wide.
+    const char *tuple_begin = static_cast<const char*>(tuple_storage_)
+                              + (tuple * relation_.getFixedByteLength());
+    return std::make_pair(tuple_begin + relation_.getFixedLengthAttributeOffset(attr),
+                          attr_max_lengths_[attr]);
+  }
+
+
inline TypedValue getAttributeValueTyped(const tuple_id tuple,
const attribute_id attr) const {
const Type &attr_type = relation_.getAttributeById(attr)->getType();
@@ -79,6 +101,7 @@ class PackedRowStoreValueAccessorHelper {
const tuple_id num_tuples_;
const void *tuple_storage_;
const BitVector<false> *null_bitmap_;
+ const std::vector<std::size_t> &attr_max_lengths_;
DISALLOW_COPY_AND_ASSIGN(PackedRowStoreValueAccessorHelper);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/SplitRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/SplitRowStoreValueAccessor.hpp b/storage/SplitRowStoreValueAccessor.hpp
index 9ea1a3a..19937f2 100644
--- a/storage/SplitRowStoreValueAccessor.hpp
+++ b/storage/SplitRowStoreValueAccessor.hpp
@@ -100,6 +100,11 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_position_);
}
+ template <bool check_null = true> // check_null is forwarded unchanged to the absolute-position lookup.
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+ return getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, current_position_); // Looks up the tuple at current_position_.
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return getTypedValueAtAbsolutePosition(attr_id, current_position_);
}
@@ -140,6 +145,44 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
}
}
+  // Returns a pointer to the value of attribute 'attr_id' in the tuple slot
+  // 'tid' together with its byte length, or (nullptr, 0) when check_null is
+  // set and the slot's null bitmap marks the value as NULL.
+  template <bool check_null = true>
+  inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id,
+                                                                                            const tuple_id tid) const {
+    DEBUG_ASSERT(occupancy_bitmap_.getBit(tid));
+    DEBUG_ASSERT(relation_.hasAttributeWithId(attr_id));
+
+    // Start of the slot that holds tuple 'tid'.
+    const char *slot_begin = static_cast<const char*>(tuple_storage_)
+                             + tuple_slot_bytes_ * tid;
+
+    if (check_null) {
+      const int null_bit = relation_.getNullableAttributeIndex(attr_id);
+      if (null_bit != -1) {
+        // The const_cast is safe: only the read-only getBit() of the
+        // BitVector is used.
+        BitVector<true> slot_null_bitmap(const_cast<void*>(static_cast<const void*>(slot_begin)),
+                                         relation_.numNullableAttributes());
+        if (slot_null_bitmap.getBit(null_bit)) {
+          return std::make_pair(nullptr, 0);
+        }
+      }
+    }
+
+    // The attribute data of a slot follows its per-tuple null bitmap.
+    const char *slot_data = slot_begin + per_tuple_null_bitmap_bytes_;
+    const int var_len_idx = relation_.getVariableLengthAttributeIndex(attr_id);
+    if (var_len_idx != -1) {
+      // Variable-length, stored at back of block. The slot holds two
+      // 32-bit words per variable-length attribute: word 0 is applied as an
+      // offset from tuple_storage_, word 1 is returned as the byte length.
+      const std::uint32_t *offset_and_length = reinterpret_cast<const std::uint32_t*>(
+          slot_data
+          + relation_.getFixedByteLength()
+          + var_len_idx * 2 * sizeof(std::uint32_t));
+      return std::make_pair(static_cast<const char*>(tuple_storage_) + offset_and_length[0],
+                            offset_and_length[1]);
+    }
+
+    // Fixed-length, stored in-line in slot.
+    return std::make_pair(slot_data + relation_.getFixedLengthAttributeOffset(attr_id),
+                          attr_max_lengths_[attr_id]);
+  }
+
+
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
DEBUG_ASSERT(occupancy_bitmap_.getBit(tid));
@@ -317,6 +360,7 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
tuple_storage_(tuple_storage),
tuple_slot_bytes_(tuple_slot_bytes),
per_tuple_null_bitmap_bytes_(per_tuple_null_bitmap_bytes),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()),
current_position_(std::numeric_limits<std::size_t>::max()) {
}
@@ -327,6 +371,7 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
const void *tuple_storage_;
const std::size_t tuple_slot_bytes_;
const std::size_t per_tuple_null_bitmap_bytes_;
+ const std::vector<std::size_t> &attr_max_lengths_;
std::size_t current_position_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/StorageBlock.cpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.cpp b/storage/StorageBlock.cpp
index fdd438d..78aba7c 100644
--- a/storage/StorageBlock.cpp
+++ b/storage/StorageBlock.cpp
@@ -389,15 +389,7 @@ AggregationState* StorageBlock::aggregate(
const AggregationHandle &handle,
const std::vector<std::unique_ptr<const Scalar>> &arguments,
const std::vector<attribute_id> *arguments_as_attributes,
- const Predicate *predicate,
std::unique_ptr<TupleIdSequence> *reuse_matches) const {
- // If there is a filter predicate that hasn't already been evaluated,
- // evaluate it now and save the results for other aggregates on this same
- // block.
- if (predicate && !*reuse_matches) {
- reuse_matches->reset(getMatchesForPredicate(predicate));
- }
-
#ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
// If all the arguments to this aggregate are plain relation attributes,
// aggregate directly on a ValueAccessor from this block to avoid a copy.
@@ -418,7 +410,6 @@ void StorageBlock::aggregateGroupBy(
const AggregationHandle &handle,
const std::vector<std::unique_ptr<const Scalar>> &arguments,
const std::vector<std::unique_ptr<const Scalar>> &group_by,
- const Predicate *predicate,
AggregationStateHashTableBase *hash_table,
std::unique_ptr<TupleIdSequence> *reuse_matches,
std::vector<std::unique_ptr<ColumnVector>> *reuse_group_by_vectors) const {
@@ -440,14 +431,7 @@ void StorageBlock::aggregateGroupBy(
ColumnVectorsValueAccessor temp_result;
{
std::unique_ptr<ValueAccessor> accessor;
- if (predicate) {
- if (!*reuse_matches) {
- // If there is a filter predicate that hasn't already been evaluated,
- // evaluate it now and save the results for other aggregates on this
- // same block.
- reuse_matches->reset(getMatchesForPredicate(predicate));
- }
-
+ if (reuse_matches) {
// Create a filtered ValueAccessor that only iterates over predicate
// matches.
accessor.reset(tuple_store_->createValueAccessor(reuse_matches->get()));
@@ -499,7 +483,6 @@ void StorageBlock::aggregateDistinct(
const std::vector<std::unique_ptr<const Scalar>> &arguments,
const std::vector<attribute_id> *arguments_as_attributes,
const std::vector<std::unique_ptr<const Scalar>> &group_by,
- const Predicate *predicate,
AggregationStateHashTableBase *distinctify_hash_table,
std::unique_ptr<TupleIdSequence> *reuse_matches,
std::vector<std::unique_ptr<ColumnVector>> *reuse_group_by_vectors) const {
@@ -514,14 +497,7 @@ void StorageBlock::aggregateDistinct(
ColumnVectorsValueAccessor temp_result;
{
std::unique_ptr<ValueAccessor> accessor;
- if (predicate) {
- if (!*reuse_matches) {
- // If there is a filter predicate that hasn't already been evaluated,
- // evaluate it now and save the results for other aggregates on this
- // same block.
- reuse_matches->reset(getMatchesForPredicate(predicate));
- }
-
+ if (reuse_matches) {
// Create a filtered ValueAccessor that only iterates over predicate
// matches.
accessor.reset(tuple_store_->createValueAccessor(reuse_matches->get()));
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/StorageBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.hpp b/storage/StorageBlock.hpp
index 3ae3812..3217fa2 100644
--- a/storage/StorageBlock.hpp
+++ b/storage/StorageBlock.hpp
@@ -410,7 +410,6 @@ class StorageBlock : public StorageBlockBase {
const AggregationHandle &handle,
const std::vector<std::unique_ptr<const Scalar>> &arguments,
const std::vector<attribute_id> *arguments_as_attributes,
- const Predicate *predicate,
std::unique_ptr<TupleIdSequence> *reuse_matches) const;
/**
@@ -460,7 +459,6 @@ class StorageBlock : public StorageBlockBase {
void aggregateGroupBy(const AggregationHandle &handle,
const std::vector<std::unique_ptr<const Scalar>> &arguments,
const std::vector<std::unique_ptr<const Scalar>> &group_by,
- const Predicate *predicate,
AggregationStateHashTableBase *hash_table,
std::unique_ptr<TupleIdSequence> *reuse_matches,
std::vector<std::unique_ptr<ColumnVector>>
@@ -505,7 +503,6 @@ class StorageBlock : public StorageBlockBase {
const std::vector<std::unique_ptr<const Scalar>> &arguments,
const std::vector<attribute_id> *arguments_as_attributes,
const std::vector<std::unique_ptr<const Scalar>> &group_by,
- const Predicate *predicate,
AggregationStateHashTableBase *distinctify_hash_table,
std::unique_ptr<TupleIdSequence> *reuse_matches,
std::vector<std::unique_ptr<ColumnVector>> *reuse_group_by_vectors) const;
@@ -588,6 +585,8 @@ class StorageBlock : public StorageBlockBase {
**/
const std::size_t getNumTuples() const;
+ TupleIdSequence* getMatchesForPredicate(const Predicate *predicate) const;
+
private:
static TupleStorageSubBlock* CreateTupleStorageSubBlock(
const CatalogRelationSchema &relation,
@@ -627,8 +626,6 @@ class StorageBlock : public StorageBlockBase {
// StorageBlock's header.
bool rebuildIndexes(bool short_circuit);
- TupleIdSequence* getMatchesForPredicate(const Predicate *predicate) const;
-
std::unordered_map<attribute_id, TypedValue>* generateUpdatedValues(
const ValueAccessor &accessor,
const tuple_id tuple,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/storage/ValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/ValueAccessor.hpp b/storage/ValueAccessor.hpp
index e2a898e..e9370cc 100644
--- a/storage/ValueAccessor.hpp
+++ b/storage/ValueAccessor.hpp
@@ -375,6 +375,11 @@ class TupleIdSequenceAdapterValueAccessor : public ValueAccessor {
return accessor_->template getUntypedValueAtAbsolutePosition<check_null>(attr_id, *current_position_);
}
+ template <bool check_null = true> // check_null is forwarded unchanged to the wrapped accessor.
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+ return accessor_->template getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, *current_position_); // Tuple id comes from the current position of this adapter.
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return accessor_->getTypedValueAtAbsolutePosition(attr_id, *current_position_);
}
@@ -387,6 +392,13 @@ class TupleIdSequenceAdapterValueAccessor : public ValueAccessor {
}
// Pass-through.
+ template <bool check_null = true> // check_null is forwarded unchanged to the wrapped accessor.
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id,
+ const tuple_id tid) const {
+ return accessor_->template getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, tid); // Delegates with the same attribute and tuple ids.
+ }
+
+ // Pass-through.
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
return accessor_->getTypedValueAtAbsolutePosition(attr_id, tid);
@@ -560,6 +572,12 @@ class OrderedTupleIdSequenceAdapterValueAccessor : public ValueAccessor {
id_sequence_[current_position_]);
}
+ template <bool check_null = true> // check_null is forwarded unchanged to the wrapped accessor.
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+ return accessor_->template getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(
+ attr_id, id_sequence_[current_position_]); // Tuple id is read from id_sequence_ at the current position.
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return accessor_->getTypedValueAtAbsolutePosition(attr_id, id_sequence_[current_position_]);
}
@@ -571,6 +589,13 @@ class OrderedTupleIdSequenceAdapterValueAccessor : public ValueAccessor {
"OrderedTupleIdSequenceAdapterValueAccessor");
}
+ template <bool check_null = true> // Unsupported on this adapter: the body only reports a fatal error.
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id,
+ const tuple_id tid) const {
+ FATAL_ERROR("getUntypedValueAndByteLengthAtAbsolutePosition() not implemented in "
+ "OrderedTupleIdSequenceAdapterValueAccessor");
+ }
+
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
FATAL_ERROR("getTypedValueAtAbsolutePosition() not implemented in "
@@ -737,6 +762,11 @@ class PackedTupleStorageSubBlockValueAccessor : public ValueAccessor {
return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_tuple_);
}
+ template <bool check_null = true> // check_null is forwarded unchanged to the absolute-position lookup.
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+ return getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, current_tuple_); // Looks up the tuple at current_tuple_.
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return getTypedValueAtAbsolutePosition(attr_id, current_tuple_);
}
@@ -747,6 +777,12 @@ class PackedTupleStorageSubBlockValueAccessor : public ValueAccessor {
return helper_.template getAttributeValue<check_null>(tid, attr_id);
}
+ template <bool check_null = true> // check_null is forwarded unchanged to the helper.
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id,
+ const tuple_id tid) const {
+ return helper_.template getAttributeValueAndByteLength<check_null>(tid, attr_id); // Note the helper takes (tuple, attribute) in the opposite order.
+ }
+
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
return helper_.getAttributeValueTyped(tid, attr_id);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/types/containers/ColumnVector.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVector.hpp b/types/containers/ColumnVector.hpp
index 76968ba..a9349ee 100644
--- a/types/containers/ColumnVector.hpp
+++ b/types/containers/ColumnVector.hpp
@@ -193,6 +193,22 @@ class NativeColumnVector : public ColumnVector {
}
/**
+ * @brief Get the untyped pointer to a value as well as the value's byte length
+ * in this NativeColumnVector as a pair.
+ *
+ * @param position The position of the value to get.
+ * @return A pair containing the untyped pointer to the value at position and
+ * the value's byte length.
+ **/
+  template <bool check_null = true>
+  inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const std::size_t position) const {
+    DCHECK_LT(position, actual_length_);
+    // A NULL is only possible when a null bitmap exists and has the bit set.
+    if (check_null && null_bitmap_ && null_bitmap_->getBit(position)) {
+      return std::make_pair(nullptr, 0);
+    }
+    // Values are stored contiguously, type_length_ bytes apart.
+    const char *value_ptr = static_cast<const char*>(values_) + (position * type_length_);
+    return std::make_pair(value_ptr, type_length_);
+  }
+
+ /**
* @brief Get a value in this NativeColumnVector as a TypedValue.
*
* @param position The position of the value to get.
@@ -453,6 +469,25 @@ class IndirectColumnVector : public ColumnVector {
}
/**
+ * @brief Get the untyped pointer to a value as well as the value's byte length
+ * in this IndirectColumnVector as a pair.
+ *
+ * @param position The position of the value to get.
+ * @return A pair containing the untyped pointer to the value at position and
+ * the value's byte length.
+ **/
+  template <bool check_null = true>
+  inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const std::size_t position) const {
+    DCHECK_LT(position, values_.size());
+    const TypedValue &stored = values_[position];
+    // NULLs are only reported when requested and when the type is nullable.
+    if (check_null && type_is_nullable_ && stored.isNull()) {
+      return std::make_pair(nullptr, 0);
+    }
+    return std::make_pair(stored.getDataPtr(), stored.getDataSize());
+  }
+
+ /**
* @brief Get a value in this IndirectColumnVector as a TypedValue.
*
* @param position The position of the value to get.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/types/containers/ColumnVectorsValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVectorsValueAccessor.hpp b/types/containers/ColumnVectorsValueAccessor.hpp
index f1d29a2..d69d1d8 100644
--- a/types/containers/ColumnVectorsValueAccessor.hpp
+++ b/types/containers/ColumnVectorsValueAccessor.hpp
@@ -124,6 +124,11 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_position_);
}
+ template <bool check_null = true> // check_null is forwarded unchanged to the absolute-position lookup.
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+ return getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, current_position_); // Looks up the tuple at current_position_.
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return getTypedValueAtAbsolutePosition(attr_id, current_position_);
}
@@ -140,6 +145,18 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
}
}
+  // Dispatches to the native or indirect column vector that backs 'attr_id'
+  // and returns its (pointer, byte length) pair for position 'tid'.
+  template <bool check_null = true>
+  inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id,
+                                                                                            const tuple_id tid) const {
+    DCHECK(attributeIdInRange(attr_id));
+    DCHECK(tupleIdInRange(tid));
+    const auto &column = *columns_[attr_id];
+    return column_native_[attr_id]
+        ? static_cast<const NativeColumnVector&>(column).getUntypedValueAndByteLength<check_null>(tid)
+        : static_cast<const IndirectColumnVector&>(column).getUntypedValueAndByteLength<check_null>(tid);
+  }
+
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
DCHECK(attributeIdInRange(attr_id));
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/utility/BloomFilter.hpp
----------------------------------------------------------------------
diff --git a/utility/BloomFilter.hpp b/utility/BloomFilter.hpp
index b93df84..973ca14 100644
--- a/utility/BloomFilter.hpp
+++ b/utility/BloomFilter.hpp
@@ -26,6 +26,7 @@
#include <algorithm>
#include <cstddef>
#include <cstdint>
+#include <cstring>
#include <memory>
#include <utility>
#include <vector>
@@ -44,11 +45,358 @@ namespace quickstep {
* @{
*/
+class BloomFilterOriginal;
+class BloomFilterBlocked;
+typedef BloomFilterBlocked BloomFilter;
+
+/**
+ * @brief A "blocked" version of Bloom Filter based on this paper:
+ * Putze, Felix, Peter Sanders, and Johannes Singler.
+ * "Cache-, hash-and space-efficient bloom filters."
+ * International Workshop on Experimental and Efficient Algorithms.
+ * Springer Berlin Heidelberg, 2007.
+ **/
+class BloomFilterBlocked {
+ public:
+ static const std::uint8_t kNumBitsPerByte = 8;
+ static const std::uint8_t kMaxNumHashFns = 4;
+
+ // This union allows us to read/write position in convenient fashion,
+ // through nested structs and their bitfield members
+ //
+ // A position can simply be a 32-bit hash
+ // Or it can be a cache line (block of 512 bits) and position within it
+ // Or it can be a byte (block of 8 bits) and position within it
+ union Position {
+ std::uint32_t hash;
+ struct CacheLinePosition {
+ unsigned index_in_line : 9;
+ unsigned line_num : 23;
+ } cache_line_pos;
+ struct BytePosition {
+ unsigned index_in_byte : 3;
+ unsigned byte_num : 29;
+ } byte_pos;
+ };
+
+ // This Bloom filter implementation requires the bit array to be a
+ // multiple of the cache-line size. So we either have to round up to a
+ // multiple (default behavior) or round down to a multiple.
+ // Rounding up is usually preferable but rounding down is necessary when
+ // we are given a bit array that we don't control the size of, in the
+ // constructor.
+  static std::uint64_t getNearestAllowedSize(
+      const std::uint64_t approx_size,
+      bool round_down = false) {
+    // Rounding up is rounding down after adding (kCacheLineBytes - 1).
+    const std::uint64_t bias = round_down ? 0 : (kCacheLineBytes - 1);
+    return ((approx_size + bias) / kCacheLineBytes) * kCacheLineBytes;
+  }
+
+
+ /**
+ * @brief Constructor.
+ * @note When no bit_array is being passed to the constructor,
+ * then the bit_array is owned and managed by this class.
+ *
+ * @param hash_fn_count The number of hash functions used by this bloom filter.
+ * @param bit_array_size_in_bytes Size of the bit array.
+ **/
+ BloomFilterBlocked(const std::uint8_t hash_fn_count,
+ const std::uint64_t bit_array_size_in_bytes)
+ : hash_fn_count_(hash_fn_count),
+ array_size_in_bytes_(getNearestAllowedSize(bit_array_size_in_bytes)),
+ is_bit_array_owner_(true),
+ bit_array_(new std::uint8_t[array_size_in_bytes_]) {
+ reset();
+ }
+
+ /**
+ * @brief Constructor.
+ * @note When a bit_array is passed as an argument to the constructor,
+ * then the ownership of the bit array lies with the caller.
+ *
+ * @param hash_fn_count The number of hash functions used by this bloom filter.
+ * @param bit_array_size_in_bytes Size of the bit array.
+ * @param bit_array A pointer to the memory region that is used to store bit array.
+ * @param is_initialized A boolean that indicates whether to zero-out the region
+ * before use or not.
+ **/
+ BloomFilterBlocked(const std::uint8_t hash_fn_count,
+ const std::uint64_t bit_array_size_in_bytes,
+ std::uint8_t *bit_array,
+ const bool is_initialized)
+ : hash_fn_count_(hash_fn_count),
+ array_size_in_bytes_(getNearestAllowedSize(bit_array_size_in_bytes, true)),
+ is_bit_array_owner_(false),
+ bit_array_(bit_array) { // Owned by the calling method.
+ if (!is_initialized) {
+ reset();
+ }
+ }
+
+ /**
+ * @brief Constructor.
+ * @note When a bloom filter proto is passed as an initializer,
+ * then the bit_array is owned and managed by this class.
+ * @note Only the number of hashes and the size are read from the proto;
+ * the freshly allocated bit array is zeroed by reset().
+ * @note NOTE(review): unlike the other two constructors, the size is taken
+ * from the proto as-is and is not passed through
+ * getNearestAllowedSize(). Confirm that the proto always carries a
+ * size that is a multiple of the cache-line size.
+ *
+ * @param bloom_filter_proto The protobuf representation of a
+ * bloom filter configuration.
+ **/
+ explicit BloomFilterBlocked(const serialization::BloomFilter &bloom_filter_proto)
+ : hash_fn_count_(bloom_filter_proto.number_of_hashes()),
+ array_size_in_bytes_(bloom_filter_proto.bloom_filter_size()),
+ is_bit_array_owner_(true),
+ bit_array_(new std::uint8_t[array_size_in_bytes_]) {
+ reset();
+ }
+
+ /**
+ * @brief Destructor.
+ * @note If this filter owns the bit array, the array is released through
+ * bit_array_.reset(). Otherwise bit_array_.release() gives up the
+ * pointer without freeing it, because the memory belongs to the
+ * caller that supplied it.
+ **/
+ ~BloomFilterBlocked() {
+ if (is_bit_array_owner_) {
+ bit_array_.reset();
+ } else {
+ bit_array_.release();
+ }
+ }
+
+ static bool ProtoIsValid(const serialization::BloomFilter &bloom_filter_proto) {
+ return bloom_filter_proto.IsInitialized();
+ }
+
+ /**
+ * @brief Zeros out the contents of the bit array.
+ **/
+ inline void reset() {
+ // Initialize the bit_array with all zeros.
+ std::fill_n(bit_array_.get(), array_size_in_bytes_, 0x00);
+ inserted_element_count_ = 0;
+ }
+
+ /**
+ * @brief Get the number of hash functions used in this bloom filter.
+ *
+ * @return Returns the number of hash functions.
+ **/
+ inline std::uint8_t getNumberOfHashes() const {
+ return hash_fn_count_;
+ }
+
+ /**
+ * @brief Get the size of the bit array in bytes for this bloom filter.
+ *
+ * @return Returns the bit array size (in bytes).
+ **/
+ inline std::uint64_t getBitArraySize() const {
+ return array_size_in_bytes_;
+ }
+
+ /**
+ * @brief Get the constant pointer to the bit array for this bloom filter
+ *
+ * @return Returns constant pointer to the bit array.
+ **/
+ inline const std::uint8_t* getBitArray() const {
+ return bit_array_.get();
+ }
+
+ template <typename T>
+ void insert(const T &value) {
+ insert(reinterpret_cast<const std::uint8_t *>(&value), sizeof(T));
+ }
+
+ /**
+ * @brief Inserts a given value into the bloom filter in a thread-safe manner.
+ *
+ * @param key_begin A pointer to the value being inserted.
+ * @param length Size of the value being inserted in bytes.
+ */
+ inline void insert(const std::uint8_t *key_begin, const std::size_t length) {
+ SpinSharedMutexExclusiveLock<false> exclusive_writer_lock(bloom_filter_insert_mutex_);
+ insertUnSafe(key_begin, length);
+ }
+
+  /**
+   * @brief Insert a value using its raw object bytes as the key, without
+   *        taking the insert lock.
+   *
+   * @param value The value to insert; its sizeof(T) bytes form the key.
+   **/
+  template <typename T>
+  void insertUnSafe(const T &value) {
+    const std::uint8_t *const raw_bytes =
+        reinterpret_cast<const std::uint8_t *>(&value);
+    insertUnSafe(raw_bytes, sizeof(T));
+  }
+
+  /**
+   * @brief Insert a key into the bloom filter without any locking.
+   * @warning Unlike insert(), this does not acquire the insert mutex; the
+   *          caller is responsible for thread safety.
+   *
+   * @param key_begin A pointer to the first byte of the key.
+   * @param length The length of the key in bytes.
+   */
+  inline void insertUnSafe(const std::uint8_t *key_begin, const std::size_t length) {
+    // The first position comes from hash_identity; the remaining
+    // hash_fn_count_ - 1 positions are derived relative to it.
+    const Position anchor = getFirstPosition(key_begin, length);
+    setBitAtPosition(anchor);
+    for (std::uint8_t fn = 1; fn < hash_fn_count_; ++fn) {
+      setBitAtPosition(getOtherPosition(key_begin, length, anchor, fn));
+    }
+    ++inserted_element_count_;
+  }
+
+  /**
+   * @brief Test membership of a value using its raw object bytes as the key.
+   *
+   * @note Declared const, like the (key_begin, length) overload it forwards
+   *       to, so that it can also be called on a const bloom filter.
+   *
+   * @param value The value to test; its sizeof(T) bytes form the key.
+   * @return false if the value is certainly absent, true if it may be present.
+   **/
+  template <typename T>
+  bool contains(const T &value) const {
+    return contains(reinterpret_cast<const std::uint8_t *>(&value), sizeof(T));
+  }
+
+  /**
+   * @brief Test membership of a given key in the bloom filter.
+   *        A true result means the key may or may not have been inserted;
+   *        a false result means the key has certainly not been inserted.
+   *
+   * @note The membership test takes no lock; it assumes the bloom filter is
+   *       only probed after it has been fully built.
+   *
+   * @param key_begin A pointer to the first byte of the key being tested.
+   * @param length The length of the key in bytes.
+   * @return false as soon as one probed bit is unset, true otherwise.
+   */
+  inline bool contains(
+      const std::uint8_t *__restrict__ key_begin,
+      const std::size_t length) const {
+    const Position anchor = getFirstPosition(key_begin, length);
+    bool maybe_present = getBitAtPosition(anchor);
+    // Stop probing at the first unset bit.
+    for (std::uint8_t fn = 1; maybe_present && fn < hash_fn_count_; ++fn) {
+      maybe_present =
+          getBitAtPosition(getOtherPosition(key_begin, length, anchor, fn));
+    }
+    return maybe_present;
+  }
+
+  /**
+   * @brief Perform a bitwise-OR of the given bloom filter into this one,
+   *        i.e. a union of the two filters' bit arrays.
+   *
+   * @note Only the bytes present in both bit arrays are combined. The loop
+   *       used to be bounded by the other filter's size alone, which wrote
+   *       past the end of this filter's array when the argument was larger.
+   *       The inserted-element counter is not changed by this operation.
+   *
+   * @param bloom_filter A const pointer to the bloom filter object to do
+   *        bitwise-OR with. Must not be null.
+   */
+  inline void bitwiseOr(const BloomFilterBlocked *bloom_filter) {
+    SpinSharedMutexExclusiveLock<false> exclusive_writer_lock(bloom_filter_insert_mutex_);
+    const std::uint64_t other_size = bloom_filter->getBitArraySize();
+    const std::uint64_t num_bytes =
+        other_size < array_size_in_bytes_ ? other_size : array_size_in_bytes_;
+    std::uint8_t *dst = bit_array_.get();
+    const std::uint8_t *src = bloom_filter->getBitArray();
+    for (std::uint64_t byte_index = 0; byte_index < num_bytes; ++byte_index) {
+      dst[byte_index] |= src[byte_index];
+    }
+  }
+
+  /**
+   * @brief Report how many insertions have been recorded since the last reset().
+   *
+   * @return The value of the insertion counter.
+   **/
+  inline std::uint32_t element_count() const {
+    const std::uint32_t recorded_insertions = inserted_element_count_;
+    return recorded_insertions;
+  }
+
+ protected:
+  /**
+   * @brief Compute the first bit position for a key from hash_identity().
+   *
+   * @param begin A pointer to the first byte of the key.
+   * @param length The length of the key in bytes.
+   * @return A Position whose hash field holds the computed value.
+   **/
+  Position getFirstPosition(const std::uint8_t *begin, std::size_t length) const {
+    Position anchor;
+    anchor.hash = hash_identity(begin, length);
+    return anchor;
+  }
+
+  /**
+   * @brief Compute the bit position for the index-th hash function of a key.
+   *
+   * @note Position is defined outside this excerpt. After the hash is stored,
+   *       cache_line_pos.line_num is overwritten with the value taken from
+   *       first_pos, presumably so that all probes of a key share one cache
+   *       line (to be confirmed against the Position definition).
+   *
+   * @param begin A pointer to the first byte of the key.
+   * @param length The length of the key in bytes.
+   * @param first_pos The position returned by getFirstPosition() for this key.
+   * @param index The hash function number; callers in this class pass >= 1,
+   *        since hash_fn_[index - 1] is used as the multiplier.
+   * @return The resulting Position.
+   **/
+  Position getOtherPosition(
+      const std::uint8_t *begin,
+      std::size_t length,
+      const Position first_pos,
+      const std::uint8_t index) const {
+    const std::uint64_t multiplier = hash_fn_[index - 1];
+    Position derived;
+    derived.hash = hash_multiplicative(begin, length, multiplier);
+    derived.cache_line_pos.line_num = first_pos.cache_line_pos.line_num;
+    return derived;
+  }
+
+  /**
+   * @brief Fill positions[index] for a key: slot 0 gets the hash_identity()
+   *        value, any other slot gets a multiplicative hash whose
+   *        cache_line_pos.line_num is then copied from positions[0].
+   *
+   * @param begin A pointer to the first byte of the key.
+   * @param length The length of the key in bytes.
+   * @param index The slot (hash function number) to fill.
+   * @param positions The array of positions; for index > 0, positions[0] is
+   *        read, so it has to be filled first.
+   **/
+  void fillPosition(
+      const std::uint8_t *begin,
+      std::size_t length,
+      const std::uint8_t index,
+      Position positions[]) const {
+    if (index == 0) {
+      positions[0].hash = hash_identity(begin, length);
+      return;
+    }
+    Position &slot = positions[index];
+    slot.hash = hash_multiplicative(begin, length, hash_fn_[index - 1]);
+    slot.cache_line_pos.line_num = positions[0].cache_line_pos.line_num;
+  }
+
+  /**
+   * @brief Set the bit addressed by pos (byte_pos.byte_num selects the byte,
+   *        byte_pos.index_in_byte selects the bit within it).
+   **/
+  void setBitAtPosition(const Position &pos) {
+    std::uint8_t *const bits = bit_array_.get();
+    bits[pos.byte_pos.byte_num] |= (1 << pos.byte_pos.index_in_byte);
+  }
+
+  /**
+   * @brief Read the bit addressed by pos (byte_pos.byte_num selects the byte,
+   *        byte_pos.index_in_byte selects the bit within it).
+   *
+   * @return true if that bit is set, false otherwise.
+   **/
+  bool getBitAtPosition(const Position &pos) const {
+    const std::uint8_t *const bits = bit_array_.get();
+    return (bits[pos.byte_pos.byte_num] & (1 << pos.byte_pos.index_in_byte)) != 0;
+  }
+
+  /**
+   * @brief Hash a key by taking (up to) its first four bytes as an integer,
+   *        reduced modulo the number of bits in the bit array.
+   *
+   * @note Keys shorter than four bytes are zero-extended. Previously the
+   *       unused bytes of the result were left uninitialized, so the hash of
+   *       a short key was indeterminate. The bytes are copied with memcpy
+   *       instead of being read through a std::uint32_t pointer, so the key
+   *       does not need to be 4-byte aligned. Results for keys of four or
+   *       more bytes are unchanged.
+   *
+   * @param begin A pointer to the first byte of the key.
+   * @param length The length of the key in bytes.
+   * @return The bit index derived from the key.
+   **/
+  inline std::uint32_t hash_identity(
+      const std::uint8_t *__restrict__ begin,
+      std::size_t length) const {
+    std::uint32_t hash = 0;
+    const std::size_t bytes_to_copy = length < sizeof(hash) ? length : sizeof(hash);
+    std::memcpy(&hash, begin, bytes_to_copy);
+    return hash % (array_size_in_bytes_ * kNumBitsPerByte);
+  }
+
+  /**
+   * @brief Multiplicative hash of a key: every full 4-byte word, and then
+   *        every remaining byte, is multiplied by the multiplier and the
+   *        product shifted right by 24 bits is accumulated.
+   *
+   * @note The word loop only runs while four whole bytes remain. Previously a
+   *       key whose length was at least four but not a multiple of four was
+   *       read up to three bytes past its end. Words are loaded with memcpy,
+   *       so the key does not need to be 4-byte aligned. Results for keys
+   *       whose length is a multiple of four, or below four, are unchanged.
+   *       The result is not reduced modulo the bit array size here.
+   *
+   * @param begin A pointer to the first byte of the key.
+   * @param length The length of the key in bytes.
+   * @param multiplier The multiplier that distinguishes this hash function.
+   * @return The accumulated 32-bit hash value.
+   **/
+  inline std::uint32_t hash_multiplicative(
+      const std::uint8_t *__restrict__ begin,
+      std::size_t length,
+      const std::uint64_t multiplier) const {
+    std::uint32_t hash = 0;
+    std::size_t bytes_hashed = 0;
+    while (length - bytes_hashed >= sizeof(std::uint32_t)) {
+      std::uint32_t word;
+      std::memcpy(&word, begin + bytes_hashed, sizeof(word));
+      hash += (multiplier * word) >> 24;
+      bytes_hashed += sizeof(word);
+    }
+    // Tail: fewer than four bytes are left, hash them one at a time.
+    while (bytes_hashed < length) {
+      const std::uint8_t val = begin[bytes_hashed];
+      hash += (multiplier * val) >> 24;
+      ++bytes_hashed;
+    }
+    return hash;
+  }
+
+ private:
+ // Number of bit positions probed per key. The first position comes from
+ // hash_identity(); positions 1..hash_fn_count_-1 use hash_fn_[i - 1].
+ const std::uint32_t hash_fn_count_;
+ // Size of the bit array in bytes.
+ const std::uint64_t array_size_in_bytes_;
+ // Incremented once per insertUnSafe() call; cleared by reset().
+ std::uint32_t inserted_element_count_;
+ // When true, the destructor frees the bit array; otherwise it only lets go of it.
+ const bool is_bit_array_owner_;
+
+ static constexpr std::uint64_t kKnuthGoldenRatioNumber = 2654435761;
+ // Multipliers for hash_multiplicative(). Each active entry is
+ // kKnuthGoldenRatioNumber times one of 0x1, 0x1ff, 0x1ffff, 0x1ffffff; the
+ // commented-out rows are candidates that are currently disabled.
+ // NOTE(review): only four entries are initialized explicitly. If
+ // kMaxNumHashFns (declared outside this excerpt) is larger, the remaining
+ // entries are zero and the corresponding hash functions always yield 0.
+ const std::uint64_t hash_fn_[kMaxNumHashFns] = { // multiplier table; see the note above
+ 0x00000001 * kKnuthGoldenRatioNumber, // 0x00000003, 0x00000007, 0x0000000f,
+ // 0x0000001f * kKnuthGoldenRatioNumber, // 0x0000003f, 0x0000007f, 0x000000ff,
+ 0x000001ff * kKnuthGoldenRatioNumber, // 0x000003ff, 0x000007ff, 0x00000fff,
+ // 0x00001fff * kKnuthGoldenRatioNumber, // 0x00003fff, 0x00007fff, 0x0000ffff,
+ 0x0001ffff * kKnuthGoldenRatioNumber, // 0x0003ffff, 0x0007ffff, 0x000fffff,
+ // 0x001fffff * kKnuthGoldenRatioNumber, // 0x003fffff, 0x007fffff, 0x00ffffff,
+ 0x01ffffff * kKnuthGoldenRatioNumber, // 0x03ffffff, 0x07ffffff, 0x0fffffff,
+ // 0x1fffffff * kKnuthGoldenRatioNumber // 0x3fffffff, 0x7fffffff, 0xffffffff
+ };
+
+ // The bit array itself.
+ // NOTE(review): the visible constructor allocates this with new[], while
+ // std::unique_ptr<std::uint8_t> deletes with scalar delete; the array form
+ // std::unique_ptr<std::uint8_t[]> would match the allocation.
+ alignas(kCacheLineBytes) std::unique_ptr<std::uint8_t> bit_array_;
+ // Taken exclusively by insert() and bitwiseOr(); contains() does not use it.
+ alignas(kCacheLineBytes) mutable SpinSharedMutex<false> bloom_filter_insert_mutex_;
+
+ DISALLOW_COPY_AND_ASSIGN(BloomFilterBlocked);
+};
+
/**
* @brief A simple Bloom Filter implementation with basic primitives
* based on Partow's Bloom Filter implementation.
**/
-class BloomFilter {
+class BloomFilterOriginal {
public:
static const uint32_t kNumBitsPerByte = 8;
@@ -57,21 +405,17 @@ class BloomFilter {
* @note When no bit_array is being passed to the constructor,
* then the bit_array is owned and managed by this class.
*
- * @param random_seed A random_seed that generates unique hash functions.
* @param hash_fn_count The number of hash functions used by this bloom filter.
* @param bit_array_size_in_bytes Size of the bit array.
**/
- BloomFilter(const std::uint64_t random_seed,
- const std::size_t hash_fn_count,
+ BloomFilterOriginal(const std::size_t hash_fn_count,
const std::uint64_t bit_array_size_in_bytes)
- : random_seed_(random_seed),
- hash_fn_count_(hash_fn_count),
+ : hash_fn_count_(hash_fn_count),
array_size_in_bytes_(bit_array_size_in_bytes),
array_size_(array_size_in_bytes_ * kNumBitsPerByte),
bit_array_(new std::uint8_t[array_size_in_bytes_]),
is_bit_array_owner_(true) {
reset();
- generate_unique_hash_fn();
}
/**
@@ -79,20 +423,17 @@ class BloomFilter {
* @note When a bit_array is passed as an argument to the constructor,
* then the ownership of the bit array lies with the caller.
*
- * @param random_seed A random_seed that generates unique hash functions.
* @param hash_fn_count The number of hash functions used by this bloom filter.
* @param bit_array_size_in_bytes Size of the bit array.
* @param bit_array A pointer to the memory region that is used to store bit array.
* @param is_initialized A boolean that indicates whether to zero-out the region
* before use or not.
**/
- BloomFilter(const std::uint64_t random_seed,
- const std::size_t hash_fn_count,
+ BloomFilterOriginal(const std::size_t hash_fn_count,
const std::uint64_t bit_array_size_in_bytes,
std::uint8_t *bit_array,
const bool is_initialized)
- : random_seed_(random_seed),
- hash_fn_count_(hash_fn_count),
+ : hash_fn_count_(hash_fn_count),
array_size_in_bytes_(bit_array_size_in_bytes),
array_size_(bit_array_size_in_bytes * kNumBitsPerByte),
bit_array_(bit_array), // Owned by the calling method.
@@ -100,7 +441,6 @@ class BloomFilter {
if (!is_initialized) {
reset();
}
- generate_unique_hash_fn();
}
/**
@@ -111,21 +451,19 @@ class BloomFilter {
* @param bloom_filter_proto The protobuf representation of a
* bloom filter configuration.
**/
- explicit BloomFilter(const serialization::BloomFilter &bloom_filter_proto)
- : random_seed_(bloom_filter_proto.bloom_filter_seed()),
- hash_fn_count_(bloom_filter_proto.number_of_hashes()),
+ explicit BloomFilterOriginal(const serialization::BloomFilter &bloom_filter_proto)
+ : hash_fn_count_(bloom_filter_proto.number_of_hashes()),
array_size_in_bytes_(bloom_filter_proto.bloom_filter_size()),
array_size_(array_size_in_bytes_ * kNumBitsPerByte),
bit_array_(new std::uint8_t[array_size_in_bytes_]),
is_bit_array_owner_(true) {
reset();
- generate_unique_hash_fn();
}
/**
* @brief Destructor.
**/
- ~BloomFilter() {
+ ~BloomFilterOriginal() {
if (is_bit_array_owner_) {
bit_array_.reset();
} else {
@@ -147,15 +485,6 @@ class BloomFilter {
}
/**
- * @brief Get the random seed that was used to initialize this bloom filter.
- *
- * @return Returns the random seed.
- **/
- inline std::uint64_t getRandomSeed() const {
- return random_seed_;
- }
-
- /**
* @brief Get the number of hash functions used in this bloom filter.
*
* @return Returns the number of hash functions.
@@ -198,7 +527,7 @@ class BloomFilter {
// Determine all the bit positions that are required to be set.
for (std::size_t i = 0; i < hash_fn_count_; ++i) {
- compute_indices(hash_ap(key_begin, length, hash_fn_[i]), &bit_index, &bit);
+ compute_indices(hash_multiplicative(key_begin, length, hash_fn_[i]), &bit_index, &bit);
modified_bit_positions.push_back(std::make_pair(bit_index, bit));
}
@@ -243,7 +572,7 @@ class BloomFilter {
std::size_t bit = 0;
for (std::size_t i = 0; i < hash_fn_count_; ++i) {
- compute_indices(hash_ap(key_begin, length, hash_fn_[i]), &bit_index, &bit);
+ compute_indices(hash_multiplicative(key_begin, length, hash_fn_[i]), &bit_index, &bit);
(bit_array_.get())[bit_index / kNumBitsPerByte] |= (1 << bit);
}
@@ -265,7 +594,7 @@ class BloomFilter {
std::size_t bit_index = 0;
std::size_t bit = 0;
for (std::size_t i = 0; i < hash_fn_count_; ++i) {
- compute_indices(hash_ap(key_begin, length, hash_fn_[i]), &bit_index, &bit);
+ compute_indices(hash_multiplicative(key_begin, length, hash_fn_[i]), &bit_index, &bit);
if (((bit_array_.get())[bit_index / kNumBitsPerByte] & (1 << bit)) != (1 << bit)) {
return false;
}
@@ -279,7 +608,7 @@ class BloomFilter {
*
* @param bloom_filter A const pointer to the bloom filter object to do bitwise-OR with.
*/
- inline void bitwiseOr(const BloomFilter *bloom_filter) {
+ inline void bitwiseOr(const BloomFilterOriginal *bloom_filter) {
SpinSharedMutexExclusiveLock<false> exclusive_writer_lock(bloom_filter_insert_mutex_);
for (std::size_t byte_index = 0; byte_index < bloom_filter->getBitArraySize(); ++byte_index) {
(bit_array_.get())[byte_index] |= bloom_filter->getBitArray()[byte_index];
@@ -301,95 +630,28 @@ class BloomFilter {
*bit = *bit_index % kNumBitsPerByte;
}
- void generate_unique_hash_fn() {
- hash_fn_.reserve(hash_fn_count_);
- const std::uint32_t predef_hash_fn_count = 128;
- static const std::uint32_t predef_hash_fn[predef_hash_fn_count] = {
- 0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
- 0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B,
- 0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66,
- 0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA,
- 0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99,
- 0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33,
- 0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5,
- 0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000,
- 0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F,
- 0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63,
- 0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7,
- 0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492,
- 0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A,
- 0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B,
- 0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3,
- 0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432,
- 0xEFFE9412, 0xAEAEDF79, 0x9F245A31, 0x83C136FC,
- 0xC3DA4A8C, 0xA5112C8C, 0x5271F491, 0x9A948DAB,
- 0xCEE59A8D, 0xB5F525AB, 0x59D13217, 0x24E7C331,
- 0x697C2103, 0x84B0A460, 0x86156DA9, 0xAEF2AC68,
- 0x23243DA5, 0x3F649643, 0x5FA495A8, 0x67710DF8,
- 0x9A6C499E, 0xDCFB0227, 0x46A43433, 0x1832B07A,
- 0xC46AFF3C, 0xB9C8FFF0, 0xC9500467, 0x34431BDF,
- 0xB652432B, 0xE367F12B, 0x427F4C1B, 0x224C006E,
- 0x2E7E5A89, 0x96F99AA5, 0x0BEB452A, 0x2FD87C39,
- 0x74B2E1FB, 0x222EFD24, 0xF357F60C, 0x440FCB1E,
- 0x8BBE030F, 0x6704DC29, 0x1144D12F, 0x948B1355,
- 0x6D8FD7E9, 0x1C11A014, 0xADD1592F, 0xFB3C712E,
- 0xFC77642F, 0xF9C4CE8C, 0x31312FB9, 0x08B0DD79,
- 0x318FA6E7, 0xC040D23D, 0xC0589AA7, 0x0CA5C075,
- 0xF874B172, 0x0CF914D5, 0x784D3280, 0x4E8CFEBC,
- 0xC569F575, 0xCDB2A091, 0x2CC016B4, 0x5C5F4421
- };
- if (hash_fn_count_ <= predef_hash_fn_count) {
- std::copy(predef_hash_fn, predef_hash_fn + hash_fn_count_, hash_fn_.begin());
- for (std::uint32_t i = 0; i < hash_fn_.size(); ++i) {
- hash_fn_[i] = hash_fn_[i] * hash_fn_[(i + 3) % hash_fn_count_] + static_cast<std::uint32_t>(random_seed_);
+ inline std::uint32_t hash_multiplicative(
+ const std::uint8_t *begin,
+ std::size_t remaining_length,
+ const std::uint64_t multiplier) const {
+ std::uint32_t hash = 0;
+ std::size_t bytes_hashed = 0;
+ if (remaining_length >= 4) {
+ while (bytes_hashed < remaining_length) {
+ auto val = *reinterpret_cast<const std::uint32_t *>(begin + bytes_hashed);
+ hash += (multiplier * val) >> 32;
+ bytes_hashed += 4;
}
- } else {
- LOG(FATAL) << "Requested number of hash functions is too large.";
}
- }
-
- inline std::uint32_t hash_ap(const std::uint8_t *begin, std::size_t remaining_length, std::uint32_t hash) const {
- const std::uint8_t *itr = begin;
- std::uint32_t loop = 0;
- while (remaining_length >= 8) {
- const std::uint32_t &i1 = *(reinterpret_cast<const std::uint32_t*>(itr)); itr += sizeof(std::uint32_t);
- const std::uint32_t &i2 = *(reinterpret_cast<const std::uint32_t*>(itr)); itr += sizeof(std::uint32_t);
- hash ^= (hash << 7) ^ i1 * (hash >> 3) ^ (~((hash << 11) + (i2 ^ (hash >> 5))));
- remaining_length -= 8;
- }
- if (remaining_length) {
- if (remaining_length >= 4) {
- const std::uint32_t &i = *(reinterpret_cast<const std::uint32_t*>(itr));
- if (loop & 0x01) {
- hash ^= (hash << 7) ^ i * (hash >> 3);
- } else {
- hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
- }
- ++loop;
- remaining_length -= 4;
- itr += sizeof(std::uint32_t);
- }
- if (remaining_length >= 2) {
- const std::uint16_t &i = *(reinterpret_cast<const std::uint16_t*>(itr));
- if (loop & 0x01) {
- hash ^= (hash << 7) ^ i * (hash >> 3);
- } else {
- hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
- }
- ++loop;
- remaining_length -= 2;
- itr += sizeof(std::uint16_t);
- }
- if (remaining_length) {
- hash += ((*itr) ^ (hash * 0xA5A5A5A5)) + loop;
- }
+ while (bytes_hashed < remaining_length) {
+ std::uint8_t val = *(begin + bytes_hashed);
+ hash += (multiplier * val) >> 32;
+ bytes_hashed++;
}
return hash;
}
private:
- const std::uint64_t random_seed_;
- std::vector<std::uint32_t> hash_fn_;
const std::uint32_t hash_fn_count_;
std::uint64_t array_size_in_bytes_;
std::uint64_t array_size_;
@@ -397,9 +659,21 @@ class BloomFilter {
std::uint32_t inserted_element_count_;
const bool is_bit_array_owner_;
+ static constexpr std::uint64_t kKnuthGoldenRatioNumber = 2654435761;
+ static constexpr std::size_t kMaxNumHashFns = 8;
+ const std::uint64_t hash_fn_[kMaxNumHashFns] = { // hash_fn_[i] is 2**(i+1) - 1
+ 0x00000001 * kKnuthGoldenRatioNumber, // 0x00000003, 0x00000007, 0x0000000f,
+ 0x0000001f * kKnuthGoldenRatioNumber, // 0x0000003f, 0x0000007f, 0x000000ff,
+ 0x000001ff * kKnuthGoldenRatioNumber, // 0x000003ff, 0x000007ff, 0x00000fff,
+ 0x00001fff * kKnuthGoldenRatioNumber, // 0x00003fff, 0x00007fff, 0x0000ffff,
+ 0x0001ffff * kKnuthGoldenRatioNumber, // 0x0003ffff, 0x0007ffff, 0x000fffff,
+ 0x001fffff * kKnuthGoldenRatioNumber, // 0x003fffff, 0x007fffff, 0x00ffffff,
+ 0x01ffffff * kKnuthGoldenRatioNumber, // 0x03ffffff, 0x07ffffff, 0x0fffffff,
+ 0x1fffffff * kKnuthGoldenRatioNumber // 0x3fffffff, 0x7fffffff, 0xffffffff
+ };
alignas(kCacheLineBytes) mutable SpinSharedMutex<false> bloom_filter_insert_mutex_;
- DISALLOW_COPY_AND_ASSIGN(BloomFilter);
+ DISALLOW_COPY_AND_ASSIGN(BloomFilterOriginal);
};
/** @} */
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/utility/BloomFilter.proto
----------------------------------------------------------------------
diff --git a/utility/BloomFilter.proto b/utility/BloomFilter.proto
index 8dd9163..b5d14a9 100644
--- a/utility/BloomFilter.proto
+++ b/utility/BloomFilter.proto
@@ -21,10 +21,8 @@ message BloomFilter {
// The default values were determined from empirical experiments.
// These values control the amount of false positivity that
// is expected from Bloom Filter.
- // - Default seed for initializing family of hashes = 0xA5A5A5A55A5A5A5A.
// - Default bloom filter size = 10 KB.
// - Default number of hash functions used in bloom filter = 5.
- optional fixed64 bloom_filter_seed = 1 [default = 0xA5A5A5A55A5A5A5A];
- optional uint32 bloom_filter_size = 2 [default = 10000];
- optional uint32 number_of_hashes = 3 [default = 5];
+ optional uint32 bloom_filter_size = 1 [default = 10000];
+ optional uint32 number_of_hashes = 2 [default = 5];
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/utility/BloomFilterAdapter.hpp
----------------------------------------------------------------------
diff --git a/utility/BloomFilterAdapter.hpp b/utility/BloomFilterAdapter.hpp
new file mode 100644
index 0000000..f094307
--- /dev/null
+++ b/utility/BloomFilterAdapter.hpp
@@ -0,0 +1,142 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_BLOOM_FILTER_ADAPTER_HPP
+#define QUICKSTEP_UTILITY_BLOOM_FILTER_ADAPTER_HPP
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "utility/BloomFilter.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ * @{
+ */
+
+/**
+ * @brief Probes batches of tuples against a list of bloom filters and can
+ *        reorder the filters so that the one rejecting the largest fraction
+ *        of tuples so far is probed first.
+ */
+class BloomFilterAdapter {
+ public:
+  /**
+   * @brief Constructor. The three vectors must have the same length.
+   *
+   * @param bloom_filters The bloom filters to probe. Only the pointers are
+   *        stored; the filters are never deleted by this class.
+   * @param attribute_ids For each filter, the attribute whose value is probed.
+   * @param attr_sizes For each filter, the byte length of that attribute's
+   *        values.
+   */
+  BloomFilterAdapter(const std::vector<const BloomFilter*> &bloom_filters,
+                     const std::vector<attribute_id> &attribute_ids,
+                     const std::vector<std::size_t> &attr_sizes) {
+    DCHECK_EQ(bloom_filters.size(), attribute_ids.size());
+    DCHECK_EQ(bloom_filters.size(), attr_sizes.size());
+
+    bloom_filter_entries_.reserve(bloom_filters.size());
+    for (std::size_t i = 0; i < bloom_filters.size(); ++i) {
+      bloom_filter_entries_.emplace_back(
+          bloom_filters[i], attribute_ids[i], attr_sizes[i]);
+    }
+  }
+
+  /**
+   * @brief Probe the first batch_size tuple ids of a batch against every
+   *        bloom filter, keeping only those that pass all of them.
+   *
+   * @tparam adapt_filters Whether to record hit/miss statistics and reorder
+   *         the filters afterwards.
+   * @param accessor The value accessor the attribute values are read from.
+   * @param batch In: the tuple ids to probe. Out: the surviving tuple ids,
+   *        compacted at the front.
+   * @param batch_size The number of valid tuple ids in batch.
+   * @return The number of surviving tuple ids.
+   */
+  template <bool adapt_filters, typename ValueAccessorT>
+  inline std::size_t bulkProbe(const ValueAccessorT *accessor,
+                               std::vector<tuple_id> &batch,
+                               const std::size_t batch_size) {
+    std::size_t out_size = batch_size;
+    for (BloomFilterEntry &entry : bloom_filter_entries_) {
+      out_size = bulkProbeBloomFilterEntry<adapt_filters>(entry, accessor, batch, out_size);
+    }
+    // Without statistics there is nothing to base a reordering on.
+    if (adapt_filters) {
+      adaptEntryOrder();
+    }
+    return out_size;
+  }
+
+ private:
+  // One bloom filter together with its probe attribute and running statistics.
+  struct BloomFilterEntry {
+    BloomFilterEntry(const BloomFilter *in_bloom_filter,
+                     const attribute_id &in_attribute_id,
+                     const std::size_t &in_attribute_size)
+        : bloom_filter(in_bloom_filter),
+          attr_id(in_attribute_id),
+          attribute_size(in_attribute_size),
+          miss(0),
+          cnt(0),
+          miss_rate(0.0f) {
+    }
+
+    // Orders entries by descending miss rate.
+    static bool isBetterThan(const BloomFilterEntry &a,
+                             const BloomFilterEntry &b) {
+      return a.miss_rate > b.miss_rate;
+    }
+
+    const BloomFilter *bloom_filter;
+    // Named attr_id (not attribute_id) so that the member does not change the
+    // meaning of the type name inside this struct.
+    attribute_id attr_id;
+    std::size_t attribute_size;
+    std::uint64_t miss;  // Tuples rejected by this filter so far.
+    std::uint64_t cnt;   // Tuples probed against this filter so far.
+    float miss_rate;     // miss / cnt, refreshed by adaptEntryOrder().
+  };
+
+  // Probes batch[0, in_size) against one filter, compacts the survivors to
+  // the front of batch and returns how many there are.
+  template <bool adapt_filters, typename ValueAccessorT>
+  inline std::size_t bulkProbeBloomFilterEntry(
+      BloomFilterEntry &entry,
+      const ValueAccessorT *accessor,
+      std::vector<tuple_id> &batch,
+      const std::size_t in_size) {
+    std::size_t out_size = 0;
+    const BloomFilter *bloom_filter = entry.bloom_filter;
+
+    for (std::size_t t = 0; t < in_size; ++t) {
+      const tuple_id tid = batch[t];
+      const auto value = static_cast<const std::uint8_t*>(
+          accessor->getUntypedValueAtAbsolutePosition(entry.attr_id, tid));
+      if (bloom_filter->contains(value, entry.attribute_size)) {
+        batch[out_size] = tid;
+        ++out_size;
+      }
+    }
+    if (adapt_filters) {
+      entry.cnt += in_size;
+      entry.miss += (in_size - out_size);
+    }
+    return out_size;
+  }
+
+  // Recomputes every entry's miss rate and sorts the entries by it.
+  inline void adaptEntryOrder() {
+    for (BloomFilterEntry &entry : bloom_filter_entries_) {
+      // An entry that has not seen any tuple yet gets rate 0 instead of the
+      // NaN that 0/0 would produce; NaN would violate the strict weak
+      // ordering std::sort requires of its comparator.
+      entry.miss_rate = entry.cnt == 0
+          ? 0.0f
+          : static_cast<float>(entry.miss) / static_cast<float>(entry.cnt);
+    }
+    std::sort(bloom_filter_entries_.begin(),
+              bloom_filter_entries_.end(),
+              BloomFilterEntry::isBetterThan);
+  }
+
+  std::vector<BloomFilterEntry> bloom_filter_entries_;
+
+  DISALLOW_COPY_AND_ASSIGN(BloomFilterAdapter);
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_UTILITY_BLOOM_FILTER_ADAPTER_HPP
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index 803b909..dfb8a6e 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -159,6 +159,7 @@ add_library(quickstep_utility_Alignment ../empty_src.cpp Alignment.hpp)
add_library(quickstep_utility_BitManipulation ../empty_src.cpp BitManipulation.hpp)
add_library(quickstep_utility_BitVector ../empty_src.cpp BitVector.hpp)
add_library(quickstep_utility_BloomFilter ../empty_src.cpp BloomFilter.hpp)
+add_library(quickstep_utility_BloomFilterAdapter ../empty_src.cpp BloomFilterAdapter.hpp)
add_library(quickstep_utility_BloomFilter_proto
${quickstep_utility_BloomFilter_proto_srcs}
${quickstep_utility_BloomFilter_proto_hdrs})
@@ -166,6 +167,8 @@ add_library(quickstep_utility_CalculateInstalledMemory CalculateInstalledMemory.
add_library(quickstep_utility_Cast ../empty_src.cpp Cast.hpp)
add_library(quickstep_utility_CheckSnprintf ../empty_src.cpp CheckSnprintf.hpp)
add_library(quickstep_utility_DAG ../empty_src.cpp DAG.hpp)
+add_library(quickstep_utility_DisjointTreeForest ../empty_src.cpp DisjointTreeForest.hpp)
+add_library(quickstep_utility_EventProfiler EventProfiler.cpp EventProfiler.hpp)
add_library(quickstep_utility_EqualsAnyConstant ../empty_src.cpp EqualsAnyConstant.hpp)
add_library(quickstep_utility_ExecutionDAGVisualizer
ExecutionDAGVisualizer.cpp
@@ -219,6 +222,10 @@ target_link_libraries(quickstep_utility_BloomFilter
quickstep_threading_SpinSharedMutex
quickstep_utility_BloomFilter_proto
quickstep_utility_Macros)
+target_link_libraries(quickstep_utility_BloomFilterAdapter
+ quickstep_catalog_CatalogTypedefs
+ quickstep_utility_BloomFilter
+ quickstep_utility_Macros)
target_link_libraries(quickstep_utility_BloomFilter_proto
${PROTOBUF_LIBRARY})
target_link_libraries(quickstep_utility_CalculateInstalledMemory
@@ -228,6 +235,9 @@ target_link_libraries(quickstep_utility_CheckSnprintf
target_link_libraries(quickstep_utility_DAG
glog
quickstep_utility_Macros)
+target_link_libraries(quickstep_utility_DisjointTreeForest)
+target_link_libraries(quickstep_utility_EventProfiler
+ quickstep_threading_Mutex)
target_link_libraries(quickstep_utility_ExecutionDAGVisualizer
quickstep_catalog_CatalogRelationSchema
quickstep_queryexecution_QueryExecutionTypedefs
@@ -310,11 +320,14 @@ target_link_libraries(quickstep_utility
quickstep_utility_BitManipulation
quickstep_utility_BitVector
quickstep_utility_BloomFilter
+ quickstep_utility_BloomFilterAdapter
quickstep_utility_BloomFilter_proto
quickstep_utility_CalculateInstalledMemory
quickstep_utility_Cast
quickstep_utility_CheckSnprintf
quickstep_utility_DAG
+ quickstep_utility_DisjointTreeForest
+ quickstep_utility_EventProfiler
quickstep_utility_EqualsAnyConstant
quickstep_utility_ExecutionDAGVisualizer
quickstep_utility_Glob
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/utility/DisjointTreeForest.hpp
----------------------------------------------------------------------
diff --git a/utility/DisjointTreeForest.hpp b/utility/DisjointTreeForest.hpp
new file mode 100644
index 0000000..f5722ba
--- /dev/null
+++ b/utility/DisjointTreeForest.hpp
@@ -0,0 +1,116 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_DISJOINT_TREE_FOREST_HPP_
+#define QUICKSTEP_UTILITY_DISJOINT_TREE_FOREST_HPP_
+
+#include <algorithm>
+#include <cstddef>
+#include <limits>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ * @{
+ */
+
+/**
+ * @brief A disjoint-set forest (a.k.a. union-find set). Every element added
+ *        with makeSet() is assigned the next node id, starting at 0; sets are
+ *        identified by the node id of their root.
+ */
+template <typename ElementT,
+          class MapperT = std::unordered_map<ElementT, std::size_t>>
+class DisjointTreeForest {
+ public:
+  /**
+   * @brief Check whether an element has been added with makeSet().
+   */
+  inline bool hasElement(const ElementT &element) const {
+    const auto it = elements_map_.find(element);
+    return it != elements_map_.end();
+  }
+
+  /**
+   * @brief Add an element as a new singleton set. Does nothing if the element
+   *        is already present.
+   */
+  inline void makeSet(const ElementT &element) {
+    if (hasElement(element)) {
+      return;
+    }
+    const std::size_t new_id = nodes_.size();
+    nodes_.emplace_back(0, new_id);  // rank 0, its own parent
+    elements_map_.emplace(element, new_id);
+  }
+
+  /**
+   * @brief Find the root node id of the set containing an element and
+   *        re-parent every node on the walked path directly to that root.
+   *
+   * @note Uses elements_map_.at(), so with the default mapper an element that
+   *       was never added makes this throw std::out_of_range.
+   */
+  inline std::size_t find(const ElementT &element) {
+    const std::size_t start_id = elements_map_.at(element);
+    std::size_t root_id = start_id;
+    while (nodes_[root_id].parent != root_id) {
+      root_id = nodes_[root_id].parent;
+    }
+    compress_path(start_id, root_id);
+    return root_id;
+  }
+
+  /**
+   * @brief Merge the sets containing the two elements. The root with the
+   *        smaller rank is attached to the other one; on a tie the root of
+   *        element1 is attached to the root of element2, whose rank grows.
+   */
+  inline void merge(const ElementT &element1, const ElementT &element2) {
+    const std::size_t first_root = find(element1);
+    const std::size_t second_root = find(element2);
+    if (first_root == second_root) {
+      return;
+    }
+    Node &first = nodes_[first_root];
+    Node &second = nodes_[second_root];
+    if (first.rank > second.rank) {
+      second.parent = first_root;
+      return;
+    }
+    if (first.rank == second.rank) {
+      second.rank += 1;
+    }
+    first.parent = second_root;
+  }
+
+  /**
+   * @brief Check whether two elements currently belong to the same set.
+   */
+  inline bool isConnected(const ElementT &element1, const ElementT &element2) {
+    const std::size_t first_root = find(element1);
+    const std::size_t second_root = find(element2);
+    return first_root == second_root;
+  }
+
+ private:
+  struct Node {
+    Node(const std::size_t rank_in, const std::size_t parent_in)
+        : rank(rank_in), parent(parent_in) {
+    }
+    std::size_t rank;
+    std::size_t parent;  // A root is its own parent.
+  };
+
+  // Re-parents every node from leaf_node_id up to (excluding) the root
+  // directly to the root, then sets the root's rank to one more than the
+  // largest rank seen on that path (1 when the leaf is the root itself).
+  inline void compress_path(const std::size_t leaf_node_id, const std::size_t root_node_id) {
+    std::size_t highest_rank = 0;
+    std::size_t current_id = leaf_node_id;
+    while (current_id != root_node_id) {
+      Node &current = nodes_[current_id];
+      highest_rank = std::max(highest_rank, current.rank);
+      const std::size_t next_id = current.parent;
+      current.parent = root_node_id;
+      current_id = next_id;
+    }
+    nodes_[root_node_id].rank = highest_rank + 1;
+  }
+
+  std::vector<Node> nodes_;
+  MapperT elements_map_;
+
+  static constexpr std::size_t kInvalid = std::numeric_limits<std::size_t>::max();
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_UTILITY_DISJOINT_TREE_FOREST_HPP_
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/utility/EventProfiler.cpp
----------------------------------------------------------------------
diff --git a/utility/EventProfiler.cpp b/utility/EventProfiler.cpp
new file mode 100644
index 0000000..728ebff
--- /dev/null
+++ b/utility/EventProfiler.cpp
@@ -0,0 +1,29 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#include "utility/EventProfiler.hpp"
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+namespace quickstep {
+
+// Global profiler instances defined in namespace quickstep.
+// NOTE(review): presumably these are declared `extern` in EventProfiler.hpp,
+// past the part of the header shown here — confirm.
+// Events tagged by an int, each carrying one std::size_t payload value.
+EventProfiler<int, std::size_t> simple_profiler;
+// Events tagged by a std::size_t, carrying no payload.
+EventProfiler<std::size_t> relop_profiler;
+
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/utility/EventProfiler.hpp
----------------------------------------------------------------------
diff --git a/utility/EventProfiler.hpp b/utility/EventProfiler.hpp
new file mode 100644
index 0000000..70024e6
--- /dev/null
+++ b/utility/EventProfiler.hpp
@@ -0,0 +1,188 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin\u2014Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_EVENT_PROFILER_HPP_
+#define QUICKSTEP_UTILITY_EVENT_PROFILER_HPP_
+
+#include <chrono>
+#include <cstddef>
+#include <cstring>
+#include <ctime>
+#include <iomanip>
+#include <map>
+#include <ostream>
+#include <string>
+#include <thread>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "threading/Mutex.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ * @{
+ */
+
+using clock = std::chrono::steady_clock;  // Clock used for every EventProfiler timestamp.
+
+/**
+ * @brief Records timed events per thread, grouped by a tag, and writes them
+ *        out as comma-separated lines.
+ *
+ * @tparam TagT Key type used to group events inside each thread's container.
+ *         It is used as a std::map key and streamed to std::ostream.
+ * @tparam PayloadT Zero or more value types stored with every event and
+ *         streamed by writeToStream().
+ **/
+template <typename TagT, typename ...PayloadT>
+class EventProfiler {
+ public:
+  EventProfiler()
+      : zero_time_(clock::now()) {
+  }
+
+  /**
+   * @brief A single event: start/end timestamps, a finished flag and the
+   *        user-supplied payload.
+   **/
+  struct EventInfo {
+    clock::time_point start_time;
+    clock::time_point end_time;
+    bool is_finished;
+    std::tuple<PayloadT...> payload;
+
+    explicit EventInfo(const clock::time_point &start_time_in)
+        : start_time(start_time_in),
+          is_finished(false) {
+    }
+
+    EventInfo()
+        : start_time(clock::now()),
+          is_finished(false) {
+    }
+
+    // Stores the payload values. The arguments are rvalue references, so
+    // forward them instead of copying.
+    inline void setPayload(PayloadT &&...in_payload) {
+      payload = std::make_tuple(std::forward<PayloadT>(in_payload)...);
+    }
+
+    // Stamps the end time and marks the event as finished.
+    inline void endEvent() {
+      end_time = clock::now();
+      is_finished = true;
+    }
+  };
+
+  /**
+   * @brief The events of one thread, grouped by tag, plus an integer context.
+   **/
+  struct EventContainer {
+    EventContainer()
+        : context(0) {}
+
+    // Appends a new event for 'tag' that starts now.
+    inline void startEvent(const TagT &tag) {
+      events[tag].emplace_back(clock::now());
+    }
+
+    // Finishes the most recently started event for 'tag'. std::map::at()
+    // throws if no event was ever started for 'tag'.
+    inline void endEvent(const TagT &tag) {
+      auto &event_info = events.at(tag).back();
+      event_info.is_finished = true;
+      event_info.end_time = clock::now();
+    }
+
+    // Returns the event list for 'tag', creating an empty one if needed.
+    inline std::vector<EventInfo> *getEventLine(const TagT &tag) {
+      return &events[tag];
+    }
+
+    inline void setContext(int context_in) {
+      context = context_in;
+    }
+
+    inline int getContext() const {
+      return context;
+    }
+
+    std::map<TagT, std::vector<EventInfo>> events;
+    int context;
+  };
+
+  /**
+   * @brief Get the container of the calling thread, creating it on first use.
+   *        Only the map lookup/insertion is guarded by the mutex.
+   **/
+  EventContainer *getContainer() {
+    MutexLock lock(mutex_);
+    return &thread_map_[std::this_thread::get_id()];
+  }
+
+  /**
+   * @brief Write one line per event:
+   *        timestamp,thread index,tag,payload values...,start,end
+   *        where start/end are seconds relative to zero_time().
+   *
+   * @note Every event must be finished (CHECK-enforced).
+   * @note NOTE(review): this does not acquire mutex_; presumably callers
+   *       must make sure no thread is still recording -- confirm.
+   **/
+  void writeToStream(std::ostream &os) const {
+    time_t rawtime;
+    time(&rawtime);
+    char event_id[32];
+    strftime(event_id, sizeof event_id, "%Y-%m-%d %H:%M:%S", localtime(&rawtime));
+
+    int thread_id = 0;
+    for (const auto &thread_ctx : thread_map_) {
+      for (const auto &event_group : thread_ctx.second.events) {
+        for (const auto &event_info : event_group.second) {
+          CHECK(event_info.is_finished) << "Unfinished profiling event";
+
+          os << std::setprecision(12)
+             << event_id << ","
+             << thread_id << "," << event_group.first << ",";
+
+          PrintTuple(os, event_info.payload, ",");
+
+          os << std::chrono::duration<double>(event_info.start_time - zero_time_).count()
+             << ","
+             << std::chrono::duration<double>(event_info.end_time - zero_time_).count()
+             << "\n";
+        }
+      }
+      ++thread_id;
+    }
+  }
+
+  /**
+   * @brief Drop all recorded events and restart the reference time.
+   *        Pointers previously returned by getContainer() become invalid.
+   **/
+  void clear() {
+    zero_time_ = clock::now();
+    thread_map_.clear();
+  }
+
+  const std::map<std::thread::id, EventContainer> &containers() {
+    return thread_map_;
+  }
+
+  const clock::time_point &zero_time() {
+    return zero_time_;
+  }
+
+ private:
+  // Prints the first N elements of a tuple, each followed by 'sep'.
+  template<class Tuple, std::size_t N>
+  struct TuplePrinter {
+    static void Print(std::ostream &os, const Tuple &t, const std::string &sep) {
+      TuplePrinter<Tuple, N-1>::Print(os, t, sep);
+      os << std::get<N-1>(t) << sep;
+    }
+  };
+
+  // Recursion base case. Terminating at N == 0 (rather than N == 1) keeps
+  // PrintTuple well-formed for profilers that carry no payload at all.
+  template<class Tuple>
+  struct TuplePrinter<Tuple, 0> {
+    static void Print(std::ostream &, const Tuple &, const std::string &) {
+    }
+  };
+
+  template<class... Args>
+  static void PrintTuple(std::ostream &os, const std::tuple<Args...>& t, const std::string &sep) {
+    TuplePrinter<decltype(t), sizeof...(Args)>::Print(os, t, sep);
+  }
+
+  clock::time_point zero_time_;
+  std::map<std::thread::id, EventContainer> thread_map_;
+  Mutex mutex_;
+};
+
+extern EventProfiler<int, std::size_t> simple_profiler;  // int tags, one std::size_t payload value per event.
+extern EventProfiler<std::size_t> relop_profiler;  // std::size_t tags, no payload.
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_UTILITY_EVENT_PROFILER_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/utility/PlanVisualizer.cpp
----------------------------------------------------------------------
diff --git a/utility/PlanVisualizer.cpp b/utility/PlanVisualizer.cpp
index 962d577..4cc1b0f 100644
--- a/utility/PlanVisualizer.cpp
+++ b/utility/PlanVisualizer.cpp
@@ -19,6 +19,7 @@
#include <cstddef>
#include <memory>
+#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
@@ -28,6 +29,7 @@
#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
#include "query_optimizer/physical/HashJoin.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/PhysicalType.hpp"
@@ -101,6 +103,10 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
int node_id = ++id_counter_;
node_id_map_.emplace(input, node_id);
+ std::set<E::ExprId> referenced_ids;
+ for (const auto &attr : input->getReferencedAttributes()) {
+ referenced_ids.emplace(attr->id());
+ }
for (const auto &child : input->children()) {
visit(child);
@@ -111,10 +117,8 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
edge_info.src_node_id = child_id;
edge_info.dst_node_id = node_id;
- // Print output attributes except for TableReference -- there are just too many
- // attributes out of TableReference.
- if (child->getPhysicalType() != P::PhysicalType::kTableReference) {
- for (const auto &attr : child->getOutputAttributes()) {
+ for (const auto &attr : child->getOutputAttributes()) {
+ if (referenced_ids.find(attr->id()) != referenced_ids.end()) {
edge_info.labels.emplace_back(attr->attribute_alias());
}
}
@@ -145,6 +149,36 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
node_info.labels.emplace_back(
left_attributes[i]->attribute_alias() + " = " + right_attributes[i]->attribute_alias());
}
+ if (hash_join->left()->impliesUniqueAttributes(left_attributes)) {
+ node_info.labels.emplace_back("LEFT join attrs unique");
+ }
+ if (hash_join->right()->impliesUniqueAttributes(right_attributes)) {
+ node_info.labels.emplace_back("RIGHT join attrs unique");
+ }
+
+ const auto &bf_config = hash_join->bloom_filter_config();
+ for (const auto &bf : bf_config.build_side_bloom_filters) {
+ node_info.labels.emplace_back(
+ std::string("[BF build] ") + bf.attribute->attribute_alias());
+ }
+ for (const auto &bf : bf_config.probe_side_bloom_filters) {
+ node_info.labels.emplace_back(
+ std::string("[BF probe] ") + bf.attribute->attribute_alias());
+ }
+
+ break;
+ }
+ case P::PhysicalType::kAggregate: {
+ const P::AggregatePtr aggregate =
+ std::static_pointer_cast<const P::Aggregate>(input);
+ node_info.labels.emplace_back(input->getName());
+
+ const auto &bf_config = aggregate->bloom_filter_config();
+ for (const auto &bf : bf_config.probe_side_bloom_filters) {
+ node_info.labels.emplace_back(
+ std::string("[BF probe] ") + bf.attribute->attribute_alias());
+ }
+
break;
}
default: {
[02/13] incubator-quickstep git commit: RANGE mode and computation
optimization. - Supported RANGE mode for window aggregation. - Optimized the
AVG calculation time complexity from O(nk) to O(n),
where n is the number of tuples and k is the window size.
Posted by ji...@apache.org.
RANGE mode and computation optimization.
- Supported RANGE mode for window aggregation.
- Optimized the AVG calculation time complexity from O(nk) to O(n), where n is the number of tuples and k is the window size.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/d0172fde
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/d0172fde
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/d0172fde
Branch: refs/heads/LIP-for-tpch
Commit: d0172fde0cffbf10bc858090e11169e11834be89
Parents: e53186e
Author: shixuan <sh...@apache.org>
Authored: Tue Jul 26 11:49:07 2016 -0500
Committer: shixuan <sh...@apache.org>
Committed: Mon Aug 1 09:38:34 2016 -0500
----------------------------------------------------------------------
expressions/window_aggregation/CMakeLists.txt | 15 +-
.../WindowAggregateFunction.hpp | 19 +-
.../WindowAggregateFunctionAvg.cpp | 14 +-
.../WindowAggregateFunctionAvg.hpp | 6 +-
.../WindowAggregateFunctionCount.cpp | 6 +-
.../WindowAggregateFunctionCount.hpp | 6 +-
.../WindowAggregateFunctionMax.cpp | 6 +-
.../WindowAggregateFunctionMax.hpp | 6 +-
.../WindowAggregateFunctionMin.cpp | 6 +-
.../WindowAggregateFunctionMin.hpp | 6 +-
.../WindowAggregateFunctionSum.cpp | 6 +-
.../WindowAggregateFunctionSum.hpp | 6 +-
.../WindowAggregationHandle.cpp | 186 ++++++++++++++++
.../WindowAggregationHandle.hpp | 100 ++++++---
.../WindowAggregationHandleAvg.cpp | 201 ++++++-----------
.../WindowAggregationHandleAvg.hpp | 35 ++-
.../WindowAggregationHandleAvg_unittest.cpp | 220 +++++++++++++++----
query_optimizer/ExecutionGenerator.cpp | 11 +-
query_optimizer/resolver/Resolver.cpp | 19 +-
.../tests/execution_generator/Select.test | 41 +++-
storage/WindowAggregationOperationState.cpp | 69 +++---
storage/WindowAggregationOperationState.hpp | 9 +-
storage/WindowAggregationOperationState.proto | 1 +
23 files changed, 692 insertions(+), 302 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/CMakeLists.txt b/expressions/window_aggregation/CMakeLists.txt
index 6a16fcc..3a79b7e 100644
--- a/expressions/window_aggregation/CMakeLists.txt
+++ b/expressions/window_aggregation/CMakeLists.txt
@@ -44,7 +44,7 @@ add_library(quickstep_expressions_windowaggregation_WindowAggregateFunctionSum
WindowAggregateFunctionSum.cpp
WindowAggregateFunctionSum.hpp)
add_library(quickstep_expressions_windowaggregation_WindowAggregationHandle
- ../../empty_src.cpp
+ WindowAggregationHandle.cpp
WindowAggregationHandle.hpp)
add_library(quickstep_expressions_windowaggregation_WindowAggregationHandleAvg
WindowAggregationHandleAvg.cpp
@@ -130,10 +130,17 @@ target_link_libraries(quickstep_expressions_windowaggregation_WindowAggregationH
glog
quickstep_catalog_CatalogRelationSchema
quickstep_catalog_CatalogTypedefs
+ quickstep_expressions_scalar_Scalar
quickstep_storage_StorageBlockInfo
+ quickstep_types_Type
+ quickstep_types_TypeFactory
+ quickstep_types_TypeID
quickstep_types_TypedValue
quickstep_types_containers_ColumnVector
quickstep_types_containers_ColumnVectorsValueAccessor
+ quickstep_types_operations_binaryoperations_BinaryOperation
+ quickstep_types_operations_binaryoperations_BinaryOperationFactory
+ quickstep_types_operations_binaryoperations_BinaryOperationID
quickstep_types_operations_comparisons_Comparison
quickstep_types_operations_comparisons_ComparisonFactory
quickstep_types_operations_comparisons_ComparisonID
@@ -141,8 +148,6 @@ target_link_libraries(quickstep_expressions_windowaggregation_WindowAggregationH
target_link_libraries(quickstep_expressions_windowaggregation_WindowAggregationHandleAvg
glog
quickstep_catalog_CatalogTypedefs
- quickstep_expressions_scalar_Scalar
- quickstep_expressions_scalar_ScalarAttribute
quickstep_expressions_windowaggregation_WindowAggregationHandle
quickstep_storage_ValueAccessor
quickstep_types_Type
@@ -179,11 +184,13 @@ add_executable(WindowAggregationHandle_tests
target_link_libraries(WindowAggregationHandle_tests
gtest
gtest_main
+ quickstep_catalog_CatalogAttribute
quickstep_catalog_CatalogTypedefs
+ quickstep_expressions_scalar_Scalar
+ quickstep_expressions_scalar_ScalarAttribute
quickstep_expressions_windowaggregation_WindowAggregateFunction
quickstep_expressions_windowaggregation_WindowAggregateFunctionFactory
quickstep_expressions_windowaggregation_WindowAggregationHandle
- quickstep_expressions_windowaggregation_WindowAggregationHandleAvg
quickstep_expressions_windowaggregation_WindowAggregationID
quickstep_storage_ValueAccessor
quickstep_types_CharType
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregateFunction.hpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregateFunction.hpp b/expressions/window_aggregation/WindowAggregateFunction.hpp
index e40479b..7ffc4ae 100644
--- a/expressions/window_aggregation/WindowAggregateFunction.hpp
+++ b/expressions/window_aggregation/WindowAggregateFunction.hpp
@@ -20,6 +20,7 @@
#ifndef QUICKSTEP_EXPRESSIONS_WINDOW_AGGREGATION_WINDOW_AGGREGATE_FUNCTION_HPP_
#define QUICKSTEP_EXPRESSIONS_WINDOW_AGGREGATION_WINDOW_AGGREGATE_FUNCTION_HPP_
+#include <memory>
#include <string>
#include <vector>
@@ -32,6 +33,7 @@
namespace quickstep {
class CatalogRelationSchema;
+class Scalar;
class Type;
class WindowAggregationHandle;
@@ -120,16 +122,23 @@ class WindowAggregateFunction {
*
* @param argument_types A list of zero or more Types (in order) for
* arguments to this WindowAggregateFunction.
- * @param partition_key_types A list or zero or more Types for partition keys
- * to this WindowAggregateFunction.
+ * @param partition_by_attributes A list of attributes used as partition key.
+ * @param order_by_attributes A list of attributes used as order key.
+ * @param is_row True if the frame mode is ROWS, false if RANGE.
+ * @param num_preceding The number of rows/range that precedes the current row.
+ * @param num_following The number of rows/range that follows the current row.
*
* @return A new WindowAggregationHandle that can be used to compute this
- * WindowAggregateFunction over the specified argument_types. Caller
- * is responsible for deleting the returned object.
+ * WindowAggregateFunction over the specified window definition.
+ * Caller is responsible for deleting the returned object.
**/
virtual WindowAggregationHandle* createHandle(
const std::vector<const Type*> &argument_types,
- const std::vector<const Type*> &partition_key_types) const = 0;
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+ const bool is_row,
+ const std::int64_t num_preceding,
+ const std::int64_t num_following) const = 0;
protected:
explicit WindowAggregateFunction(const WindowAggregationID win_agg_id)
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregateFunctionAvg.cpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregateFunctionAvg.cpp b/expressions/window_aggregation/WindowAggregateFunctionAvg.cpp
index bc31a53..beb1c7a 100644
--- a/expressions/window_aggregation/WindowAggregateFunctionAvg.cpp
+++ b/expressions/window_aggregation/WindowAggregateFunctionAvg.cpp
@@ -73,13 +73,21 @@ const Type* WindowAggregateFunctionAvg::resultTypeForArgumentTypes(
WindowAggregationHandle* WindowAggregateFunctionAvg::createHandle(
const std::vector<const Type*> &argument_types,
- const std::vector<const Type*> &partition_key_types) const {
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+ const bool is_row,
+ const std::int64_t num_preceding,
+ const std::int64_t num_following) const {
DCHECK(canApplyToTypes(argument_types))
<< "Attempted to create an WindowAggregationHandleAvg for argument Type(s)"
<< " that AVG can not be applied to.";
- return new WindowAggregationHandleAvg(partition_key_types,
- *argument_types.front());
+ return new WindowAggregationHandleAvg(partition_by_attributes,
+ order_by_attributes,
+ is_row,
+ num_preceding,
+ num_following,
+ argument_types[0]);
}
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregateFunctionAvg.hpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregateFunctionAvg.hpp b/expressions/window_aggregation/WindowAggregateFunctionAvg.hpp
index 32fd9d5..0e50415 100644
--- a/expressions/window_aggregation/WindowAggregateFunctionAvg.hpp
+++ b/expressions/window_aggregation/WindowAggregateFunctionAvg.hpp
@@ -58,7 +58,11 @@ class WindowAggregateFunctionAvg : public WindowAggregateFunction {
WindowAggregationHandle* createHandle(
const std::vector<const Type*> &argument_types,
- const std::vector<const Type*> &partition_key_types) const override;
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+ const bool is_row,
+ const std::int64_t num_preceding,
+ const std::int64_t num_following) const override;
private:
WindowAggregateFunctionAvg()
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregateFunctionCount.cpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregateFunctionCount.cpp b/expressions/window_aggregation/WindowAggregateFunctionCount.cpp
index 504e000..ccd81ac 100644
--- a/expressions/window_aggregation/WindowAggregateFunctionCount.cpp
+++ b/expressions/window_aggregation/WindowAggregateFunctionCount.cpp
@@ -47,7 +47,11 @@ const Type* WindowAggregateFunctionCount::resultTypeForArgumentTypes(
WindowAggregationHandle* WindowAggregateFunctionCount::createHandle(
const std::vector<const Type*> &argument_types,
- const std::vector<const Type*> &partition_key_types) const {
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+ const bool is_row,
+ const std::int64_t num_preceding,
+ const std::int64_t num_following) const {
DCHECK(canApplyToTypes(argument_types))
<< "Attempted to create a WindowAggregationHandleCount for argument Types "
<< "that COUNT can not be applied to (> 1 argument).";
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregateFunctionCount.hpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregateFunctionCount.hpp b/expressions/window_aggregation/WindowAggregateFunctionCount.hpp
index 1b40fdd..2e5506a 100644
--- a/expressions/window_aggregation/WindowAggregateFunctionCount.hpp
+++ b/expressions/window_aggregation/WindowAggregateFunctionCount.hpp
@@ -58,7 +58,11 @@ class WindowAggregateFunctionCount : public WindowAggregateFunction {
WindowAggregationHandle* createHandle(
const std::vector<const Type*> &argument_types,
- const std::vector<const Type*> &partition_key_types) const override;
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+ const bool is_row,
+ const std::int64_t num_preceding,
+ const std::int64_t num_following) const override;
private:
WindowAggregateFunctionCount()
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregateFunctionMax.cpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregateFunctionMax.cpp b/expressions/window_aggregation/WindowAggregateFunctionMax.cpp
index f3997c7..acfce82 100644
--- a/expressions/window_aggregation/WindowAggregateFunctionMax.cpp
+++ b/expressions/window_aggregation/WindowAggregateFunctionMax.cpp
@@ -55,7 +55,11 @@ const Type* WindowAggregateFunctionMax::resultTypeForArgumentTypes(
WindowAggregationHandle* WindowAggregateFunctionMax::createHandle(
const std::vector<const Type*> &argument_types,
- const std::vector<const Type*> &partition_key_types) const {
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+ const bool is_row,
+ const std::int64_t num_preceding,
+ const std::int64_t num_following) const {
DCHECK(canApplyToTypes(argument_types))
<< "Attempted to create a WindowAggregationHandleMax for argument Type(s) "
<< "that MAX can not be applied to.";
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregateFunctionMax.hpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregateFunctionMax.hpp b/expressions/window_aggregation/WindowAggregateFunctionMax.hpp
index 00c788e..a215703 100644
--- a/expressions/window_aggregation/WindowAggregateFunctionMax.hpp
+++ b/expressions/window_aggregation/WindowAggregateFunctionMax.hpp
@@ -58,7 +58,11 @@ class WindowAggregateFunctionMax : public WindowAggregateFunction {
WindowAggregationHandle* createHandle(
const std::vector<const Type*> &argument_types,
- const std::vector<const Type*> &partition_key_types) const override;
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+ const bool is_row,
+ const std::int64_t num_preceding,
+ const std::int64_t num_following) const override;
private:
WindowAggregateFunctionMax()
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregateFunctionMin.cpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregateFunctionMin.cpp b/expressions/window_aggregation/WindowAggregateFunctionMin.cpp
index a13e28e..cd845bd 100644
--- a/expressions/window_aggregation/WindowAggregateFunctionMin.cpp
+++ b/expressions/window_aggregation/WindowAggregateFunctionMin.cpp
@@ -55,7 +55,11 @@ const Type* WindowAggregateFunctionMin::resultTypeForArgumentTypes(
WindowAggregationHandle* WindowAggregateFunctionMin::createHandle(
const std::vector<const Type*> &argument_types,
- const std::vector<const Type*> &partition_key_types) const {
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+ const bool is_row,
+ const std::int64_t num_preceding,
+ const std::int64_t num_following) const {
DCHECK(canApplyToTypes(argument_types))
<< "Attempted to create a WindowAggregationHandleMin for argument Type(s) "
<< "that MIN can not be applied to.";
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregateFunctionMin.hpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregateFunctionMin.hpp b/expressions/window_aggregation/WindowAggregateFunctionMin.hpp
index aeba539..fab88a8 100644
--- a/expressions/window_aggregation/WindowAggregateFunctionMin.hpp
+++ b/expressions/window_aggregation/WindowAggregateFunctionMin.hpp
@@ -58,7 +58,11 @@ class WindowAggregateFunctionMin : public WindowAggregateFunction {
WindowAggregationHandle* createHandle(
const std::vector<const Type*> &argument_types,
- const std::vector<const Type*> &partition_key_types) const override;
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+ const bool is_row,
+ const std::int64_t num_preceding,
+ const std::int64_t num_following) const override;
private:
WindowAggregateFunctionMin()
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregateFunctionSum.cpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregateFunctionSum.cpp b/expressions/window_aggregation/WindowAggregateFunctionSum.cpp
index 636c53a..e2aeb60 100644
--- a/expressions/window_aggregation/WindowAggregateFunctionSum.cpp
+++ b/expressions/window_aggregation/WindowAggregateFunctionSum.cpp
@@ -71,7 +71,11 @@ const Type* WindowAggregateFunctionSum::resultTypeForArgumentTypes(
WindowAggregationHandle* WindowAggregateFunctionSum::createHandle(
const std::vector<const Type*> &argument_types,
- const std::vector<const Type*> &partition_key_types) const {
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+ const bool is_row,
+ const std::int64_t num_preceding,
+ const std::int64_t num_following) const {
DCHECK(canApplyToTypes(argument_types))
<< "Attempted to create a WindowAggregationHandleSum for argument Type(s) "
<< "that SUM can not be applied to.";
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregateFunctionSum.hpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregateFunctionSum.hpp b/expressions/window_aggregation/WindowAggregateFunctionSum.hpp
index 047113c..8d7d61d 100644
--- a/expressions/window_aggregation/WindowAggregateFunctionSum.hpp
+++ b/expressions/window_aggregation/WindowAggregateFunctionSum.hpp
@@ -58,7 +58,11 @@ class WindowAggregateFunctionSum : public WindowAggregateFunction {
WindowAggregationHandle* createHandle(
const std::vector<const Type*> &argument_types,
- const std::vector<const Type*> &partition_key_types) const override;
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+ const bool is_row,
+ const std::int64_t num_preceding,
+ const std::int64_t num_following) const override;
private:
WindowAggregateFunctionSum()
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregationHandle.cpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregationHandle.cpp b/expressions/window_aggregation/WindowAggregationHandle.cpp
new file mode 100644
index 0000000..835eaff
--- /dev/null
+++ b/expressions/window_aggregation/WindowAggregationHandle.cpp
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "expressions/window_aggregation/WindowAggregationHandle.hpp"
+
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "expressions/scalar/Scalar.hpp"
+#include "types/Type.hpp"
+#include "types/TypeFactory.hpp"
+#include "types/TypeID.hpp"
+#include "types/TypedValue.hpp"
+#include "types/containers/ColumnVectorsValueAccessor.hpp"
+#include "types/operations/binary_operations/BinaryOperation.hpp"
+#include "types/operations/binary_operations/BinaryOperationFactory.hpp"
+#include "types/operations/binary_operations/BinaryOperationID.hpp"
+#include "types/operations/comparisons/Comparison.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/**
+ * Captures the window definition: the value-accessor IDs of the partition
+ * and order keys, one equality comparator per partition key, and -- in RANGE
+ * mode only -- the add operator and <= comparator used to test frame bounds.
+ **/
+WindowAggregationHandle::WindowAggregationHandle(
+    const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+    const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+    const bool is_row,
+    const std::int64_t num_preceding,
+    const std::int64_t num_following)
+    : is_row_(is_row),
+      num_preceding_(num_preceding),
+      num_following_(num_following) {
+  // Partition keys: remember each key's accessor ID and collect its type.
+  std::vector<const Type*> key_types;
+  for (const auto &partition_key : partition_by_attributes) {
+    partition_key_ids_.push_back(partition_key->getAttributeIdForValueAccessor());
+    key_types.push_back(&partition_key->getType());
+  }
+
+  // One equality comparator per partition key, used to decide whether two
+  // tuples fall into the same partition.
+  for (const Type *key_type : key_types) {
+    partition_equal_comparators_.emplace_back(
+        ComparisonFactory::GetComparison(ComparisonID::kEqual)
+            .makeUncheckedComparatorForTypes(*key_type, *key_type));
+  }
+
+  // Order keys: remember each key's accessor ID.
+  for (const auto &order_key : order_by_attributes) {
+    order_key_ids_.push_back(order_key->getAttributeIdForValueAccessor());
+  }
+
+  // ROWS mode needs nothing beyond the IDs collected above.
+  if (is_row) {
+    return;
+  }
+
+  // RANGE mode compares values of the first order key, so one must exist.
+  const Type *first_order_key_type =
+      order_by_attributes.empty() ? nullptr
+                                  : &order_by_attributes.front()->getType();
+  DCHECK(first_order_key_type != nullptr);
+
+  // Frame offsets are kLong values; boundaries are compared in the type that
+  // unifies the first order key's type with kLong.
+  const Type &long_type = TypeFactory::GetType(kLong, false);
+  range_compare_type_ =
+      TypeFactory::GetUnifyingType(*first_order_key_type, long_type);
+
+  range_add_operator_.reset(
+      BinaryOperationFactory::GetBinaryOperation(BinaryOperationID::kAdd)
+          .makeUncheckedBinaryOperatorForTypes(*first_order_key_type, long_type));
+  range_comparator_.reset(
+      ComparisonFactory::GetComparison(ComparisonID::kLessOrEqual)
+          .makeUncheckedComparatorForTypes(*range_compare_type_, *range_compare_type_));
+}
+
+/**
+ * Returns true iff test_tuple_id is a valid position in tuple_accessor and
+ * the tuple there equals the accessor's current tuple on every partition key.
+ **/
+bool WindowAggregationHandle::samePartition(
+    const ColumnVectorsValueAccessor *tuple_accessor,
+    const tuple_id test_tuple_id) const {
+  // A position outside [0, num_tuples) refers to no tuple at all.
+  const bool tuple_exists =
+      test_tuple_id >= 0 && test_tuple_id < tuple_accessor->getNumTuples();
+  if (!tuple_exists) {
+    return false;
+  }
+
+  // The two tuples share a partition only if all partition keys compare equal.
+  const std::size_t num_keys = partition_key_ids_.size();
+  for (std::size_t key_idx = 0; key_idx != num_keys; ++key_idx) {
+    const auto key_id = partition_key_ids_[key_idx];
+    const bool keys_equal =
+        partition_equal_comparators_[key_idx]->compareTypedValues(
+            tuple_accessor->getTypedValue(key_id),
+            tuple_accessor->getTypedValueAtAbsolutePosition(key_id, test_tuple_id));
+    if (!keys_equal) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * Returns true iff the tuple at test_tuple_id lies inside the window frame
+ * of the accessor's current tuple. A bound of -1 is treated as UNBOUNDED.
+ **/
+bool WindowAggregationHandle::inWindow(
+    const ColumnVectorsValueAccessor *tuple_accessor,
+    const tuple_id test_tuple_id) const {
+  // A tuple that does not exist or lies in another partition is never in
+  // the window.
+  if (!samePartition(tuple_accessor, test_tuple_id)) {
+    return false;
+  }
+
+  const tuple_id current_tuple_id = tuple_accessor->getCurrentPosition();
+
+  // The current tuple always belongs to its own window.
+  if (test_tuple_id == current_tuple_id) {
+    return true;
+  }
+
+  if (is_row_) {
+    // ROWS mode: the frame is a range of tuple positions around the current one.
+    if (num_preceding_ != -1 &&
+        test_tuple_id < current_tuple_id - num_preceding_) {
+      return false;
+    }
+    return num_following_ == -1 ||
+           test_tuple_id <= current_tuple_id + num_following_;
+  }
+
+  // RANGE mode: the frame is a value range of the first order key.
+  const auto order_key_id = order_key_ids_[0];
+  const Type &long_type = TypeFactory::GetType(kLong, false);
+
+  // NOTE(review): adding zero presumably converts the test key to the add
+  // operator's result type so it can be compared with the boundaries -- confirm.
+  const TypedValue test_value =
+      range_add_operator_->applyToTypedValues(
+          tuple_accessor->getTypedValueAtAbsolutePosition(order_key_id, test_tuple_id),
+          long_type.makeZeroValue());
+
+  // A NULL order key (on either tuple) is considered out of range.
+  if (test_value.isNull() ||
+      tuple_accessor->getTypedValue(order_key_id).isNull()) {
+    return false;
+  }
+
+  // Lower bound: current key - num_preceding <= test key, unless UNBOUNDED.
+  if (num_preceding_ > -1) {
+    const std::int64_t negated_preceding = -num_preceding_;
+    const TypedValue lower_bound =
+        range_add_operator_->applyToTypedValues(
+            tuple_accessor->getTypedValue(order_key_id),
+            long_type.makeValue(&negated_preceding));
+    if (!range_comparator_->compareTypedValues(lower_bound, test_value)) {
+      return false;
+    }
+  }
+
+  // Upper bound: test key <= current key + num_following, unless UNBOUNDED.
+  if (num_following_ > -1) {
+    const TypedValue upper_bound =
+        range_add_operator_->applyToTypedValues(
+            tuple_accessor->getTypedValue(order_key_id),
+            long_type.makeValue(&num_following_));
+    if (!range_comparator_->compareTypedValues(test_value, upper_bound)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregationHandle.hpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregationHandle.hpp b/expressions/window_aggregation/WindowAggregationHandle.hpp
index 65f95d9..41d1d96 100644
--- a/expressions/window_aggregation/WindowAggregationHandle.hpp
+++ b/expressions/window_aggregation/WindowAggregationHandle.hpp
@@ -27,19 +27,23 @@
#include "catalog/CatalogRelationSchema.hpp"
#include "catalog/CatalogTypedefs.hpp"
#include "storage/StorageBlockInfo.hpp"
+#include "types/Type.hpp"
+#include "types/TypeFactory.hpp"
+#include "types/TypeID.hpp"
#include "types/TypedValue.hpp"
#include "types/containers/ColumnVector.hpp"
#include "types/containers/ColumnVectorsValueAccessor.hpp"
#include "types/operations/comparisons/Comparison.hpp"
#include "types/operations/comparisons/ComparisonFactory.hpp"
#include "types/operations/comparisons/ComparisonID.hpp"
+#include "types/operations/binary_operations/BinaryOperation.hpp"
+#include "types/operations/binary_operations/BinaryOperationFactory.hpp"
+#include "types/operations/binary_operations/BinaryOperationID.hpp"
#include "utility/Macros.hpp"
namespace quickstep {
-class InsertDestinationInterface;
class Scalar;
-class StorageManager;
class Type;
class ValueAccessor;
@@ -55,27 +59,29 @@ class ValueAccessor;
*
* A WindowAggregationHandle is created by calling
* WindowAggregateFunction::createHandle(). The WindowAggregationHandle object
- * provides methods that are used to actually compute the window aggregate,
- * storing intermediate results in WindowAggregationState objects.
+ * provides methods that are used to actually compute the window aggregate.
*
* The work flow for computing a window aggregate is:
- * 1. Create an initial state by createInitialState().
- * 2. One thread will handle all the computation, iterating from the first
+ * 1. One thread will handle all the computation, iterating from the first
* tuple to the last tuple. Note there will be two modes that could be
* used upon different situations:
* a. If the window aggregate is defined as accumulative, which are:
* i. Functions applied to whole partition, such as rank(), ntile()
- * and dense_rank().
+ * and dense_rank(). (Not implemented yet).
* ii. The window frame is defined as "BETWEEN UNBOUNDED PRECEDING
* AND CURRENT ROW" or "BETWEEN CURRENT ROW AND UNBOUNDED
* FOLLOWING".
* Then, for functions except median, we could store some global
- * values in the state without keeping all the tuple values around.
+ * values without keeping all the tuple values around. For simplicity,
+ * in avg(), count() and sum(), we treat the accumulative one as
+ * sliding window since the time complexity does not vary.
* b. If the window frame is sliding, such as "BETWEEN 3 PRECEDING AND
- * 3 FOLLOWING", we have to store all the tuples in the state so that
+ * 3 FOLLOWING", we have to store all the tuples in the state (at
+ * least two pointers to the start tuple and end tuple), so that
* we could know which values should be dropped as the window slides.
- * For each computed value, generate a tuple store in the column vector.
- * 3. Insert the new column into the original relation and return.
+ * For each computed value, generate a TypedValue and store it into a
+ * ColumnVector for window aggregate values.
+ * 2. Return the result ColumnVector.
*
* TODO(Shixuan): Currently we don't support parallelization. The basic idea for
* parallelization is to calculate the partial result inside each block. Each
@@ -96,37 +102,67 @@ class WindowAggregationHandle {
*
* @param block_accessors A pointer to the value accessor of block attributes.
* @param arguments The ColumnVectors of arguments
- * @param partition_by_ids The ids of partition keys.
- * @param is_row True if the frame mode is ROWS, false if it is RANGE.
- * @param num_preceding The number of rows/range that precedes the current row.
- * @param num_following The number of rows/range that follows the current row.
*
* @return A ColumnVector of the calculated window aggregates.
**/
virtual ColumnVector* calculate(ColumnVectorsValueAccessor* block_accessors,
- std::vector<ColumnVector*> &&arguments,
- const std::vector<attribute_id> &partition_by_ids,
- const bool is_row,
- const std::int64_t num_preceding,
- const std::int64_t num_following) const = 0;
+ const std::vector<ColumnVector*> &arguments) const = 0;
protected:
/**
* @brief Constructor.
*
- * @param partition_key_types The Types of the partition key.
+ * @param partition_by_attributes A list of attributes used as partition key.
+ * @param order_by_attributes A list of attributes used as order key.
+ * @param is_row True if the frame mode is ROWS, false if RANGE.
+ * @param num_preceding The number of rows/range that precedes the current row.
+ * @param num_following The number of rows/range that follows the current row.
+ **/
+ WindowAggregationHandle(
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+ const bool is_row,
+ const std::int64_t num_preceding,
+ const std::int64_t num_following);
+
+ /**
+ * @brief Check if test tuple is in the same partition as the current
+ * tuple in the accessor.
+ *
+ * @param tuple_accessor The ValueAccessor for tuples.
+ * @param test_tuple_id The id of the test tuple.
+ *
+ * @return True if test tuple is in the same partition as the current tuple in
+ * the accessor, false if not.
**/
- explicit WindowAggregationHandle(
- const std::vector<const Type*> &partition_key_types) {
- // Comparison operators for checking if two tuples belong to the same partition.
- for (const Type *partition_key_type : partition_key_types) {
- equal_comparators_.emplace_back(
- ComparisonFactory::GetComparison(ComparisonID::kEqual)
- .makeUncheckedComparatorForTypes(*partition_key_type, *partition_key_type));
- }
- }
-
- std::vector<std::unique_ptr<UncheckedComparator>> equal_comparators_;
+ bool samePartition(const ColumnVectorsValueAccessor *tuple_accessor,
+ const tuple_id test_tuple_id) const;
+
+ /**
+ * @brief Check if test tuple is in the defined range.
+ *
+ * @param tuple_accessor The ValueAccessor for tuples.
+ * @param test_tuple_id The id of the test tuple.
+ *
+ * @return True if test tuple is in the defined window, false if not.
+ **/
+ bool inWindow(const ColumnVectorsValueAccessor *tuple_accessor,
+ const tuple_id test_tuple_id) const;
+
+ // IDs and comparators for partition keys.
+ std::vector<attribute_id> partition_key_ids_;
+ std::vector<std::unique_ptr<UncheckedComparator>> partition_equal_comparators_;
+
+ // IDs, type, Comparator and operator for frame boundary check in RANGE mode.
+ std::vector<attribute_id> order_key_ids_;
+ std::unique_ptr<UncheckedBinaryOperator> range_add_operator_;
+ std::unique_ptr<UncheckedComparator> range_comparator_; // Less than or Equal
+ const Type* range_compare_type_;
+
+ // Window frame information.
+ const bool is_row_;
+ const std::int64_t num_preceding_;
+ const std::int64_t num_following_;
private:
DISALLOW_COPY_AND_ASSIGN(WindowAggregationHandle);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregationHandleAvg.cpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregationHandleAvg.cpp b/expressions/window_aggregation/WindowAggregationHandleAvg.cpp
index a6a10d4..e6a4b3f 100644
--- a/expressions/window_aggregation/WindowAggregationHandleAvg.cpp
+++ b/expressions/window_aggregation/WindowAggregationHandleAvg.cpp
@@ -24,8 +24,7 @@
#include <vector>
#include "catalog/CatalogTypedefs.hpp"
-#include "expressions/scalar/Scalar.hpp"
-#include "expressions/scalar/ScalarAttribute.hpp"
+#include "expressions/window_aggregation/WindowAggregationHandle.hpp"
#include "storage/ValueAccessor.hpp"
#include "types/Type.hpp"
#include "types/TypeFactory.hpp"
@@ -42,14 +41,21 @@
namespace quickstep {
WindowAggregationHandleAvg::WindowAggregationHandleAvg(
- const std::vector<const Type*> &partition_key_types,
- const Type &type)
- : WindowAggregationHandle(partition_key_types),
- argument_type_(type) {
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
+ const bool is_row,
+ const std::int64_t num_preceding,
+ const std::int64_t num_following,
+ const Type *argument_type)
+ : WindowAggregationHandle(partition_by_attributes,
+ order_by_attributes,
+ is_row,
+ num_preceding,
+ num_following) {
// We sum Int as Long and Float as Double so that we have more headroom when
// adding many values.
TypeID type_id;
- switch (type.getTypeID()) {
+ switch (argument_type->getTypeID()) {
case kInt:
case kLong:
type_id = kLong;
@@ -59,7 +65,7 @@ WindowAggregationHandleAvg::WindowAggregationHandleAvg(
type_id = kDouble;
break;
default:
- type_id = type.getTypeID();
+ type_id = argument_type->getTypeID();
break;
}
@@ -76,7 +82,13 @@ WindowAggregationHandleAvg::WindowAggregationHandleAvg(
// Add operator for summing argument values.
fast_add_operator_.reset(
BinaryOperationFactory::GetBinaryOperation(BinaryOperationID::kAdd)
- .makeUncheckedBinaryOperatorForTypes(*sum_type_, argument_type_));
+ .makeUncheckedBinaryOperatorForTypes(*sum_type_, *argument_type));
+
+ // Subtract operator for dropping argument values off the window.
+ fast_subtract_operator_.reset(
+ BinaryOperationFactory::GetBinaryOperation(BinaryOperationID::kSubtract)
+ .makeUncheckedBinaryOperatorForTypes(*sum_type_, *argument_type));
+
// Divide operator for dividing sum by count to get final average.
divide_operator_.reset(
BinaryOperationFactory::GetBinaryOperation(BinaryOperationID::kDivide)
@@ -85,11 +97,7 @@ WindowAggregationHandleAvg::WindowAggregationHandleAvg(
ColumnVector* WindowAggregationHandleAvg::calculate(
ColumnVectorsValueAccessor *tuple_accessor,
- std::vector<ColumnVector*> &&arguments,
- const std::vector<attribute_id> &partition_by_ids,
- const bool is_row,
- const std::int64_t num_preceding,
- const std::int64_t num_following) const {
+ const std::vector<ColumnVector*> &arguments) const {
DCHECK_EQ(1u, arguments.size());
DCHECK(arguments[0]->isNative());
DCHECK_EQ(static_cast<std::size_t>(tuple_accessor->getNumTuples()),
@@ -98,144 +106,69 @@ ColumnVector* WindowAggregationHandleAvg::calculate(
// Initialize the output column and argument accessor.
NativeColumnVector *window_aggregates =
new NativeColumnVector(*result_type_, tuple_accessor->getNumTuples());
- ColumnVectorsValueAccessor* argument_accessor = new ColumnVectorsValueAccessor();
+ ColumnVectorsValueAccessor *argument_accessor = new ColumnVectorsValueAccessor();
argument_accessor->addColumn(arguments[0]);
+ // Initialize the information about the window.
+ TypedValue sum = sum_type_->makeZeroValue();
+ std::uint64_t count = 0;
+ tuple_id start_tuple_id = 0; // The id of the first tuple in the window.
+ tuple_id end_tuple_id = 0; // The id of the tuple that just passed the last
+ // tuple in the window.
+
// Create a window for each tuple and calculate the window aggregate.
tuple_accessor->beginIteration();
argument_accessor->beginIteration();
while (tuple_accessor->next() && argument_accessor->next()) {
- const TypedValue window_aggregate = this->calculateOneWindow(tuple_accessor,
- argument_accessor,
- partition_by_ids,
- is_row,
- num_preceding,
- num_following);
- window_aggregates->appendTypedValue(window_aggregate);
- }
-
- return window_aggregates;
-}
-
-TypedValue WindowAggregationHandleAvg::calculateOneWindow(
- ColumnVectorsValueAccessor *tuple_accessor,
- ColumnVectorsValueAccessor *argument_accessor,
- const std::vector<attribute_id> &partition_by_ids,
- const bool is_row,
- const std::int64_t num_preceding,
- const std::int64_t num_following) const {
- // Initialize.
- TypedValue sum = sum_type_->makeZeroValue();
- TypedValue current_value = argument_accessor->getTypedValue(0);
- std::uint64_t count = 0;
-
- // Ignore the value if null.
- if (!current_value.isNull()) {
- sum = fast_add_operator_->applyToTypedValues(sum, current_value);
- count++;
- }
-
- // Get the partition key for the current row.
- std::vector<TypedValue> current_row_partition_key;
- for (attribute_id partition_by_id : partition_by_ids) {
- current_row_partition_key.push_back(
- tuple_accessor->getTypedValue(partition_by_id));
- }
-
- // Get current position.
- tuple_id current_tuple_id = tuple_accessor->getCurrentPositionVirtual();
-
- // Find preceding tuples.
- int count_preceding = 0;
- tuple_id preceding_tuple_id = current_tuple_id;
- while (num_preceding == -1 || count_preceding < num_preceding) {
- preceding_tuple_id--;
-
- // No more preceding tuples.
- if (preceding_tuple_id < 0) {
- break;
+ tuple_id current_tuple_id = tuple_accessor->getCurrentPosition();
+
+ // If current tuple is not in the same partition as the previous tuple,
+ // reset the window.
+ if (!samePartition(tuple_accessor, current_tuple_id - 1)) {
+ start_tuple_id = current_tuple_id;
+ end_tuple_id = current_tuple_id;
+ count = 0;
+ sum = sum_type_->makeZeroValue();
}
- // Get the partition keys and compare. If not the same partition as the
- // current row, stop searching preceding tuples.
- if (!samePartition(tuple_accessor,
- current_row_partition_key,
- preceding_tuple_id,
- partition_by_ids)) {
- break;
+ // Drop tuples that will be out of the window from the beginning.
+ while (!inWindow(tuple_accessor, start_tuple_id)) {
+ TypedValue start_value =
+ argument_accessor->getTypedValueAtAbsolutePosition(0, start_tuple_id);
+ // Ignore the value if NULL.
+ if (!start_value.isNull()) {
+ sum = fast_subtract_operator_->applyToTypedValues(sum, start_value);
+ count--;
+ }
+
+ start_tuple_id++;
}
- // Actually count the element and do the calculation.
- count_preceding++;
- TypedValue preceding_value =
- argument_accessor->getTypedValueAtAbsolutePosition(0, preceding_tuple_id);
+ // Add tuples that will be included by the window at the end.
+ while (inWindow(tuple_accessor, end_tuple_id)) {
+ TypedValue end_value =
+ argument_accessor->getTypedValueAtAbsolutePosition(0, end_tuple_id);
- // Ignore the value if null.
- if (!preceding_value.isNull()) {
- sum = fast_add_operator_->applyToTypedValues(sum, preceding_value);
- count++;
- }
- }
-
- // Find following tuples.
- int count_following = 0;
- tuple_id following_tuple_id = current_tuple_id;
- while (num_following == -1 || count_following < num_following) {
- following_tuple_id++;
+ // Ignore the value if NULL.
+ if (!end_value.isNull()) {
+ sum = fast_add_operator_->applyToTypedValues(sum, end_value);
+ count++;
+ }
- // No more following tuples.
- if (following_tuple_id == tuple_accessor->getNumTuples()) {
- break;
+ end_tuple_id++;
}
- // Get the partition keys and compare. If not the same partition as the
- // current row, stop searching preceding tuples.
- if (!samePartition(tuple_accessor,
- current_row_partition_key,
- following_tuple_id,
- partition_by_ids)) {
- break;
+ // If all values are NULLs, return NULL; Otherwise, return the quotient.
+ if (count == 0) {
+ window_aggregates->appendTypedValue(result_type_->makeNullValue());
+ } else {
+ window_aggregates->appendTypedValue(
+ divide_operator_->applyToTypedValues(sum, TypedValue(static_cast<double>(count))));
}
-
- // Actually count the element and do the calculation.
- count_following++;
- TypedValue following_value =
- argument_accessor->getTypedValueAtAbsolutePosition(0, following_tuple_id);
-
- // Ignore the value if null.
- if (!following_value.isNull()) {
- sum = fast_add_operator_->applyToTypedValues(sum, following_value);
- count++;
- }
- }
-
- // If all values are NULLs, return NULL; Otherwise, return the quotient.
- if (count == 0) {
- return result_type_->makeNullValue();
- } else {
- return divide_operator_->applyToTypedValues(sum,
- TypedValue(static_cast<double>(count)));
}
-}
-bool WindowAggregationHandleAvg::samePartition(
- const ColumnVectorsValueAccessor *tuple_accessor,
- const std::vector<TypedValue> &current_row_partition_key,
- const tuple_id boundary_tuple_id,
- const std::vector<attribute_id> &partition_by_ids) const {
- for (std::size_t partition_by_index = 0;
- partition_by_index < partition_by_ids.size();
- ++partition_by_index) {
- if (!equal_comparators_[partition_by_index]->compareTypedValues(
- current_row_partition_key[partition_by_index],
- tuple_accessor->getTypedValueAtAbsolutePosition(
- partition_by_ids[partition_by_index], boundary_tuple_id))) {
- return false;
- }
- }
-
- return true;
+ return window_aggregates;
}
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/WindowAggregationHandleAvg.hpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/WindowAggregationHandleAvg.hpp b/expressions/window_aggregation/WindowAggregationHandleAvg.hpp
index 5b41779..f7f2e4d 100644
--- a/expressions/window_aggregation/WindowAggregationHandleAvg.hpp
+++ b/expressions/window_aggregation/WindowAggregationHandleAvg.hpp
@@ -31,7 +31,6 @@
#include "types/Type.hpp"
#include "types/TypedValue.hpp"
#include "types/operations/binary_operations/BinaryOperation.hpp"
-#include "types/operations/comparisons/Comparison.hpp"
#include "utility/Macros.hpp"
#include "glog/logging.h"
@@ -54,11 +53,7 @@ class WindowAggregationHandleAvg : public WindowAggregationHandle {
~WindowAggregationHandleAvg() override {}
ColumnVector* calculate(ColumnVectorsValueAccessor* block_accessors,
- std::vector<ColumnVector*> &&arguments,
- const std::vector<attribute_id> &partition_by_ids,
- const bool is_row,
- const std::int64_t num_preceding,
- const std::int64_t num_following) const override;
+ const std::vector<ColumnVector*> &arguments) const override;
private:
friend class WindowAggregateFunctionAvg;
@@ -66,29 +61,25 @@ class WindowAggregationHandleAvg : public WindowAggregationHandle {
/**
* @brief Constructor.
*
- * @param partition_key_types The Types of the partition key.
- * @param type Type of the avg value.
+ * @param partition_by_attributes A list of attributes used as partition key.
+ * @param order_by_attributes A list of attributes used as order key.
+ * @param is_row True if the frame mode is ROWS, false if RANGE.
+ * @param num_preceding The number of rows/range that precedes the current row.
+ * @param num_following The number of rows/range that follows the current row.
+ * @param argument_type Type of the argument.
**/
- WindowAggregationHandleAvg(const std::vector<const Type*> &partition_key_types,
- const Type &type);
-
- TypedValue calculateOneWindow(
- ColumnVectorsValueAccessor *tuple_accessor,
- ColumnVectorsValueAccessor *argument_accessor,
- const std::vector<attribute_id> &partition_by_ids,
+ WindowAggregationHandleAvg(
+ const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
const bool is_row,
const std::int64_t num_preceding,
- const std::int64_t num_following) const;
-
- bool samePartition(const ColumnVectorsValueAccessor *tuple_accessor,
- const std::vector<TypedValue> &current_row_partition_key,
- const tuple_id boundary_tuple_id,
- const std::vector<attribute_id> &partition_by_ids) const;
+ const std::int64_t num_following,
+ const Type *argument_type);
- const Type &argument_type_;
const Type *sum_type_;
const Type *result_type_;
std::unique_ptr<UncheckedBinaryOperator> fast_add_operator_;
+ std::unique_ptr<UncheckedBinaryOperator> fast_subtract_operator_;
std::unique_ptr<UncheckedBinaryOperator> divide_operator_;
DISALLOW_COPY_AND_ASSIGN(WindowAggregationHandleAvg);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/expressions/window_aggregation/tests/WindowAggregationHandleAvg_unittest.cpp
----------------------------------------------------------------------
diff --git a/expressions/window_aggregation/tests/WindowAggregationHandleAvg_unittest.cpp b/expressions/window_aggregation/tests/WindowAggregationHandleAvg_unittest.cpp
index c044a98..cb58083 100644
--- a/expressions/window_aggregation/tests/WindowAggregationHandleAvg_unittest.cpp
+++ b/expressions/window_aggregation/tests/WindowAggregationHandleAvg_unittest.cpp
@@ -23,11 +23,13 @@
#include <memory>
#include <vector>
+#include "catalog/CatalogAttribute.hpp"
#include "catalog/CatalogTypedefs.hpp"
+#include "expressions/scalar/Scalar.hpp"
+#include "expressions/scalar/ScalarAttribute.hpp"
#include "expressions/window_aggregation/WindowAggregateFunction.hpp"
#include "expressions/window_aggregation/WindowAggregateFunctionFactory.hpp"
#include "expressions/window_aggregation/WindowAggregationHandle.hpp"
-#include "expressions/window_aggregation/WindowAggregationHandleAvg.hpp"
#include "expressions/window_aggregation/WindowAggregationID.hpp"
#include "storage/ValueAccessor.hpp"
#include "types/CharType.hpp"
@@ -58,6 +60,9 @@ namespace {
constexpr int kNullInterval = 25;
constexpr int kNumPreceding = 2;
constexpr int kNumFollowing = 2;
+ constexpr int kPartitionKeyIndex = 0;
+ constexpr int kOrderKeyIndex = 1;
+ constexpr int kNumTuplesPerOrderKey = 2;
} // namespace
@@ -65,12 +70,27 @@ namespace {
class WindowAggregationHandleAvgTest : public::testing::Test {
protected:
// Handle initialization.
- void initializeHandle(const Type &argument_type) {
+ WindowAggregationHandle* initializeHandle(const Type &argument_type,
+ const bool is_row = true,
+ const std::int64_t num_preceding = -1,
+ const std::int64_t num_following = 0) {
const WindowAggregateFunction &function =
WindowAggregateFunctionFactory::Get(WindowAggregationID::kAvg);
+ const Type &int_type = TypeFactory::GetType(kInt, false);
+ std::vector<std::unique_ptr<const Scalar>> partition_by_attributes;
+ std::vector<std::unique_ptr<const Scalar>> order_by_attributes;
+ partition_by_attributes.emplace_back(
+ new ScalarAttribute(CatalogAttribute(nullptr, "partition_key", int_type, kPartitionKeyIndex)));
+ order_by_attributes.emplace_back(
+ new ScalarAttribute(CatalogAttribute(nullptr, "order_key", int_type, kOrderKeyIndex)));
std::vector<const Type*> partition_key_types(1, &TypeFactory::GetType(kInt, false));
- handle_avg_.reset(function.createHandle(std::vector<const Type*>(1, &argument_type),
- std::move(partition_key_types)));
+
+ return function.createHandle(std::vector<const Type*>(1, &argument_type),
+ partition_by_attributes,
+ order_by_attributes,
+ is_row,
+ num_preceding,
+ num_following);
}
// Test canApplyToTypes().
@@ -117,24 +137,25 @@ class WindowAggregationHandleAvgTest : public::testing::Test {
template <typename GenericType, typename OutputType = DoubleType>
void checkWindowAggregationAvgGeneric() {
- const GenericType &type = GenericType::Instance(true);
- initializeHandle(type);
-
// Create argument, partition key and cpptype vectors.
std::vector<typename GenericType::cpptype*> argument_cpp_vector;
argument_cpp_vector.reserve(kNumTuples);
ColumnVector *argument_type_vector =
createArgumentGeneric<GenericType>(&argument_cpp_vector);
NativeColumnVector *partition_key_vector =
- new NativeColumnVector(IntType::InstanceNonNullable(), kNumTuples + 2);
+ new NativeColumnVector(IntType::InstanceNonNullable(), kNumTuples);
+ NativeColumnVector *order_key_vector =
+ new NativeColumnVector(IntType::InstanceNonNullable(), kNumTuples);
for (int i = 0; i < kNumTuples; ++i) {
partition_key_vector->appendTypedValue(TypedValue(i / kNumTuplesPerPartition));
+ order_key_vector->appendTypedValue(TypedValue(i / kNumTuplesPerOrderKey));
}
// Create tuple ValueAccessor.
ColumnVectorsValueAccessor *tuple_accessor = new ColumnVectorsValueAccessor();
tuple_accessor->addColumn(partition_key_vector);
+ tuple_accessor->addColumn(order_key_vector);
tuple_accessor->addColumn(argument_type_vector);
// Test UNBOUNDED PRECEDING AND CURRENT ROW.
@@ -182,45 +203,95 @@ class WindowAggregationHandleAvgTest : public::testing::Test {
const std::vector<typename GenericType::cpptype*> &argument_cpp_vector) {
std::vector<ColumnVector*> arguments;
arguments.push_back(argument_type_vector);
- // The partition key index is 0.
- std::vector<attribute_id> partition_key(1, 0);
- ColumnVector *result =
- handle_avg_->calculate(tuple_accessor,
- std::move(arguments),
- partition_key,
- true /* is_row */,
- -1 /* num_preceding: UNBOUNDED PRECEDING */,
- 0 /* num_following: CURRENT ROW */);
+ // Check ROWS mode.
+ WindowAggregationHandle *rows_handle =
+ initializeHandle(GenericType::Instance(true),
+ true /* is_row */,
+ -1 /* num_preceding: UNBOUNDED PRECEDING */,
+ 0 /* num_following: CURRENT ROW */);
+ ColumnVector *rows_result =
+ rows_handle->calculate(tuple_accessor, arguments);
// Get the cpptype result.
- std::vector<typename OutputType::cpptype*> result_cpp_vector;
- typename GenericType::cpptype sum;
- int count;
+ std::vector<typename OutputType::cpptype*> rows_result_cpp_vector;
+ typename GenericType::cpptype rows_sum;
+ int rows_count;
for (std::size_t i = 0; i < argument_cpp_vector.size(); ++i) {
// Start of new partition
if (i % kNumTuplesPerPartition == 0) {
- SetDataType(0, &sum);
- count = 0;
+ SetDataType(0, &rows_sum);
+ rows_count = 0;
}
typename GenericType::cpptype *value = argument_cpp_vector[i];
if (value != nullptr) {
- sum += *value;
- count++;
+ rows_sum += *value;
+ rows_count++;
}
- if (count == 0) {
- result_cpp_vector.push_back(nullptr);
+ if (rows_count == 0) {
+ rows_result_cpp_vector.push_back(nullptr);
} else {
typename OutputType::cpptype *result_cpp_value =
new typename OutputType::cpptype;
- *result_cpp_value = static_cast<typename OutputType::cpptype>(sum) / count;
- result_cpp_vector.push_back(result_cpp_value);
+ *result_cpp_value = static_cast<typename OutputType::cpptype>(rows_sum) / rows_count;
+ rows_result_cpp_vector.push_back(result_cpp_value);
+ }
+ }
+
+ CheckAvgValues(rows_result_cpp_vector, rows_result);
+
+ // Check RANGE mode.
+ WindowAggregationHandle *range_handle =
+ initializeHandle(GenericType::Instance(true),
+ false /* is_row */,
+ -1 /* num_preceding: UNBOUNDED PRECEDING */,
+ 0 /* num_following: CURRENT ROW */);
+ ColumnVector *range_result =
+ range_handle->calculate(tuple_accessor, arguments);
+
+ // Get the cpptype result.
+ std::vector<typename OutputType::cpptype*> range_result_cpp_vector;
+ typename GenericType::cpptype range_sum;
+ int range_count;
+ std::size_t current_tuple = 0;
+ while (current_tuple < kNumTuples) {
+ // Start of new partition
+ if (current_tuple % kNumTuplesPerPartition == 0) {
+ SetDataType(0, &range_sum);
+ range_count = 0;
+ }
+
+ // We have to consider following tuples with the same order key value.
+ std::size_t next_tuple = current_tuple;
+ while (next_tuple < kNumTuples &&
+ next_tuple / kNumTuplesPerPartition == current_tuple / kNumTuplesPerPartition &&
+ next_tuple / kNumTuplesPerOrderKey == current_tuple / kNumTuplesPerOrderKey) {
+ typename GenericType::cpptype *value = argument_cpp_vector[next_tuple];
+ if (value != nullptr) {
+ range_sum += *value;
+ range_count++;
+ }
+
+ next_tuple++;
+ }
+
+ // Calculate the result cpp value.
+ typename OutputType::cpptype *result_cpp_value = nullptr;
+ if (range_count != 0) {
+ result_cpp_value = new typename OutputType::cpptype;
+ *result_cpp_value = static_cast<typename OutputType::cpptype>(range_sum) / range_count;
+ }
+
+ // Add the result values to the tuples within the same order key value.
+ while (current_tuple != next_tuple) {
+ range_result_cpp_vector.push_back(result_cpp_value);
+ current_tuple++;
}
}
- CheckAvgValues(result_cpp_vector, result);
+ CheckAvgValues(range_result_cpp_vector, range_result);
}
template <typename GenericType, typename OutputType>
@@ -229,20 +300,19 @@ class WindowAggregationHandleAvgTest : public::testing::Test {
const std::vector<typename GenericType::cpptype*> &argument_cpp_vector) {
std::vector<ColumnVector*> arguments;
arguments.push_back(argument_type_vector);
- // The partition key index is 0.
- std::vector<attribute_id> partition_key(1, 0);
- ColumnVector *result =
- handle_avg_->calculate(tuple_accessor,
- std::move(arguments),
- partition_key,
- true /* is_row */,
- kNumPreceding,
- kNumFollowing);
+ // Check ROWS mode.
+ WindowAggregationHandle *rows_handle =
+ initializeHandle(GenericType::Instance(true),
+ true /* is_row */,
+ kNumPreceding,
+ kNumFollowing);
+ ColumnVector *rows_result =
+ rows_handle->calculate(tuple_accessor, arguments);
// Get the cpptype result.
// For each value, calculate all surrounding values in the window.
- std::vector<typename OutputType::cpptype*> result_cpp_vector;
+ std::vector<typename OutputType::cpptype*> rows_result_cpp_vector;
for (std::size_t i = 0; i < argument_cpp_vector.size(); ++i) {
typename GenericType::cpptype sum;
@@ -281,19 +351,81 @@ class WindowAggregationHandleAvgTest : public::testing::Test {
}
if (count == 0) {
- result_cpp_vector.push_back(nullptr);
+ rows_result_cpp_vector.push_back(nullptr);
} else {
typename OutputType::cpptype *result_cpp_value =
new typename OutputType::cpptype;
*result_cpp_value = static_cast<typename OutputType::cpptype>(sum) / count;
- result_cpp_vector.push_back(result_cpp_value);
+ rows_result_cpp_vector.push_back(result_cpp_value);
}
}
- CheckAvgValues(result_cpp_vector, result);
- }
+ CheckAvgValues(rows_result_cpp_vector, rows_result);
+
+ // Check RANGE mode.
+ WindowAggregationHandle *range_handle =
+ initializeHandle(GenericType::Instance(true),
+ false /* is_row */,
+ kNumPreceding,
+ kNumFollowing);
+ ColumnVector *range_result =
+ range_handle->calculate(tuple_accessor, arguments);
+
+ // Get the cpptype result.
+ // For each value, calculate all surrounding values in the window.
+ std::vector<typename OutputType::cpptype*> range_result_cpp_vector;
+
+ for (std::size_t i = 0; i < argument_cpp_vector.size(); ++i) {
+ typename GenericType::cpptype sum;
+ SetDataType(0, &sum);
+ int count = 0;
+
+ if (argument_cpp_vector[i] != nullptr) {
+ sum += *argument_cpp_vector[i];
+ count++;
+ }
+
+ int preceding_bound = i / kNumTuplesPerOrderKey - kNumPreceding;
+ for (std::size_t precede = 1; precede <= kNumTuples; ++precede) {
+ // Not in range or not in the same partition.
+ if (i / kNumTuplesPerPartition != (i - precede) / kNumTuplesPerPartition ||
+ static_cast<int>((i - precede) / kNumTuplesPerOrderKey) < preceding_bound) {
+ break;
+ }
+
+ if (argument_cpp_vector[i - precede] != nullptr) {
+ sum += *argument_cpp_vector[i - precede];
+ count++;
+ }
+ }
+
+ int following_bound = i / kNumTuplesPerOrderKey + kNumFollowing;
+ for (int follow = 1; follow <= kNumTuples; ++follow) {
+ // Not in range or not in the same partition.
+ if (i + follow >= kNumTuples ||
+ i / kNumTuplesPerPartition != (i + follow) / kNumTuplesPerPartition ||
+ static_cast<int>((i + follow) / kNumTuplesPerOrderKey) > following_bound) {
+ break;
+ }
+
+ if (argument_cpp_vector[i + follow] != nullptr) {
+ sum += *argument_cpp_vector[i + follow];
+ count++;
+ }
+ }
+
+ if (count == 0) {
+ rows_result_cpp_vector.push_back(nullptr);
+ } else {
+ typename OutputType::cpptype *result_cpp_value =
+ new typename OutputType::cpptype;
+ *result_cpp_value = static_cast<typename OutputType::cpptype>(sum) / count;
+ range_result_cpp_vector.push_back(result_cpp_value);
+ }
+ }
- std::unique_ptr<WindowAggregationHandle> handle_avg_;
+ CheckAvgValues(range_result_cpp_vector, range_result);
+ }
};
template <>
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index ce21ade..88103df 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -1663,7 +1663,7 @@ void ExecutionGenerator::convertWindowAggregate(
std::static_pointer_cast<const E::WindowAggregateFunction>(
named_window_aggregate_expression->expression());
- // Set the AggregateFunction.
+ // Set the WindowAggregateFunction.
window_aggr_state_proto->mutable_function()->MergeFrom(
window_aggregate_function->window_aggregate().getProto());
@@ -1683,6 +1683,15 @@ void ExecutionGenerator::convertWindowAggregate(
->MergeFrom(concretized_partition_by_attribute->getProto());
}
+ // Set order keys.
+ for (const E::ScalarPtr &order_by_attribute
+ : window_info.order_by_attributes) {
+ unique_ptr<const Scalar> concretized_order_by_attribute(
+ order_by_attribute->concretize(attribute_substitution_map_));
+ window_aggr_state_proto->add_order_by_attributes()
+ ->MergeFrom(concretized_order_by_attribute->getProto());
+ }
+
// Set window frame info.
if (window_info.frame_info == nullptr) {
// If the frame is not specified, use the default setting:
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/query_optimizer/resolver/Resolver.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/resolver/Resolver.cpp b/query_optimizer/resolver/Resolver.cpp
index c224388..46808bf 100644
--- a/query_optimizer/resolver/Resolver.cpp
+++ b/query_optimizer/resolver/Resolver.cpp
@@ -814,9 +814,9 @@ L::LogicalPtr Resolver::resolveInsertSelection(
cast_expressions.emplace_back(selection_attributes[aid]);
} else {
// TODO(jianqiao): implement Cast operation for non-numeric types.
- if (destination_type.getSuperTypeID() == Type::kNumeric
- && selection_type.getSuperTypeID() == Type::kNumeric
- && destination_type.isSafelyCoercibleFrom(selection_type)) {
+ if (destination_type.getSuperTypeID() == Type::SuperTypeID::kNumeric &&
+ selection_type.getSuperTypeID() == Type::SuperTypeID::kNumeric &&
+ destination_type.isSafelyCoercibleFrom(selection_type)) {
// Add cast operation
const E::AttributeReferencePtr attr = selection_attributes[aid];
const E::ExpressionPtr cast_expr =
@@ -1691,6 +1691,19 @@ E::WindowInfo Resolver::resolveWindow(const ParseWindow &parse_window,
// Resolve window frame
if (parse_window.frame_info() != nullptr) {
const quickstep::ParseFrameInfo *parse_frame_info = parse_window.frame_info();
+ // For FRAME mode, the first attribute in ORDER BY must be numeric.
+ // TODO(Shixuan): Time-related types should also be supported. To handle
+ // this, some changes in the parser needs to be done since the time range
+ // should be specified with time units. Also, UNBOUNDED flags might be
+ // needed because -1 might not make sense in this case.
+ if (!parse_frame_info->is_row &&
+ (order_by_attributes.empty() ||
+ order_by_attributes[0]->getValueType().getSuperTypeID() != Type::SuperTypeID::kNumeric)) {
+ THROW_SQL_ERROR_AT(&parse_window)
+ << "A numeric attribute should be specified as the first ORDER BY "
+ << "attribute in FRAME mode";
+ }
+
frame_info = new E::WindowFrameInfo(parse_frame_info->is_row,
parse_frame_info->num_preceding,
parse_frame_info->num_following);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/query_optimizer/tests/execution_generator/Select.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/execution_generator/Select.test b/query_optimizer/tests/execution_generator/Select.test
index 30a3c39..6bada6c 100644
--- a/query_optimizer/tests/execution_generator/Select.test
+++ b/query_optimizer/tests/execution_generator/Select.test
@@ -1025,17 +1025,40 @@ WINDOW w AS
+--------------------+-----------+------------------------+
==
-# Currently this is not supported, an empty table will be returned.
-SELECT int_col, sum(float_col) OVER
-(PARTITION BY char_col, long_col
- ORDER BY double_col DESC NULLS LAST, int_col ASC NULLS FIRST
- RANGE BETWEEN 3 PRECEDING AND 3 FOLLOWING)
+SELECT float_col, double_col, avg(double_col) OVER
+(ORDER BY float_col DESC NULLS LAST, int_col ASC NULLS FIRST
+ RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING)
FROM test;
--
-+-----------+------------------------+
-|int_col |sum(float_col) |
-+-----------+------------------------+
-+-----------+------------------------+
++---------------+------------------------+------------------------+
+|float_col |double_col |avg(double_col) |
++---------------+------------------------+------------------------+
+| 4.89897966| 117.57550765359254| -5.2010907233390986|
+| 4.79583168| -110.30412503619254| -3.3458568752518572|
+| 4.69041586| 103.18914671611546| -3.3458568752518572|
+| 4.5825758| -96.234089594072643| -4.2942570191393745|
+| 4.47213602| NULL| -4.2942570191393745|
+| 4.35889912| -82.81907992727281| -3.1771278735018194|
+| 4.2426405| 76.367532368147124| -3.1771278735018194|
+| 4.12310553| -70.092795635500224| -3.6217507631683268|
+| 4| 64| -3.0100796703699935|
+| 3.87298346| -58.094750193111253| -3.0100796703699935|
+| 3.7416575| 52.38320341483518| -3.0100796703699935|
+| 3.60555124| -46.872166581031856| -3.1193833079868254|
+| 3.46410155| 41.569219381653056| -3.1193833079868254|
+| 3.31662488| -36.4828726939094| -2.8361542397614437|
+| 3.1622777| NULL| -2.8361542397614437|
+| 3| -27| -2.7526926834086507|
+| 2.82842708| 22.627416997969522| -8.4826069851706123|
+| 2.64575124| -18.520259177452136| -9.0010404404476727|
+| 2.44948983| 14.696938456699067| -4.1547599319129516|
+| 2.23606801| -11.180339887498949| -4.2708832009567148|
+| 2| 8| 0.11724429467951912|
+| 1.73205078| -5.196152422706632| -4.7108157334609286|
+| 1.41421354| 2.8284271247461903| -5.1226841602152344|
+| 1| -1| -1.638218767582549|
+| 0| NULL| 1.1580686755098886|
++---------------+------------------------+------------------------+
==
SELECT sum(avg(int_col) OVER w) FROM test
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/storage/WindowAggregationOperationState.cpp
----------------------------------------------------------------------
diff --git a/storage/WindowAggregationOperationState.cpp b/storage/WindowAggregationOperationState.cpp
index 0cdfc1a..49fa44d 100644
--- a/storage/WindowAggregationOperationState.cpp
+++ b/storage/WindowAggregationOperationState.cpp
@@ -56,15 +56,13 @@ WindowAggregationOperationState::WindowAggregationOperationState(
const WindowAggregateFunction *window_aggregate_function,
std::vector<std::unique_ptr<const Scalar>> &&arguments,
const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
const bool is_row,
const std::int64_t num_preceding,
const std::int64_t num_following,
StorageManager *storage_manager)
: input_relation_(input_relation),
arguments_(std::move(arguments)),
- is_row_(is_row),
- num_preceding_(num_preceding),
- num_following_(num_following),
storage_manager_(storage_manager) {
// Get the Types of this window aggregate's arguments so that we can create an
// AggregationHandle.
@@ -76,18 +74,14 @@ WindowAggregationOperationState::WindowAggregationOperationState(
// Check if window aggregate function could apply to the arguments.
DCHECK(window_aggregate_function->canApplyToTypes(argument_types));
- // IDs and types of partition keys.
- std::vector<const Type*> partition_by_types;
- for (const std::unique_ptr<const Scalar> &partition_by_attribute : partition_by_attributes) {
- partition_by_ids_.push_back(
- partition_by_attribute->getAttributeIdForValueAccessor());
- partition_by_types.push_back(&partition_by_attribute->getType());
- }
-
// Create the handle and initial state.
window_aggregation_handle_.reset(
- window_aggregate_function->createHandle(std::move(argument_types),
- std::move(partition_by_types)));
+ window_aggregate_function->createHandle(argument_types,
+ partition_by_attributes,
+ order_by_attributes,
+ is_row,
+ num_preceding,
+ num_following));
}
WindowAggregationOperationState* WindowAggregationOperationState::ReconstructFromProto(
@@ -113,6 +107,15 @@ WindowAggregationOperationState* WindowAggregationOperationState::ReconstructFro
database));
}
+ std::vector<std::unique_ptr<const Scalar>> order_by_attributes;
+ for (int attribute_idx = 0;
+ attribute_idx < proto.order_by_attributes_size();
+ ++attribute_idx) {
+ order_by_attributes.emplace_back(ScalarFactory::ReconstructFromProto(
+ proto.order_by_attributes(attribute_idx),
+ database));
+ }
+
const bool is_row = proto.is_row();
const std::int64_t num_preceding = proto.num_preceding();
const std::int64_t num_following = proto.num_following();
@@ -121,6 +124,7 @@ WindowAggregationOperationState* WindowAggregationOperationState::ReconstructFro
&WindowAggregateFunctionFactory::ReconstructFromProto(proto.function()),
std::move(arguments),
partition_by_attributes,
+ order_by_attributes,
is_row,
num_preceding,
num_following,
@@ -160,6 +164,15 @@ bool WindowAggregationOperationState::ProtoIsValid(const serialization::WindowAg
}
}
+ for (int attribute_idx = 0;
+ attribute_idx < proto.order_by_attributes_size();
+ ++attribute_idx) {
+ if (!ScalarFactory::ProtoIsValid(proto.order_by_attributes(attribute_idx),
+ database)) {
+ return false;
+ }
+ }
+
if (proto.num_preceding() < -1 || proto.num_following() < -1) {
return false;
}
@@ -177,14 +190,6 @@ void WindowAggregationOperationState::windowAggregateBlocks(
return;
}
- // TODO(Shixuan): RANGE frame mode should be supported to make SQL grammar
- // work. This will need Order Key to be passed so that we know where the
- // window should start and end.
- if (!is_row_) {
- std::cout << "Currently we don't support RANGE frame mode :(\n";
- return;
- }
-
// Get the total number of tuples.
int num_tuples = 0;
for (const block_id block_idx : block_ids) {
@@ -226,7 +231,11 @@ void WindowAggregationOperationState::windowAggregateBlocks(
block->getIndices(),
block->getIndicesConsistent());
ValueAccessor *tuple_accessor = tuple_block.createValueAccessor();
- ColumnVectorsValueAccessor *argument_accessor = new ColumnVectorsValueAccessor();
+ ColumnVectorsValueAccessor *argument_accessor = nullptr;
+ if (!arguments_.empty()) {
+ argument_accessor = new ColumnVectorsValueAccessor();
+ }
+
for (const std::unique_ptr<const Scalar> &argument : arguments_) {
argument_accessor->addColumn(argument->getAllValues(tuple_accessor,
&sub_block_ref));
@@ -235,9 +244,15 @@ void WindowAggregationOperationState::windowAggregateBlocks(
InvokeOnAnyValueAccessor(tuple_accessor,
[&] (auto *tuple_accessor) -> void { // NOLINT(build/c++11)
tuple_accessor->beginIteration();
- argument_accessor->beginIteration();
+ if (argument_accessor != nullptr) {
+ argument_accessor->beginIteration();
+ }
+
+ while (tuple_accessor->next()) {
+ if (argument_accessor != nullptr) {
+ argument_accessor->next();
+ }
- while (tuple_accessor->next() && argument_accessor->next()) {
for (std::size_t attr_id = 0; attr_id < attribute_vecs.size(); ++attr_id) {
ColumnVector *attr_vec = attribute_vecs[attr_id];
if (attr_vec->isNative()) {
@@ -275,11 +290,7 @@ void WindowAggregationOperationState::windowAggregateBlocks(
// Do actual calculation in handle.
ColumnVector *window_aggregates =
window_aggregation_handle_->calculate(all_blocks_accessor,
- std::move(argument_vecs),
- partition_by_ids_,
- is_row_,
- num_preceding_,
- num_following_);
+ argument_vecs);
all_blocks_accessor->addColumn(window_aggregates);
output_destination->bulkInsertTuples(all_blocks_accessor);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/storage/WindowAggregationOperationState.hpp
----------------------------------------------------------------------
diff --git a/storage/WindowAggregationOperationState.hpp b/storage/WindowAggregationOperationState.hpp
index 9792a99..726b102 100644
--- a/storage/WindowAggregationOperationState.hpp
+++ b/storage/WindowAggregationOperationState.hpp
@@ -57,6 +57,7 @@ class WindowAggregationOperationState {
* computed.
* @param arguments A list of argument expressions to that aggregate.
* @param partition_by_attributes A list of window partition key.
+ * @param order_by_attributes A list of window order key.
* @param is_row True if the window frame is calculated by ROW, false if it is
* calculated by RANGE.
* @param num_preceding The number of rows/range for the tuples preceding the
@@ -69,6 +70,7 @@ class WindowAggregationOperationState {
const WindowAggregateFunction *window_aggregate_function,
std::vector<std::unique_ptr<const Scalar>> &&arguments,
const std::vector<std::unique_ptr<const Scalar>> &partition_by_attributes,
+ const std::vector<std::unique_ptr<const Scalar>> &order_by_attributes,
const bool is_row,
const std::int64_t num_preceding,
const std::int64_t num_following,
@@ -120,13 +122,6 @@ class WindowAggregationOperationState {
const std::vector<block_id> block_ids_;
std::unique_ptr<WindowAggregationHandle> window_aggregation_handle_;
std::vector<std::unique_ptr<const Scalar>> arguments_;
- std::vector<attribute_id> partition_by_ids_;
-
- // Frame info.
- const bool is_row_;
- const std::int64_t num_preceding_;
- const std::int64_t num_following_;
-
StorageManager *storage_manager_;
DISALLOW_COPY_AND_ASSIGN(WindowAggregationOperationState);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d0172fde/storage/WindowAggregationOperationState.proto
----------------------------------------------------------------------
diff --git a/storage/WindowAggregationOperationState.proto b/storage/WindowAggregationOperationState.proto
index d888461..f879713 100644
--- a/storage/WindowAggregationOperationState.proto
+++ b/storage/WindowAggregationOperationState.proto
@@ -30,4 +30,5 @@ message WindowAggregationOperationState {
required bool is_row = 5;
required int64 num_preceding = 6; // -1 means UNBOUNDED PRECEDING.
required int64 num_following = 7; // -1 means UNBOUNDED FOLLOWING.
+ repeated Scalar order_by_attributes = 8;
}
[03/13] incubator-quickstep git commit: Update the NOTICE file to
acknowledge all the copyrights
Posted by ji...@apache.org.
Update the NOTICE file to acknowledge all the copyrights
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/260b8624
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/260b8624
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/260b8624
Branch: refs/heads/LIP-for-tpch
Commit: 260b8624a8a1f2161e8c38fe56bd00ae1bfb579b
Parents: d0172fd
Author: Jignesh Patel <jm...@hotmail.com>
Authored: Mon Aug 1 09:56:44 2016 -0500
Committer: Jignesh Patel <jm...@hotmail.com>
Committed: Mon Aug 1 09:56:44 2016 -0500
----------------------------------------------------------------------
NOTICE | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/260b8624/NOTICE
----------------------------------------------------------------------
diff --git a/NOTICE b/NOTICE
index 9cfd585..32db8b8 100644
--- a/NOTICE
+++ b/NOTICE
@@ -6,3 +6,39 @@ The Apache Software Foundation (http://www.apache.org/).
Portions Copyright (c) 2011-2015, Quickstep Technologies, LLC.
Portions Copyright (c) 2015-2016, Pivotal Software, Inc.
+
+[Copyright for third_party/benchmark]
+Portions Copyright (c) Arne Beer <ar...@twobeer.de>
+Portions Copyright (c) Christopher Seymour <ch...@hotmail.com>
+Portions Copyright (c) David Coeurjolly <da...@liris.cnrs.fr>
+Portions Copyright (c) Dominic Hamon <dm...@stripysock.com>
+Portions Copyright (c) Eugene Zhuk <eu...@gmail.com>
+Portions Copyright (c) Evgeny Safronov <di...@gmail.com>
+Portions Copyright (c) Felix Homann <li...@showlabor.de>
+Portions Copyright (c) Google Inc.
+Portions Copyright (c) JianXiong Zhou <zh...@gmail.com>
+Portions Copyright (c) Lei Xu <ed...@gmail.com>
+Portions Copyright (c) Matt Clarkson <ma...@gmail.com>
+Portions Copyright (c) Oleksandr Sochka <sa...@gmail.com>
+Portions Copyright (c) Paul Redmond <pa...@gmail.com>
+Portions Copyright (c) Shuo Chen <ch...@chenshuo.com>
+Portions Copyright (c) Yusuke Suzuki <ut...@gmail.com>
+
+[Copyright for third_party/cpplint]
+Portions Copyright (c) 2009 Google Inc
+
+[Copyright for third_party/farmhash]
+Copyright (c) 2014 Google, Inc.
+
+[Copyright for third_party/gflags]
+Copyright (c) 2006, Google Inc.
+
+[Copyright for third_party/glog]
+Copyright (c) 2008, Google Inc.
+
+[Copyright for third_party/gperftools]
+Copyright (c) 2005, Google Inc.
+
+[Copyright for third_party/linenoise]
+Copyright (c) 2010-2014, Salvatore Sanfilippo <antirez at gmail dot com>
+Copyright (c) 2010-2013, Pieter Noordhuis <pcnoordhuis at gmail dot com>
[12/13] incubator-quickstep git commit: Initial commit
Posted by ji...@apache.org.
Initial commit
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/43ed533b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/43ed533b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/43ed533b
Branch: refs/heads/LIP-for-tpch
Commit: 43ed533bd1ea7263cc541c5d9a2e96151181aafb
Parents: 1b07eaa
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Sat Jun 11 23:14:00 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Wed Aug 3 19:39:14 2016 -0500
----------------------------------------------------------------------
CMakeLists.txt | 1 +
catalog/CMakeLists.txt | 9 +
catalog/Catalog.proto | 5 +
catalog/CatalogRelationConstraints.cpp | 55 ++
catalog/CatalogRelationConstraints.hpp | 110 ++++
catalog/CatalogRelationSchema.cpp | 15 +
catalog/CatalogRelationSchema.hpp | 16 +-
cli/CommandExecutor.cpp | 25 +-
cli/QuickstepCli.cpp | 65 +++
compression/CompressionDictionaryLite.hpp | 42 ++
query_execution/CMakeLists.txt | 1 +
query_execution/QueryContext.cpp | 11 +-
query_execution/Worker.cpp | 5 +
query_optimizer/CMakeLists.txt | 2 +
query_optimizer/ExecutionGenerator.cpp | 74 +--
query_optimizer/ExecutionGenerator.hpp | 2 +-
query_optimizer/ExecutionHeuristics.cpp | 171 ++++---
query_optimizer/ExecutionHeuristics.hpp | 79 ++-
query_optimizer/PhysicalGenerator.cpp | 7 +-
query_optimizer/cost_model/SimpleCostModel.cpp | 4 +-
.../cost_model/StarSchemaSimpleCostModel.cpp | 42 +-
query_optimizer/expressions/ExpressionUtil.hpp | 8 +-
query_optimizer/physical/Aggregate.cpp | 5 +
query_optimizer/physical/Aggregate.hpp | 23 +-
query_optimizer/physical/HashJoin.cpp | 27 +
query_optimizer/physical/HashJoin.hpp | 23 +-
query_optimizer/physical/Physical.hpp | 55 ++
query_optimizer/physical/Selection.cpp | 6 +
query_optimizer/physical/Selection.hpp | 3 +
query_optimizer/physical/TableReference.cpp | 18 +
query_optimizer/physical/TableReference.hpp | 3 +
query_optimizer/rules/AttachBloomFilters.cpp | 308 ++++++++++++
query_optimizer/rules/AttachBloomFilters.hpp | 118 +++++
query_optimizer/rules/CMakeLists.txt | 17 +
.../StarSchemaHashJoinOrderOptimization.cpp | 277 ++++++----
.../StarSchemaHashJoinOrderOptimization.hpp | 100 ++--
.../tests/ExecutionHeuristics_unittest.cpp | 3 +-
relational_operators/HashJoinOperator.cpp | 10 +
relational_operators/HashJoinOperator.hpp | 25 +-
relational_operators/WorkOrder.hpp | 11 +-
storage/AggregationOperationState.cpp | 98 +++-
storage/AggregationOperationState.hpp | 10 +-
storage/AggregationOperationState.proto | 6 +
storage/BasicColumnStoreValueAccessor.hpp | 26 +-
storage/BloomFilterIndexSubBlock.cpp | 4 +-
storage/BloomFilterIndexSubBlock.hpp | 6 -
storage/CMakeLists.txt | 2 +
storage/CompressedColumnStoreValueAccessor.hpp | 22 +
.../CompressedPackedRowStoreValueAccessor.hpp | 22 +
storage/HashTable.hpp | 185 ++++---
storage/HashTable.proto | 10 +-
storage/HashTableFactory.hpp | 23 +-
storage/PackedRowStoreValueAccessor.hpp | 25 +-
storage/SplitRowStoreValueAccessor.hpp | 45 ++
storage/StorageBlock.cpp | 28 +-
storage/StorageBlock.hpp | 7 +-
storage/ValueAccessor.hpp | 36 ++
types/containers/ColumnVector.hpp | 35 ++
types/containers/ColumnVectorsValueAccessor.hpp | 17 +
utility/BloomFilter.hpp | 502 ++++++++++++++-----
utility/BloomFilter.proto | 6 +-
utility/BloomFilterAdapter.hpp | 142 ++++++
utility/CMakeLists.txt | 13 +
utility/DisjointTreeForest.hpp | 116 +++++
utility/EventProfiler.cpp | 29 ++
utility/EventProfiler.hpp | 188 +++++++
utility/PlanVisualizer.cpp | 42 +-
67 files changed, 2867 insertions(+), 559 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3192713..aff82d5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -770,6 +770,7 @@ target_link_libraries(quickstep_cli_shell
quickstep_queryoptimizer_QueryProcessor
quickstep_storage_PreloaderThread
quickstep_threading_ThreadIDBasedMap
+ quickstep_utility_EventProfiler
quickstep_utility_ExecutionDAGVisualizer
quickstep_utility_Macros
quickstep_utility_PtrVector
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/catalog/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/catalog/CMakeLists.txt b/catalog/CMakeLists.txt
index 64b4f16..0f50706 100644
--- a/catalog/CMakeLists.txt
+++ b/catalog/CMakeLists.txt
@@ -35,6 +35,9 @@ add_library(quickstep_catalog_CatalogDatabaseCache CatalogDatabaseCache.cpp Cata
add_library(quickstep_catalog_CatalogDatabaseLite ../empty_src.cpp CatalogDatabaseLite.hpp)
add_library(quickstep_catalog_CatalogErrors ../empty_src.cpp CatalogErrors.hpp)
add_library(quickstep_catalog_CatalogRelation CatalogRelation.cpp CatalogRelation.hpp)
+add_library(quickstep_catalog_CatalogRelationConstraints
+ CatalogRelationConstraints.cpp
+ CatalogRelationConstraints.hpp)
add_library(quickstep_catalog_CatalogRelationSchema
CatalogRelationSchema.cpp
CatalogRelationSchema.hpp)
@@ -117,6 +120,10 @@ target_link_libraries(quickstep_catalog_CatalogRelation
quickstep_threading_SpinSharedMutex
quickstep_utility_Macros
quickstep_utility_PtrVector)
+target_link_libraries(quickstep_catalog_CatalogRelationConstraints
+ quickstep_catalog_CatalogTypedefs
+ quickstep_catalog_Catalog_proto
+ quickstep_utility_Macros)
target_link_libraries(quickstep_catalog_CatalogRelationStatistics
quickstep_catalog_CatalogTypedefs
quickstep_catalog_Catalog_proto
@@ -136,6 +143,7 @@ target_link_libraries(quickstep_catalog_CatalogRelationSchema
glog
quickstep_catalog_CatalogAttribute
quickstep_catalog_CatalogErrors
+ quickstep_catalog_CatalogRelationConstraints
quickstep_catalog_CatalogTypedefs
quickstep_catalog_Catalog_proto
quickstep_types_Type
@@ -182,6 +190,7 @@ target_link_libraries(quickstep_catalog
quickstep_catalog_CatalogDatabaseLite
quickstep_catalog_CatalogErrors
quickstep_catalog_CatalogRelation
+ quickstep_catalog_CatalogRelationConstraints
quickstep_catalog_CatalogRelationSchema
quickstep_catalog_CatalogRelationStatistics
quickstep_catalog_CatalogTypedefs
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/catalog/Catalog.proto
----------------------------------------------------------------------
diff --git a/catalog/Catalog.proto b/catalog/Catalog.proto
index ce4bc2e..a51172f 100644
--- a/catalog/Catalog.proto
+++ b/catalog/Catalog.proto
@@ -80,6 +80,10 @@ message IndexScheme {
repeated IndexEntry index_entries = 1;
}
+message CatalogRelationConstraints {
+ repeated int32 primary_key = 1;
+}
+
message CatalogRelationStatistics {
optional fixed64 num_tuples = 1;
@@ -96,6 +100,7 @@ message CatalogRelationSchema {
required bool temporary = 3;
repeated CatalogAttribute attributes = 4;
+ optional CatalogRelationConstraints constraints = 5;
extensions 16 to max;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/catalog/CatalogRelationConstraints.cpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelationConstraints.cpp b/catalog/CatalogRelationConstraints.cpp
new file mode 100644
index 0000000..4525a98
--- /dev/null
+++ b/catalog/CatalogRelationConstraints.cpp
@@ -0,0 +1,55 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#include "catalog/CatalogRelationConstraints.hpp"
+
+#include "catalog/Catalog.pb.h"
+
+namespace quickstep {
+
+// Rebuilds the constraints from their serialized form. When the proto lists
+// no primary-key attributes, primary_key_ is left unset (nullptr).
+CatalogRelationConstraints::CatalogRelationConstraints(
+    const serialization::CatalogRelationConstraints &proto) {
+  // primary_key_size() returns a signed int, so the count and the loop index
+  // are kept signed rather than being compared against a std::size_t.
+  const int num_key_attributes = proto.primary_key_size();
+  if (num_key_attributes <= 0) {
+    return;
+  }
+
+  primary_key_.reset(new std::set<attribute_id>());
+  for (int i = 0; i < num_key_attributes; ++i) {
+    primary_key_->emplace(proto.primary_key(i));
+  }
+}
+
+// Serializes the constraints. Every attribute id of the primary key (if one
+// is set) is appended to the proto in the set's iteration order; without a
+// primary key the returned proto is left empty.
+serialization::CatalogRelationConstraints CatalogRelationConstraints::getProto() const {
+  serialization::CatalogRelationConstraints serialized;
+  if (!primary_key_) {
+    return serialized;
+  }
+
+  for (const auto key_attribute : *primary_key_) {
+    serialized.add_primary_key(key_attribute);
+  }
+  return serialized;
+}
+
+// Checks that every primary-key entry in the proto is a usable attribute
+// index, i.e. lies in [0, num_attributes). Returns true for a proto without
+// any primary-key entries.
+bool CatalogRelationConstraints::ProtoIsValid(
+    const serialization::CatalogRelationConstraints &proto,
+    const std::size_t num_attributes) {
+  // primary_key_size() is a signed int; keep the index signed as well.
+  const int num_key_attributes = proto.primary_key_size();
+  for (int i = 0; i < num_key_attributes; ++i) {
+    const auto key_attribute = proto.primary_key(i);
+    // Reject negative ids explicitly instead of relying on the implicit
+    // signed-to-unsigned conversion in the comparison below.
+    if (key_attribute < 0 ||
+        static_cast<std::size_t>(key_attribute) >= num_attributes) {
+      return false;
+    }
+  }
+  return true;
+}
+
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/catalog/CatalogRelationConstraints.hpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelationConstraints.hpp b/catalog/CatalogRelationConstraints.hpp
new file mode 100644
index 0000000..896c072
--- /dev/null
+++ b/catalog/CatalogRelationConstraints.hpp
@@ -0,0 +1,110 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_CATALOG_CATALOG_RELATION_CONSTRAINTS_HPP_
+#define QUICKSTEP_CATALOG_CATALOG_RELATION_CONSTRAINTS_HPP_
+
+#include <algorithm>
+#include <cstddef>
+#include <memory>
+#include <set>
+#include <utility>
+
+#include "catalog/Catalog.pb.h"
+#include "catalog/CatalogTypedefs.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Catalog
+ * @{
+ */
+
+/**
+ * @brief Constraints on a catalog relation.
+ **/
+class CatalogRelationConstraints {
+ public:
+  /**
+   * @brief Constructor. Creates a constraints object with no primary key.
+   **/
+  CatalogRelationConstraints() {}
+
+  /**
+   * @brief Reconstruct a CatalogRelationConstraints object from its serialized
+   *        Protocol Buffer form.
+   *
+   * @param proto The Protocol Buffer serialization of a CatalogRelationConstraints
+   *        object, previously produced by getProto().
+   **/
+  explicit CatalogRelationConstraints(const serialization::CatalogRelationConstraints &proto);
+
+  /**
+   * @brief Serialize the CatalogRelationConstraints object as Protocol Buffer.
+   *
+   * @return The Protocol Buffer representation of the CatalogRelationConstraints
+   *         object.
+   **/
+  serialization::CatalogRelationConstraints getProto() const;
+
+  /**
+   * @brief Check whether a serialized CatalogRelationConstraints is valid for
+   *        a relation with the given number of attributes.
+   *
+   * @param proto The serialized constraints to check.
+   * @param num_attributes The number of attributes in the relation.
+   * @return False if any primary key entry in proto is not below
+   *         num_attributes, true otherwise.
+   **/
+  static bool ProtoIsValid(const serialization::CatalogRelationConstraints &proto,
+                           const std::size_t num_attributes);
+
+  /**
+   * @brief Check whether a primary key has been set.
+   *
+   * @return True if a primary key is set, false otherwise.
+   **/
+  bool hasPrimaryKey() const {
+    return (primary_key_ != nullptr);
+  }
+
+  /**
+   * @brief Get the primary key.
+   *
+   * @return The set of attribute ids forming the primary key, or nullptr if
+   *         no primary key is set. The set remains owned by this object.
+   **/
+  const std::set<attribute_id>* getPrimaryKey() const {
+    return primary_key_.get();
+  }
+
+  /**
+   * @brief Set (or replace) the primary key.
+   *
+   * @param primary_key A non-empty container of attribute ids providing
+   *        empty(), begin() and end(). The ids are copied into an internal
+   *        set. An empty container fails the CHECK.
+   **/
+  template <typename IterableT>
+  void setPrimaryKey(IterableT &&primary_key) {
+    CHECK(!primary_key.empty());
+    primary_key_.reset(
+        new std::set<attribute_id>(primary_key.begin(), primary_key.end()));
+  }
+
+  /**
+   * @brief Remove the primary key, if any.
+   **/
+  void removePrimaryKey() {
+    primary_key_.reset();
+  }
+
+  /**
+   * @brief Check whether the given attributes contain the whole primary key.
+   *
+   * @param attributes The set of attribute ids to test.
+   * @return True if a primary key is set and every one of its attributes is
+   *         contained in attributes, false otherwise.
+   **/
+  bool impliesUniqueAttributes(const std::set<attribute_id> &attributes) const {
+    if (primary_key_ == nullptr) {
+      return false;
+    }
+
+    // Both std::set ranges are sorted ascending, as std::includes requires.
+    // This is the same subset test as comparing the size of the intersection
+    // with the size of the primary key, without building a temporary vector.
+    return std::includes(attributes.begin(), attributes.end(),
+                         primary_key_->begin(), primary_key_->end());
+  }
+
+ private:
+  // Attribute ids of the primary key; nullptr when no primary key is set.
+  std::unique_ptr<std::set<attribute_id>> primary_key_;
+
+  DISALLOW_COPY_AND_ASSIGN(CatalogRelationConstraints);
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_CATALOG_CATALOG_RELATION_CONSTRAINTS_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/catalog/CatalogRelationSchema.cpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelationSchema.cpp b/catalog/CatalogRelationSchema.cpp
index 97c834f..bf8217d 100644
--- a/catalog/CatalogRelationSchema.cpp
+++ b/catalog/CatalogRelationSchema.cpp
@@ -27,6 +27,7 @@
#include "catalog/Catalog.pb.h"
#include "catalog/CatalogAttribute.hpp"
#include "catalog/CatalogErrors.hpp"
+#include "catalog/CatalogRelationConstraints.hpp"
#include "catalog/CatalogTypedefs.hpp"
#include "types/Type.hpp"
#include "utility/PtrVector.hpp"
@@ -70,6 +71,12 @@ CatalogRelationSchema::CatalogRelationSchema(const serialization::CatalogRelatio
attr_vec_.push_back(nullptr);
}
}
+
+ if (proto.has_constraints()) {
+ constraints_.reset(new CatalogRelationConstraints(proto.constraints()));
+ } else {
+ constraints_.reset(new CatalogRelationConstraints());
+ }
}
bool CatalogRelationSchema::ProtoIsValid(const serialization::CatalogRelationSchema &proto) {
@@ -84,6 +91,12 @@ bool CatalogRelationSchema::ProtoIsValid(const serialization::CatalogRelationSch
}
}
+ if (proto.has_constraints()
+ && !CatalogRelationConstraints::ProtoIsValid(proto.constraints(),
+ proto.attributes_size())) {
+ return false;
+ }
+
return true;
}
@@ -104,6 +117,8 @@ serialization::CatalogRelationSchema CatalogRelationSchema::getProto() const {
}
}
+ proto.mutable_constraints()->CopyFrom(constraints_->getProto());
+
return proto;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/catalog/CatalogRelationSchema.hpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelationSchema.hpp b/catalog/CatalogRelationSchema.hpp
index d773bc7..0c6c207 100644
--- a/catalog/CatalogRelationSchema.hpp
+++ b/catalog/CatalogRelationSchema.hpp
@@ -21,12 +21,14 @@
#define QUICKSTEP_CATALOG_CATALOG_RELATION_SCHEMA_HPP_
#include <cstddef>
+#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "catalog/Catalog.pb.h"
#include "catalog/CatalogAttribute.hpp"
+#include "catalog/CatalogRelationConstraints.hpp"
#include "catalog/CatalogTypedefs.hpp"
#include "utility/Macros.hpp"
#include "utility/PtrVector.hpp"
@@ -427,6 +429,14 @@ class CatalogRelationSchema {
return max_byte_lengths_;
}
+ const CatalogRelationConstraints& getConstraints() const {
+ return *constraints_;
+ }
+
+ CatalogRelationConstraints* getConstraintsMutable() {
+ return constraints_.get();
+ }
+
protected:
/**
* @brief Create a new relation.
@@ -456,7 +466,8 @@ class CatalogRelationSchema {
min_variable_byte_length_excluding_nullable_(0),
estimated_variable_byte_length_(0),
current_nullable_attribute_index_(-1),
- current_variable_length_attribute_index_(-1) {
+ current_variable_length_attribute_index_(-1),
+ constraints_(new CatalogRelationConstraints()) {
}
/**
@@ -532,6 +543,9 @@ class CatalogRelationSchema {
std::vector<int> variable_length_attribute_indices_;
int current_variable_length_attribute_index_;
+ // Primary key, foreign keys, etc.
+ std::unique_ptr<CatalogRelationConstraints> constraints_;
+
private:
friend class CatalogDatabase;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/cli/CommandExecutor.cpp
----------------------------------------------------------------------
diff --git a/cli/CommandExecutor.cpp b/cli/CommandExecutor.cpp
index 8acfae8..5b302c0 100644
--- a/cli/CommandExecutor.cpp
+++ b/cli/CommandExecutor.cpp
@@ -251,7 +251,8 @@ inline TypedValue executeQueryForSingleResult(
return value;
}
-void executeAnalyze(const tmb::client_id main_thread_client_id,
+void executeAnalyze(const PtrVector<ParseString> *arguments,
+ const tmb::client_id main_thread_client_id,
const tmb::client_id foreman_client_id,
MessageBus *bus,
QueryProcessor *query_processor,
@@ -260,8 +261,19 @@ void executeAnalyze(const tmb::client_id main_thread_client_id,
StorageManager *storage_manager = query_processor->getStorageManager();
std::unique_ptr<SqlParserWrapper> parser_wrapper(new SqlParserWrapper());
- std::vector<std::reference_wrapper<const CatalogRelation>> relations(
- database.begin(), database.end());
+ std::vector<std::reference_wrapper<const CatalogRelation>> relations;
+ if (arguments->size() == 0) {
+ relations.insert(relations.begin(), database.begin(), database.end());
+ } else {
+ for (const auto &rel_name : *arguments) {
+ const CatalogRelation *rel = database.getRelationByName(rel_name.value());
+ if (rel == nullptr) {
+ THROW_SQL_ERROR_AT(&rel_name) << "Table does not exist";
+ } else {
+ relations.emplace_back(*rel);
+ }
+ }
+ }
// Analyze each relation in the database.
for (const CatalogRelation &relation : relations) {
@@ -341,8 +353,11 @@ void executeCommand(const ParseStatement &statement,
executeDescribeTable(arguments, catalog_database, out);
}
} else if (command_str == C::kAnalyzeCommand) {
- executeAnalyze(
- main_thread_client_id, foreman_client_id, bus, query_processor, out);
+ executeAnalyze(arguments,
+ main_thread_client_id,
+ foreman_client_id,
+ bus,
+ query_processor, out);
} else {
THROW_SQL_ERROR_AT(command.command()) << "Invalid Command";
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/cli/QuickstepCli.cpp
----------------------------------------------------------------------
diff --git a/cli/QuickstepCli.cpp b/cli/QuickstepCli.cpp
index 154c689..5811d2c 100644
--- a/cli/QuickstepCli.cpp
+++ b/cli/QuickstepCli.cpp
@@ -52,6 +52,9 @@ typedef quickstep::LineReaderDumb LineReaderImpl;
#include <gperftools/profiler.h>
#endif
+#include "catalog/CatalogDatabase.hpp"
+#include "catalog/CatalogRelation.hpp"
+#include "catalog/CatalogRelationConstraints.hpp"
#include "cli/DefaultsConfigurator.hpp"
#include "cli/InputParserUtil.hpp"
#include "cli/PrintToScreen.hpp"
@@ -75,6 +78,7 @@ typedef quickstep::LineReaderDumb LineReaderImpl;
#include "storage/PreloaderThread.hpp"
#include "threading/ThreadIDBasedMap.hpp"
+#include "utility/EventProfiler.hpp"
#include "utility/ExecutionDAGVisualizer.hpp"
#include "utility/Macros.hpp"
#include "utility/PtrVector.hpp"
@@ -90,6 +94,8 @@ typedef quickstep::LineReaderDumb LineReaderImpl;
#include "tmb/message_bus.h"
#include "tmb/message_style.h"
+#include "google/protobuf/text_format.h"
+
namespace quickstep {
class CatalogRelation;
}
@@ -190,9 +196,48 @@ DEFINE_bool(visualize_execution_dag, false,
"If true, visualize the execution plan DAG into a graph in DOT "
"format (DOT is a plain text graph description language) which is "
"then printed via stderr.");
+DEFINE_string(profile_output, "",
+ "Output file name for writing the profiled events.");
} // namespace quickstep
+void addPrimaryKeyInfoForTPCHTables(quickstep::CatalogDatabase *database) {
+ const std::vector<std::pair<std::string, std::vector<std::string>>> rel_pkeys = {
+ { "region", { "r_regionkey" } },
+ { "nation", { "n_nationkey" } },
+ { "supplier", { "s_suppkey" } },
+ { "customer", { "c_custkey" } },
+ { "part", { "p_partkey" } },
+ { "partsupp", { "ps_partkey", "ps_suppkey" } },
+ { "orders", { "o_orderkey" } }
+ };
+ for (const auto &rel_pair : rel_pkeys) {
+ CatalogRelation *rel = database->getRelationByNameMutable(rel_pair.first);
+ std::vector<quickstep::attribute_id> attrs;
+ for (const auto &pkey : rel_pair.second) {
+ attrs.emplace_back(rel->getAttributeByName(pkey)->getID());
+ }
+ rel->getConstraintsMutable()->setPrimaryKey(attrs);
+ }
+}
+
+void addPrimaryKeyInfoForSSBTables(quickstep::CatalogDatabase *database) {
+ const std::vector<std::pair<std::string, std::vector<std::string>>> rel_pkeys = {
+ { "supplier", { "s_suppkey" } },
+ { "customer", { "c_custkey" } },
+ { "part", { "p_partkey" } },
+ { "ddate", { "d_datekey" } }
+ };
+ for (const auto &rel_pair : rel_pkeys) {
+ CatalogRelation *rel = database->getRelationByNameMutable(rel_pair.first);
+ std::vector<quickstep::attribute_id> attrs;
+ for (const auto &pkey : rel_pair.second) {
+ attrs.emplace_back(rel->getAttributeByName(pkey)->getID());
+ }
+ rel->getConstraintsMutable()->setPrimaryKey(attrs);
+ }
+}
+
int main(int argc, char* argv[]) {
google::InitGoogleLogging(argv[0]);
gflags::ParseCommandLineFlags(&argc, &argv, true);
@@ -300,6 +345,15 @@ int main(int argc, char* argv[]) {
LOG(FATAL) << "NON-STANDARD EXCEPTION DURING STARTUP";
}
+// addPrimaryKeyInfoForTPCHTables(query_processor->getDefaultDatabase());
+// addPrimaryKeyInfoForSSBTables(query_processor->getDefaultDatabase());
+// std::string proto_str;
+// google::protobuf::TextFormat::PrintToString(
+// query_processor->getDefaultDatabase()->getProto(), &proto_str);
+// std::cerr << proto_str << "\n";
+// query_processor->markCatalogAltered();
+// query_processor->saveCatalog();
+
// Parse the CPU affinities for workers and the preloader thread, if enabled
// to warm up the buffer pool.
const vector<int> worker_cpu_affinities =
@@ -445,6 +499,7 @@ int main(int argc, char* argv[]) {
new quickstep::ExecutionDAGVisualizer(*query_handle->getQueryPlanMutable()));
}
+ quickstep::simple_profiler.clear();
start = std::chrono::steady_clock::now();
QueryExecutionUtil::ConstructAndSendAdmitRequestMessage(
main_thread_client_id,
@@ -457,6 +512,11 @@ int main(int argc, char* argv[]) {
main_thread_client_id, &bus);
end = std::chrono::steady_clock::now();
+ if (quickstep::FLAGS_visualize_dag) {
+ quickstep::DAGVisualizer visualizer(*query_handle->getQueryPlanMutable());
+ std::cerr << "\n" << visualizer.toDOT() << "\n";
+ }
+
const CatalogRelation *query_result_relation = query_handle->getQueryResultRelation();
if (query_result_relation) {
PrintToScreen::PrintRelation(*query_result_relation,
@@ -488,6 +548,11 @@ int main(int argc, char* argv[]) {
dag_visualizer->bindProfilingStats(profiling_stats);
std::cerr << "\n" << dag_visualizer->toDOT() << "\n";
}
+ if (!quickstep::FLAGS_profile_output.empty()) {
+ std::ofstream ofs(quickstep::FLAGS_profile_output, std::ios::out);
+ quickstep::simple_profiler.writeToStream(ofs);
+ ofs.close();
+ }
} catch (const std::exception &e) {
fprintf(stderr, "QUERY EXECUTION ERROR: %s\n", e.what());
break;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/compression/CompressionDictionaryLite.hpp
----------------------------------------------------------------------
diff --git a/compression/CompressionDictionaryLite.hpp b/compression/CompressionDictionaryLite.hpp
index 45019c0..8c7741f 100644
--- a/compression/CompressionDictionaryLite.hpp
+++ b/compression/CompressionDictionaryLite.hpp
@@ -174,6 +174,15 @@ class CompressionDictionaryLite {
}
}
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthForCode(const std::uint32_t code) const {
+ if (type_is_variable_length_) {
+ return variableLengthGetUntypedValueAndByteLengthHelper<std::uint32_t, check_null>(code);
+ } else {
+ return fixedLengthGetUntypedValueAndByteLengthHelper<std::uint32_t, check_null>(code);
+ }
+ }
+
/**
* @brief Get the value represented by the specified code as a TypedValue.
* @note This version is for codes of 8 bits or less. Also see
@@ -255,6 +264,39 @@ class CompressionDictionaryLite {
return retval;
}
+ template <typename CodeType, bool check_null = true>
+ inline std::pair<const void*, std::size_t> fixedLengthGetUntypedValueAndByteLengthHelper(
+ const CodeType code) const {
+ if (check_null && (code == getNullCode())) {
+ return std::make_pair(nullptr, 0);
+ }
+ DCHECK_LT(code, numberOfCodes());
+ return std::make_pair(static_cast<const char*>(dictionary_memory_)
+ + 2 * sizeof(std::uint32_t) // Header.
+ + code * type_fixed_byte_length_, // Index into value array.
+ type_fixed_byte_length_);
+ }
+
+ template <typename CodeType, bool check_null = true>
+ inline std::pair<const void*, std::size_t> variableLengthGetUntypedValueAndByteLengthHelper(
+ const CodeType code) const {
+ if (check_null && (code == getNullCode())) {
+ return std::make_pair(nullptr, 0);
+ }
+ DCHECK_LT(code, numberOfCodes());
+
+ const std::uint32_t value_offset = static_cast<const std::uint32_t*>(dictionary_memory_)[code + 2];
+ const void *data_ptr = variable_length_data_region_ + value_offset;
+ DCHECK_LT(data_ptr, static_cast<const char*>(dictionary_memory_) + dictionary_memory_size_);
+
+ std::size_t data_size = (code == *static_cast<const std::uint32_t*>(dictionary_memory_) - 1) ?
+ (static_cast<const char*>(dictionary_memory_)
+ + dictionary_memory_size_
+ - static_cast<const char*>(data_ptr))
+ : (static_cast<const std::uint32_t*>(dictionary_memory_)[code + 3] - value_offset);
+ return std::make_pair(data_ptr, data_size);
+ }
+
template <typename CodeType>
inline TypedValue fixedLengthGetTypedValueHelper(const CodeType code) const {
if (code == getNullCode()) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_execution/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_execution/CMakeLists.txt b/query_execution/CMakeLists.txt
index 8bf1ab1..6b872c0 100644
--- a/query_execution/CMakeLists.txt
+++ b/query_execution/CMakeLists.txt
@@ -260,6 +260,7 @@ target_link_libraries(quickstep_queryexecution_Worker
quickstep_threading_Thread
quickstep_threading_ThreadIDBasedMap
quickstep_threading_ThreadUtil
+ quickstep_utility_EventProfiler
quickstep_utility_Macros
tmb)
target_link_libraries(quickstep_queryexecution_WorkerDirectory
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_execution/QueryContext.cpp
----------------------------------------------------------------------
diff --git a/query_execution/QueryContext.cpp b/query_execution/QueryContext.cpp
index 7019b6a..fd0ed08 100644
--- a/query_execution/QueryContext.cpp
+++ b/query_execution/QueryContext.cpp
@@ -61,15 +61,16 @@ QueryContext::QueryContext(const serialization::QueryContext &proto,
<< "Attempted to create QueryContext from an invalid proto description:\n"
<< proto.DebugString();
+ for (int i = 0; i < proto.bloom_filters_size(); ++i) {
+ bloom_filters_.emplace_back(new BloomFilter(proto.bloom_filters(i)));
+ }
+
for (int i = 0; i < proto.aggregation_states_size(); ++i) {
aggregation_states_.emplace_back(
AggregationOperationState::ReconstructFromProto(proto.aggregation_states(i),
database,
- storage_manager));
- }
-
- for (int i = 0; i < proto.bloom_filters_size(); ++i) {
- bloom_filters_.emplace_back(new BloomFilter(proto.bloom_filters(i)));
+ storage_manager,
+ bloom_filters_));
}
for (int i = 0; i < proto.generator_functions_size(); ++i) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_execution/Worker.cpp
----------------------------------------------------------------------
diff --git a/query_execution/Worker.cpp b/query_execution/Worker.cpp
index a582132..444c98d 100644
--- a/query_execution/Worker.cpp
+++ b/query_execution/Worker.cpp
@@ -29,6 +29,7 @@
#include "relational_operators/WorkOrder.hpp"
#include "threading/ThreadIDBasedMap.hpp"
#include "threading/ThreadUtil.hpp"
+#include "utility/EventProfiler.hpp"
#include "glog/logging.h"
@@ -116,8 +117,12 @@ void Worker::executeWorkOrderHelper(const TaggedMessage &tagged_message,
const size_t query_id_for_workorder = worker_message.getWorkOrder()->getQueryID();
// Start measuring the execution time.
+ auto *container = relop_profiler.getContainer();
+ auto *line = container->getEventLine(worker_message.getRelationalOpIndex());
start = std::chrono::steady_clock::now();
+ line->emplace_back();
worker_message.getWorkOrder()->execute();
+ line->back().endEvent();
end = std::chrono::steady_clock::now();
delete worker_message.getWorkOrder();
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 04e54d3..7626305 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -79,6 +79,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
quickstep_queryoptimizer_QueryPlan
quickstep_queryoptimizer_costmodel_CostModel
quickstep_queryoptimizer_costmodel_SimpleCostModel
+ quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
quickstep_queryoptimizer_expressions_AggregateFunction
quickstep_queryoptimizer_expressions_Alias
quickstep_queryoptimizer_expressions_AttributeReference
@@ -196,6 +197,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
quickstep_queryoptimizer_LogicalToPhysicalMapper
quickstep_queryoptimizer_logical_Logical
quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_rules_AttachBloomFilters
quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
quickstep_queryoptimizer_rules_SwapProbeBuild
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 2ae6a4b..4204174 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -59,6 +59,7 @@
#include "query_optimizer/QueryHandle.hpp"
#include "query_optimizer/QueryPlan.hpp"
#include "query_optimizer/cost_model/SimpleCostModel.hpp"
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
#include "query_optimizer/expressions/AggregateFunction.hpp"
#include "query_optimizer/expressions/Alias.hpp"
#include "query_optimizer/expressions/AttributeReference.hpp"
@@ -167,6 +168,8 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) {
cost_model_.reset(
new cost::SimpleCostModel(top_level_physical_plan_->shared_subplans()));
+ star_schema_cost_model_.reset(
+ new cost::StarSchemaSimpleCostModel(top_level_physical_plan_->shared_subplans()));
const CatalogRelation *result_relation = nullptr;
@@ -600,8 +603,10 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
std::vector<attribute_id> probe_attribute_ids;
std::vector<attribute_id> build_attribute_ids;
- std::vector<attribute_id> probe_original_attribute_ids;
- std::vector<attribute_id> build_original_attribute_ids;
+ const P::BloomFilterConfig &bloom_filter_config =
+ physical_plan->bloom_filter_config();
+ std::vector<attribute_id> probe_side_bloom_filter_attribute_ids;
+ std::vector<attribute_id> build_side_bloom_filter_attribute_ids;
const CatalogRelation *referenced_stored_probe_relation = nullptr;
const CatalogRelation *referenced_stored_build_relation = nullptr;
@@ -616,18 +621,6 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
const std::vector<E::AttributeReferencePtr> &left_join_attributes =
physical_plan->left_join_attributes();
for (const E::AttributeReferencePtr &left_join_attribute : left_join_attributes) {
- // Try to determine the original stored relation referenced in the Hash Join.
- referenced_stored_probe_relation =
- optimizer_context_->catalog_database()->getRelationByName(left_join_attribute->relation_name());
- if (referenced_stored_probe_relation == nullptr) {
- // Hash Join optimizations are not possible, if the referenced relation cannot be determined.
- skip_hash_join_optimization = true;
- } else {
- const attribute_id probe_operator_attribute_id =
- referenced_stored_probe_relation->getAttributeByName(left_join_attribute->attribute_name())->getID();
- probe_original_attribute_ids.emplace_back(probe_operator_attribute_id);
- }
-
const CatalogAttribute *probe_catalog_attribute
= attribute_substitution_map_[left_join_attribute->id()];
probe_attribute_ids.emplace_back(probe_catalog_attribute->getID());
@@ -640,18 +633,6 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
const std::vector<E::AttributeReferencePtr> &right_join_attributes =
physical_plan->right_join_attributes();
for (const E::AttributeReferencePtr &right_join_attribute : right_join_attributes) {
- // Try to determine the original stored relation referenced in the Hash Join.
- referenced_stored_build_relation =
- optimizer_context_->catalog_database()->getRelationByName(right_join_attribute->relation_name());
- if (referenced_stored_build_relation == nullptr) {
- // Hash Join optimizations are not possible, if the referenced relation cannot be determined.
- skip_hash_join_optimization = true;
- } else {
- const attribute_id build_operator_attribute_id =
- referenced_stored_build_relation->getAttributeByName(right_join_attribute->attribute_name())->getID();
- build_original_attribute_ids.emplace_back(build_operator_attribute_id);
- }
-
const CatalogAttribute *build_catalog_attribute
= attribute_substitution_map_[right_join_attribute->id()];
build_attribute_ids.emplace_back(build_catalog_attribute->getID());
@@ -661,6 +642,20 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
}
}
+ for (const auto &bf : bloom_filter_config.probe_side_bloom_filters) {
+ const CatalogAttribute *probe_bf_catalog_attribute
+ = attribute_substitution_map_[bf.attribute->id()];
+ probe_side_bloom_filter_attribute_ids.emplace_back(
+ probe_bf_catalog_attribute->getID());
+ }
+
+ for (const auto &bf : bloom_filter_config.build_side_bloom_filters) {
+ const CatalogAttribute *build_bf_catalog_attribute
+ = attribute_substitution_map_[bf.attribute->id()];
+ build_side_bloom_filter_attribute_ids.emplace_back(
+ build_bf_catalog_attribute->getID());
+ }
+
// Remember key types for call to SimplifyHashTableImplTypeProto() below.
std::vector<const Type*> key_types;
for (std::vector<E::AttributeReferencePtr>::size_type attr_idx = 0;
@@ -675,6 +670,8 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
key_types.push_back(&left_attribute_type);
}
+ std::size_t build_cardinality = cost_model_->estimateCardinality(build_physical);
+
// Convert the residual predicate proto.
QueryContext::predicate_id residual_predicate_index = QueryContext::kInvalidPredicateId;
if (physical_plan->residual_predicate()) {
@@ -835,9 +832,11 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
join_operator_index,
referenced_stored_build_relation,
referenced_stored_probe_relation,
- std::move(build_original_attribute_ids),
- std::move(probe_original_attribute_ids),
- join_hash_table_index);
+ bloom_filter_config,
+ std::move(build_side_bloom_filter_attribute_ids),
+ std::move(probe_side_bloom_filter_attribute_ids),
+ join_hash_table_index,
+ star_schema_cost_model_->estimateCardinality(build_physical));
}
}
@@ -1351,6 +1350,16 @@ void ExecutionGenerator::convertAggregate(
findRelationInfoOutputByPhysical(physical_plan->input());
aggr_state_proto->set_relation_id(input_relation_info->relation->getID());
+ const P::BloomFilterConfig &bloom_filter_config =
+ physical_plan->bloom_filter_config();
+ std::vector<attribute_id> bloom_filter_attribute_ids;
+
+ for (const auto &bf : bloom_filter_config.probe_side_bloom_filters) {
+ const CatalogAttribute *bf_catalog_attribute
+ = attribute_substitution_map_[bf.attribute->id()];
+ bloom_filter_attribute_ids.emplace_back(bf_catalog_attribute->getID());
+ }
+
std::vector<const Type*> group_by_types;
for (const E::NamedExpressionPtr &grouping_expression : physical_plan->grouping_expressions()) {
unique_ptr<const Scalar> execution_group_by_expression;
@@ -1465,6 +1474,13 @@ void ExecutionGenerator::convertAggregate(
std::forward_as_tuple(finalize_aggregation_operator_index, output_relation));
temporary_relation_info_vec_.emplace_back(finalize_aggregation_operator_index,
output_relation);
+
+ if (FLAGS_optimize_joins) {
+ execution_heuristics_->addAggregateInfo(aggregation_operator_index,
+ bloom_filter_config,
+ std::move(bloom_filter_attribute_ids),
+ aggr_state_index);
+ }
}
void ExecutionGenerator::convertSort(const P::SortPtr &physical_sort) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/ExecutionGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp
index 9186707..14939ff 100644
--- a/query_optimizer/ExecutionGenerator.hpp
+++ b/query_optimizer/ExecutionGenerator.hpp
@@ -37,7 +37,6 @@
#include "query_optimizer/QueryHandle.hpp"
#include "query_optimizer/QueryPlan.hpp"
#include "query_optimizer/cost_model/CostModel.hpp"
-#include "query_optimizer/cost_model/SimpleCostModel.hpp"
#include "query_optimizer/expressions/ExprId.hpp"
#include "query_optimizer/expressions/NamedExpression.hpp"
#include "query_optimizer/expressions/Predicate.hpp"
@@ -423,6 +422,7 @@ class ExecutionGenerator {
* @brief The cost model to use for creating the execution plan.
*/
std::unique_ptr<cost::CostModel> cost_model_;
+ std::unique_ptr<cost::CostModel> star_schema_cost_model_;
physical::TopLevelPlanPtr top_level_physical_plan_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/ExecutionHeuristics.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.cpp b/query_optimizer/ExecutionHeuristics.cpp
index fc31c53..7d12745 100644
--- a/query_optimizer/ExecutionHeuristics.cpp
+++ b/query_optimizer/ExecutionHeuristics.cpp
@@ -25,6 +25,8 @@
#include "catalog/CatalogTypedefs.hpp"
#include "query_execution/QueryContext.pb.h"
#include "query_optimizer/QueryPlan.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
#include "utility/Macros.hpp"
#include "glog/logging.h"
@@ -32,95 +34,106 @@
namespace quickstep {
namespace optimizer {
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+static const std::size_t kNumBitsPerByte = 8;
+DEFINE_double(bloom_num_bits_per_tuple, kNumBitsPerByte,
+ "Number of bits per tuple used to size the Bloom filter.");
+
+DEFINE_int32(bloom_num_hash_fns, 1,
+ "Number of hash functions used in the Bloom filter.");
+
void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
serialization::QueryContext *query_context_proto) {
- // Currently this only optimizes left deep joins using bloom filters.
- // It uses a simple algorithm to discover the left deep joins.
- // It starts with the first hash join in the plan and keeps on iterating
- // over the next hash joins, till a probe on a different relation id is found.
- // The set of hash joins found in this way forms a chain and can be recognized
- // as a left deep join. It becomes a candidate for optimization.
-
- // The optimization is done by modifying each of the build operators in the chain
- // to generate a bloom filter on the build key during their hash table creation.
- // The leaf-level probe operator is then modified to query all the bloom
- // filters generated from all the build operators in the chain. These
- // bloom filters are queried to test the membership of the probe key
- // just prior to probing the hash table.
-
- QueryPlan::DAGNodeIndex origin_node = 0;
- while (origin_node < hash_joins_.size() - 1) {
- std::vector<std::size_t> chained_nodes;
- chained_nodes.push_back(origin_node);
- for (std::size_t i = origin_node + 1; i < hash_joins_.size(); ++i) {
- const relation_id checked_relation_id = hash_joins_[origin_node].referenced_stored_probe_relation_->getID();
- const relation_id expected_relation_id = hash_joins_[i].referenced_stored_probe_relation_->getID();
- if (checked_relation_id == expected_relation_id) {
- chained_nodes.push_back(i);
- } else {
- break;
- }
+ std::map<std::pair<E::ExprId, P::PhysicalPtr>,
+ std::pair<QueryContext::bloom_filter_id, QueryPlan::DAGNodeIndex>> bloom_filter_map;
+ for (const auto &info : hash_joins_) {
+ auto *hash_table_proto =
+ query_context_proto->mutable_join_hash_tables(info.join_hash_table_id_);
+ const auto &bloom_filter_config = info.bloom_filter_config_;
+
+ for (std::size_t i = 0; i < info.build_side_bloom_filter_ids_.size(); ++i) {
+ const QueryContext::bloom_filter_id bloom_filter_id = query_context_proto->bloom_filters_size();
+ serialization::BloomFilter *bloom_filter_proto = query_context_proto->add_bloom_filters();
+ setBloomFilterProperties(bloom_filter_proto, info.estimated_build_relation_cardinality_);
+
+ const auto &build_side_bf =
+ bloom_filter_config.build_side_bloom_filters[i];
+ bloom_filter_map.emplace(
+ std::make_pair(build_side_bf.attribute->id(),
+ bloom_filter_config.builder),
+ std::make_pair(bloom_filter_id, info.build_operator_index_));
+
+ auto *build_side_bloom_filter = hash_table_proto->add_build_side_bloom_filters();
+ build_side_bloom_filter->set_bloom_filter_id(bloom_filter_id);
+ build_side_bloom_filter->set_attr_id(info.build_side_bloom_filter_ids_[i]);
+ std::cerr << "Build " << build_side_bf.attribute->toString()
+ << " @" << bloom_filter_config.builder << "\n";
}
+ }
- // Only chains of length greater than one are suitable candidates for semi-join optimization.
- if (chained_nodes.size() > 1) {
- std::unordered_map<QueryContext::bloom_filter_id, std::vector<attribute_id>> probe_bloom_filter_info;
- for (const std::size_t node : chained_nodes) {
- // Provision for a new bloom filter to be used by the build operator.
- const QueryContext::bloom_filter_id bloom_filter_id = query_context_proto->bloom_filters_size();
- serialization::BloomFilter *bloom_filter_proto = query_context_proto->add_bloom_filters();
-
- // Modify the bloom filter properties based on the statistics of the relation.
- setBloomFilterProperties(bloom_filter_proto, hash_joins_[node].referenced_stored_build_relation_);
-
- // Add build-side bloom filter information to the corresponding hash table proto.
- query_context_proto->mutable_join_hash_tables(hash_joins_[node].join_hash_table_id_)
- ->add_build_side_bloom_filter_id(bloom_filter_id);
-
- probe_bloom_filter_info.insert(std::make_pair(bloom_filter_id, hash_joins_[node].probe_attributes_));
- }
-
- // Add probe-side bloom filter information to the corresponding hash table proto for each build-side bloom filter.
- for (const std::pair<QueryContext::bloom_filter_id, std::vector<attribute_id>>
- &bloom_filter_info : probe_bloom_filter_info) {
- auto *probe_side_bloom_filter =
- query_context_proto->mutable_join_hash_tables(hash_joins_[origin_node].join_hash_table_id_)
- ->add_probe_side_bloom_filters();
- probe_side_bloom_filter->set_probe_side_bloom_filter_id(bloom_filter_info.first);
- for (const attribute_id &probe_attribute_id : bloom_filter_info.second) {
- probe_side_bloom_filter->add_probe_side_attr_ids(probe_attribute_id);
- }
- }
-
- // Add node dependencies from chained build nodes to origin node probe.
- for (std::size_t i = 1; i < chained_nodes.size(); ++i) { // Note: It starts from index 1.
- query_plan->addDirectDependency(hash_joins_[origin_node].join_operator_index_,
- hash_joins_[origin_node + i].build_operator_index_,
- true /* is_pipeline_breaker */);
- }
+ for (const auto &info : hash_joins_) {
+ auto *hash_table_proto =
+ query_context_proto->mutable_join_hash_tables(info.join_hash_table_id_);
+ const auto &bloom_filter_config = info.bloom_filter_config_;
+
+ for (std::size_t i = 0; i < info.probe_side_bloom_filter_ids_.size(); ++i) {
+ auto *probe_side_bloom_filter = hash_table_proto->add_probe_side_bloom_filters();
+ const auto &probe_side_bf =
+ bloom_filter_config.probe_side_bloom_filters[i];
+ std::cerr << "HashJoin probe " << probe_side_bf.attribute->toString()
+ << " @" << probe_side_bf.builder << "\n";
+
+ const auto &build_side_info =
+ bloom_filter_map.at(
+ std::make_pair(probe_side_bf.source_attribute->id(),
+ probe_side_bf.builder));
+ probe_side_bloom_filter->set_bloom_filter_id(build_side_info.first);
+ probe_side_bloom_filter->set_attr_id(info.probe_side_bloom_filter_ids_[i]);
+// std::cerr << "HashJoin probe attr_id = " << info.probe_side_bloom_filter_ids_[i] << "\n";
+
+ query_plan->addDirectDependency(info.join_operator_index_,
+ build_side_info.second,
+ true /* is_pipeline_breaker */);
}
+ }
- // Update the origin node.
- origin_node = chained_nodes.back() + 1;
+ for (const auto &info : aggregates_) {
+ auto *aggregate_proto =
+ query_context_proto->mutable_aggregation_states(info.aggregate_state_id_);
+ const auto &bloom_filter_config = info.bloom_filter_config_;
+
+ for (std::size_t i = 0; i < info.bloom_filter_ids_.size(); ++i) {
+ auto *bloom_filter = aggregate_proto->add_bloom_filters();
+ const auto &bf =
+ bloom_filter_config.probe_side_bloom_filters[i];
+ std::cerr << "Aggregate probe " << bf.attribute->toString()
+ << " @" << bf.builder << "\n";
+
+ const auto &build_side_info =
+ bloom_filter_map.at(
+ std::make_pair(bf.source_attribute->id(),
+ bf.builder));
+ bloom_filter->set_bloom_filter_id(build_side_info.first);
+ bloom_filter->set_attr_id(info.bloom_filter_ids_[i]);
+// std::cerr << "Aggregate probe attr_id = "
+// << info.bloom_filter_ids_[i] << "\n";
+
+ query_plan->addDirectDependency(info.aggregate_operator_index_,
+ build_side_info.second,
+ true /* is_pipeline_breaker */);
+ }
}
}
void ExecutionHeuristics::setBloomFilterProperties(serialization::BloomFilter *bloom_filter_proto,
- const CatalogRelation *relation) {
- const std::size_t cardinality = relation->estimateTupleCardinality();
- if (cardinality < kOneThousand) {
- bloom_filter_proto->set_bloom_filter_size(kOneThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kVeryLowSparsityHash);
- } else if (cardinality < kTenThousand) {
- bloom_filter_proto->set_bloom_filter_size(kTenThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kLowSparsityHash);
- } else if (cardinality < kHundredThousand) {
- bloom_filter_proto->set_bloom_filter_size(kHundredThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kMediumSparsityHash);
- } else {
- bloom_filter_proto->set_bloom_filter_size(kMillion / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kHighSparsityHash);
- }
+ const std::size_t cardinality) {
+ bloom_filter_proto->set_bloom_filter_size(
+ BloomFilter::getNearestAllowedSize(
+ (FLAGS_bloom_num_bits_per_tuple * cardinality) / kNumBitsPerByte));
+// std::cerr << "bf size = " << bloom_filter_proto->bloom_filter_size() << "\n";
+ bloom_filter_proto->set_number_of_hashes(FLAGS_bloom_num_hash_fns);
}
} // namespace optimizer
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/ExecutionHeuristics.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.hpp b/query_optimizer/ExecutionHeuristics.hpp
index 92a7fe8..0755124 100644
--- a/query_optimizer/ExecutionHeuristics.hpp
+++ b/query_optimizer/ExecutionHeuristics.hpp
@@ -25,6 +25,7 @@
#include "query_execution/QueryContext.hpp"
#include "query_execution/QueryContext.pb.h"
#include "query_optimizer/QueryPlan.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
#include "utility/Macros.hpp"
#include "glog/logging.h"
@@ -49,7 +50,7 @@ class ExecutionHeuristics {
static const std::size_t kHundredThousand = 100000;
static const std::size_t kMillion = 1000000;
- static const std::size_t kCompressionFactor = 10;
+ static const std::size_t kCompressionFactor = 1;
static const std::size_t kVeryLowSparsityHash = 1;
static const std::size_t kLowSparsityHash = 2;
@@ -65,25 +66,48 @@ class ExecutionHeuristics {
const QueryPlan::DAGNodeIndex join_operator_index,
const CatalogRelation *referenced_stored_build_relation,
const CatalogRelation *referenced_stored_probe_relation,
- std::vector<attribute_id> &&build_attributes,
- std::vector<attribute_id> &&probe_attributes,
- const QueryContext::join_hash_table_id join_hash_table_id)
+ const physical::BloomFilterConfig &bloom_filter_config,
+ std::vector<attribute_id> &&build_side_bloom_filter_ids,
+ std::vector<attribute_id> &&probe_side_bloom_filter_ids,
+ const QueryContext::join_hash_table_id join_hash_table_id,
+ const std::size_t estimated_build_relation_cardinality)
: build_operator_index_(build_operator_index),
join_operator_index_(join_operator_index),
referenced_stored_build_relation_(referenced_stored_build_relation),
referenced_stored_probe_relation_(referenced_stored_probe_relation),
- build_attributes_(std::move(build_attributes)),
- probe_attributes_(std::move(probe_attributes)),
- join_hash_table_id_(join_hash_table_id) {
+ bloom_filter_config_(bloom_filter_config),
+ build_side_bloom_filter_ids_(std::move(build_side_bloom_filter_ids)),
+ probe_side_bloom_filter_ids_(std::move(probe_side_bloom_filter_ids)),
+ join_hash_table_id_(join_hash_table_id),
+ estimated_build_relation_cardinality_(estimated_build_relation_cardinality) {
}
const QueryPlan::DAGNodeIndex build_operator_index_;
const QueryPlan::DAGNodeIndex join_operator_index_;
const CatalogRelation *referenced_stored_build_relation_;
const CatalogRelation *referenced_stored_probe_relation_;
- const std::vector<attribute_id> build_attributes_;
- const std::vector<attribute_id> probe_attributes_;
+ const physical::BloomFilterConfig &bloom_filter_config_;
+ const std::vector<attribute_id> build_side_bloom_filter_ids_;
+ const std::vector<attribute_id> probe_side_bloom_filter_ids_;
const QueryContext::join_hash_table_id join_hash_table_id_;
+ const std::size_t estimated_build_relation_cardinality_;
+ };
+
+ struct AggregateInfo {
+ AggregateInfo(const QueryPlan::DAGNodeIndex aggregate_operator_index,
+ const physical::BloomFilterConfig &bloom_filter_config,
+ std::vector<attribute_id> &&bloom_filter_ids,
+ const QueryContext::aggregation_state_id aggregate_state_id)
+ : aggregate_operator_index_(aggregate_operator_index),
+ bloom_filter_config_(bloom_filter_config),
+ bloom_filter_ids_(bloom_filter_ids),
+ aggregate_state_id_(aggregate_state_id) {
+ }
+
+ const QueryPlan::DAGNodeIndex aggregate_operator_index_;
+ const physical::BloomFilterConfig &bloom_filter_config_;
+ const std::vector<attribute_id> bloom_filter_ids_;
+ const QueryContext::aggregation_state_id aggregate_state_id_;
};
@@ -109,16 +133,30 @@ class ExecutionHeuristics {
const QueryPlan::DAGNodeIndex join_operator_index,
const CatalogRelation *referenced_stored_build_relation,
const CatalogRelation *referenced_stored_probe_relation,
- std::vector<attribute_id> &&build_attributes,
- std::vector<attribute_id> &&probe_attributes,
- const QueryContext::join_hash_table_id join_hash_table_id) {
- hash_joins_.push_back(HashJoinInfo(build_operator_index,
- join_operator_index,
- referenced_stored_build_relation,
- referenced_stored_probe_relation,
- std::move(build_attributes),
- std::move(probe_attributes),
- join_hash_table_id));
+ const physical::BloomFilterConfig &bloom_filter_config,
+ std::vector<attribute_id> &&build_side_bloom_filter_ids,
+ std::vector<attribute_id> &&probe_side_bloom_filter_ids,
+ const QueryContext::join_hash_table_id join_hash_table_id,
+ const std::size_t estimated_build_relation_cardinality) {
+ hash_joins_.emplace_back(build_operator_index,
+ join_operator_index,
+ referenced_stored_build_relation,
+ referenced_stored_probe_relation,
+ bloom_filter_config,
+ std::move(build_side_bloom_filter_ids),
+ std::move(probe_side_bloom_filter_ids),
+ join_hash_table_id,
+ estimated_build_relation_cardinality);
+ }
+
+ inline void addAggregateInfo(const QueryPlan::DAGNodeIndex aggregate_operator_index,
+ const physical::BloomFilterConfig &bloom_filter_config,
+ std::vector<attribute_id> &&bloom_filter_ids,
+ const QueryContext::aggregation_state_id aggregate_state_id) {
+ aggregates_.emplace_back(aggregate_operator_index,
+ bloom_filter_config,
+ std::move(bloom_filter_ids),
+ aggregate_state_id);
}
/**
@@ -139,10 +177,11 @@ class ExecutionHeuristics {
* @param relation The catalog relation on which bloom filter is being built.
**/
void setBloomFilterProperties(serialization::BloomFilter *bloom_filter_proto,
- const CatalogRelation *relation);
+ const std::size_t cardinality);
private:
std::vector<HashJoinInfo> hash_joins_;
+ std::vector<AggregateInfo> aggregates_;
DISALLOW_COPY_AND_ASSIGN(ExecutionHeuristics);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 897b212..b22291b 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -26,6 +26,7 @@
#include "query_optimizer/Validator.hpp"
#include "query_optimizer/logical/Logical.hpp"
#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/AttachBloomFilters.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
#include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
#include "query_optimizer/rules/SwapProbeBuild.hpp"
@@ -96,10 +97,12 @@ P::PhysicalPtr PhysicalGenerator::generateInitialPlan(
P::PhysicalPtr PhysicalGenerator::optimizePlan() {
std::vector<std::unique_ptr<Rule<P::Physical>>> rules;
if (FLAGS_reorder_hash_joins) {
+ rules.emplace_back(new PruneColumns());
rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
}
rules.emplace_back(new PruneColumns());
- rules.emplace_back(new SwapProbeBuild());
+ // rules.emplace_back(new SwapProbeBuild());
+ rules.emplace_back(new AttachBloomFilters());
for (std::unique_ptr<Rule<P::Physical>> &rule : rules) {
physical_plan_ = rule->apply(physical_plan_);
@@ -110,7 +113,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
DVLOG(4) << "Optimized physical plan:\n" << physical_plan_->toString();
if (FLAGS_visualize_plan) {
- quickstep::PlanVisualizer plan_visualizer;
+ quickstep::PlanVisualizer plan_visualizer;
std::cerr << "\n" << plan_visualizer.visualize(physical_plan_) << "\n";
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/cost_model/SimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/SimpleCostModel.cpp b/query_optimizer/cost_model/SimpleCostModel.cpp
index e5222ff..6794f21 100644
--- a/query_optimizer/cost_model/SimpleCostModel.cpp
+++ b/query_optimizer/cost_model/SimpleCostModel.cpp
@@ -88,7 +88,7 @@ std::size_t SimpleCostModel::estimateCardinalityForTopLevelPlan(
std::size_t SimpleCostModel::estimateCardinalityForTableReference(
const P::TableReferencePtr &physical_plan) {
- return physical_plan->relation()->estimateTupleCardinality();
+ return physical_plan->relation()->getStatistics().getNumTuples();
}
std::size_t SimpleCostModel::estimateCardinalityForSelection(
@@ -119,7 +119,7 @@ std::size_t SimpleCostModel::estimateCardinalityForAggregate(
return 1;
}
return std::max(static_cast<std::size_t>(1),
- estimateCardinality(physical_plan->input()) / 10);
+ estimateCardinality(physical_plan->input()));
}
std::size_t SimpleCostModel::estimateCardinalityForWindowAggregate(
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index badfeb1..ea21a2e 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -121,12 +121,26 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForTableGenerator(
std::size_t StarSchemaSimpleCostModel::estimateCardinalityForHashJoin(
const P::HashJoinPtr &physical_plan) {
- std::size_t left_cardinality = estimateCardinality(physical_plan->left());
- std::size_t right_cardinality = estimateCardinality(physical_plan->right());
- double left_selectivity = estimateSelectivity(physical_plan->left());
- double right_selectivity = estimateSelectivity(physical_plan->right());
- return std::max(static_cast<std::size_t>(left_cardinality * right_selectivity) + 1,
- static_cast<std::size_t>(right_cardinality * left_selectivity) + 1);
+ const P::PhysicalPtr &left_child = physical_plan->left();
+ const P::PhysicalPtr &right_child = physical_plan->right();
+
+ std::size_t left_cardinality = estimateCardinality(left_child);
+ std::size_t right_cardinality = estimateCardinality(right_child);
+
+ std::size_t estimated_cardinality = std::max(left_cardinality, right_cardinality);
+ if (left_child->impliesUniqueAttributes(physical_plan->left_join_attributes())) {
+ double left_selectivity = estimateSelectivity(left_child);
+ estimated_cardinality =
+ std::min(estimated_cardinality,
+ static_cast<std::size_t>(right_cardinality * left_selectivity));
+ }
+ if (right_child->impliesUniqueAttributes(physical_plan->right_join_attributes())) {
+ double right_selectivity = estimateSelectivity(right_child);
+ estimated_cardinality =
+ std::min(estimated_cardinality,
+ static_cast<std::size_t>(left_cardinality * right_selectivity));
+ }
+ return estimated_cardinality;
}
std::size_t StarSchemaSimpleCostModel::estimateCardinalityForNestedLoopsJoin(
@@ -141,7 +155,7 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForAggregate(
return 1;
}
return std::max(static_cast<std::size_t>(1),
- estimateCardinality(physical_plan->input()) / 10);
+ estimateCardinality(physical_plan->input()) / 100);
}
std::size_t StarSchemaSimpleCostModel::estimateCardinalityForWindowAggregate(
@@ -159,8 +173,14 @@ double StarSchemaSimpleCostModel::estimateSelectivity(
case P::PhysicalType::kHashJoin: {
const P::HashJoinPtr &hash_join =
std::static_pointer_cast<const P::HashJoin>(physical_plan);
- return std::min(estimateSelectivity(hash_join->left()),
- estimateSelectivity(hash_join->right()));
+ double left_selectivity = estimateSelectivity(hash_join->left());
+ double right_selectivity = estimateSelectivity(hash_join->right());
+ double min_sel = std::min(left_selectivity, right_selectivity);
+ double max_sel = std::max(left_selectivity, right_selectivity);
+ if (max_sel < 1) {
+ min_sel *= std::max(max_sel, 0.9);
+ }
+ return min_sel;
}
case P::PhysicalType::kNestedLoopsJoin: {
const P::NestedLoopsJoinPtr &nested_loop_join =
@@ -213,7 +233,7 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
case E::ExpressionType::kComparisonExpression: {
// Case 1 - Number of distinct values statistics available
// Case 1.1 - Equality comparison: 1.0 / num_distinct_values
- // Case 1.2 - Otherwise: 5.0 / num_distinct_values
+ // Case 1.2 - Otherwise: 0.5
// Case 2 - Number of distinct values statistics not available
// Case 2.1 - Equality comparison: 0.1
// Case 2.2 - Otherwise: 0.5
@@ -229,7 +249,7 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
double unit_selectivity = 1.0 / it->second;
return comparison_expression->isEqualityComparisonPredicate()
? unit_selectivity
- : std::min(0.5, unit_selectivity * 5.0);
+ : 0.5;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/expressions/ExpressionUtil.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/ExpressionUtil.hpp b/query_optimizer/expressions/ExpressionUtil.hpp
index 4c35719..5e9d29d 100644
--- a/query_optimizer/expressions/ExpressionUtil.hpp
+++ b/query_optimizer/expressions/ExpressionUtil.hpp
@@ -103,12 +103,12 @@ bool ContainsExpression(
* contain the other operand).
* @return True if \p left is a subset of \p right.
*/
-template <class NamedExpressionType>
+template <class NamedExpressionType1, class NamedExpressionType2>
bool SubsetOfExpressions(
- const std::vector<std::shared_ptr<const NamedExpressionType>> &left,
- const std::vector<std::shared_ptr<const NamedExpressionType>> &right) {
+ const std::vector<std::shared_ptr<const NamedExpressionType1>> &left,
+ const std::vector<std::shared_ptr<const NamedExpressionType2>> &right) {
UnorderedNamedExpressionSet supset(right.begin(), right.end());
- for (const std::shared_ptr<const NamedExpressionType> &expr : left) {
+ for (const std::shared_ptr<const NamedExpressionType1> &expr : left) {
if (supset.find(expr) == supset.end()) {
return false;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/physical/Aggregate.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/Aggregate.cpp b/query_optimizer/physical/Aggregate.cpp
index c582bba..969daa7 100644
--- a/query_optimizer/physical/Aggregate.cpp
+++ b/query_optimizer/physical/Aggregate.cpp
@@ -87,6 +87,11 @@ std::vector<E::AttributeReferencePtr> Aggregate::getReferencedAttributes()
return referenced_attributes;
}
+bool Aggregate::impliesUniqueAttributes(
+ const std::vector<expressions::AttributeReferencePtr> &attributes) const {
+ return E::SubsetOfExpressions(grouping_expressions_, attributes);
+}
+
void Aggregate::getFieldStringItems(
std::vector<std::string> *inline_field_names,
std::vector<std::string> *inline_field_values,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/physical/Aggregate.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/Aggregate.hpp b/query_optimizer/physical/Aggregate.hpp
index 2c2aee7..b40997c 100644
--- a/query_optimizer/physical/Aggregate.hpp
+++ b/query_optimizer/physical/Aggregate.hpp
@@ -98,6 +98,13 @@ class Aggregate : public Physical {
return false;
}
+ bool impliesUniqueAttributes(
+ const std::vector<expressions::AttributeReferencePtr> &attributes) const override;
+
+ const BloomFilterConfig &bloom_filter_config() const {
+ return bloom_filter_config_;
+ }
+
/**
* @brief Creates an Aggregate physical node.
*
@@ -111,9 +118,14 @@ class Aggregate : public Physical {
PhysicalPtr input,
const std::vector<expressions::NamedExpressionPtr> &grouping_expressions,
const std::vector<expressions::AliasPtr> &aggregate_expressions,
- const expressions::PredicatePtr &filter_predicate) {
+ const expressions::PredicatePtr &filter_predicate,
+ const BloomFilterConfig bloom_filter_config = BloomFilterConfig()) {
return AggregatePtr(
- new Aggregate(input, grouping_expressions, aggregate_expressions, filter_predicate));
+ new Aggregate(input,
+ grouping_expressions,
+ aggregate_expressions,
+ filter_predicate,
+ bloom_filter_config));
}
protected:
@@ -130,11 +142,13 @@ class Aggregate : public Physical {
PhysicalPtr input,
const std::vector<expressions::NamedExpressionPtr> &grouping_expressions,
const std::vector<expressions::AliasPtr> &aggregate_expressions,
- const expressions::PredicatePtr &filter_predicate)
+ const expressions::PredicatePtr &filter_predicate,
+ const BloomFilterConfig &bloom_filter_config)
: input_(input),
grouping_expressions_(grouping_expressions),
aggregate_expressions_(aggregate_expressions),
- filter_predicate_(filter_predicate) {
+ filter_predicate_(filter_predicate),
+ bloom_filter_config_(bloom_filter_config) {
addChild(input_);
}
@@ -142,6 +156,7 @@ class Aggregate : public Physical {
std::vector<expressions::NamedExpressionPtr> grouping_expressions_;
std::vector<expressions::AliasPtr> aggregate_expressions_;
expressions::PredicatePtr filter_predicate_;
+ BloomFilterConfig bloom_filter_config_;
DISALLOW_COPY_AND_ASSIGN(Aggregate);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/physical/HashJoin.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.cpp b/query_optimizer/physical/HashJoin.cpp
index 71c3692..f0e72e8 100644
--- a/query_optimizer/physical/HashJoin.cpp
+++ b/query_optimizer/physical/HashJoin.cpp
@@ -85,6 +85,15 @@ bool HashJoin::maybeCopyWithPrunedExpressions(
return false;
}
+bool HashJoin::impliesUniqueAttributes(
+ const std::vector<expressions::AttributeReferencePtr> &attributes) const {
+ return (left()->impliesUniqueAttributes(left_join_attributes_)
+ && right()->impliesUniqueAttributes(attributes))
+ || (right()->impliesUniqueAttributes(right_join_attributes_)
+ && left()->impliesUniqueAttributes(attributes));
+
+}
+
void HashJoin::getFieldStringItems(
std::vector<std::string> *inline_field_names,
std::vector<std::string> *inline_field_values,
@@ -106,6 +115,24 @@ void HashJoin::getFieldStringItems(
container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(left_join_attributes_));
container_child_field_names->push_back("right_join_attributes");
container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(right_join_attributes_));
+
+ if (!bloom_filter_config_.build_side_bloom_filters.empty()) {
+ container_child_field_names->push_back("build_side_bloom_filters");
+ container_child_fields->emplace_back();
+ auto &container = container_child_fields->back();
+ for (const auto& bf : bloom_filter_config_.build_side_bloom_filters) {
+ container.emplace_back(bf.attribute);
+ }
+ }
+
+ if (!bloom_filter_config_.probe_side_bloom_filters.empty()) {
+ container_child_field_names->push_back("probe_side_bloom_filters");
+ container_child_fields->emplace_back();
+ auto &container = container_child_fields->back();
+ for (const auto& bf : bloom_filter_config_.probe_side_bloom_filters) {
+ container.emplace_back(bf.attribute);
+ }
+ }
}
} // namespace physical
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/physical/HashJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.hpp b/query_optimizer/physical/HashJoin.hpp
index 988b139..2c62d9d 100644
--- a/query_optimizer/physical/HashJoin.hpp
+++ b/query_optimizer/physical/HashJoin.hpp
@@ -116,7 +116,8 @@ class HashJoin : public BinaryJoin {
right_join_attributes_,
residual_predicate_,
project_expressions(),
- join_type_);
+ join_type_,
+ bloom_filter_config_);
}
std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() const override;
@@ -125,6 +126,13 @@ class HashJoin : public BinaryJoin {
const expressions::UnorderedNamedExpressionSet &referenced_expressions,
PhysicalPtr *output) const override;
+ bool impliesUniqueAttributes(
+ const std::vector<expressions::AttributeReferencePtr> &attributes) const override;
+
+ const BloomFilterConfig &bloom_filter_config() const {
+ return bloom_filter_config_;
+ }
+
/**
* @brief Creates a physical HashJoin. The left/right operand does not correspond to
* probe/build operand.
@@ -145,7 +153,8 @@ class HashJoin : public BinaryJoin {
const std::vector<expressions::AttributeReferencePtr> &right_join_attributes,
const expressions::PredicatePtr &residual_predicate,
const std::vector<expressions::NamedExpressionPtr> &project_expressions,
- const JoinType join_type) {
+ const JoinType join_type,
+ const BloomFilterConfig bloom_filter_config = BloomFilterConfig()) {
return HashJoinPtr(
new HashJoin(left,
right,
@@ -153,7 +162,8 @@ class HashJoin : public BinaryJoin {
right_join_attributes,
residual_predicate,
project_expressions,
- join_type));
+ join_type,
+ bloom_filter_config));
}
protected:
@@ -173,18 +183,21 @@ class HashJoin : public BinaryJoin {
const std::vector<expressions::AttributeReferencePtr> &right_join_attributes,
const expressions::PredicatePtr &residual_predicate,
const std::vector<expressions::NamedExpressionPtr> &project_expressions,
- const JoinType join_type)
+ const JoinType join_type,
+ const BloomFilterConfig &bloom_filter_config)
: BinaryJoin(left, right, project_expressions),
left_join_attributes_(left_join_attributes),
right_join_attributes_(right_join_attributes),
residual_predicate_(residual_predicate),
- join_type_(join_type) {
+ join_type_(join_type),
+ bloom_filter_config_(bloom_filter_config) {
}
std::vector<expressions::AttributeReferencePtr> left_join_attributes_;
std::vector<expressions::AttributeReferencePtr> right_join_attributes_;
expressions::PredicatePtr residual_predicate_;
JoinType join_type_;
+ BloomFilterConfig bloom_filter_config_;
DISALLOW_COPY_AND_ASSIGN(HashJoin);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/physical/Physical.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/Physical.hpp b/query_optimizer/physical/Physical.hpp
index 9fdbeb5..389cd05 100644
--- a/query_optimizer/physical/Physical.hpp
+++ b/query_optimizer/physical/Physical.hpp
@@ -39,6 +39,56 @@ namespace physical {
class Physical;
typedef std::shared_ptr<const Physical> PhysicalPtr;
+struct BloomFilterConfig {
+ struct BuildSide {
+ BuildSide(const expressions::AttributeReferencePtr &attribute_in)
+ : attribute(attribute_in) {
+ }
+ expressions::AttributeReferencePtr attribute;
+ };
+ struct ProbeSide {
+ ProbeSide(const expressions::AttributeReferencePtr &attribute_in,
+ const expressions::AttributeReferencePtr &source_attribute_in,
+ const physical::PhysicalPtr &builder_in)
+ : attribute(attribute_in),
+ source_attribute(source_attribute_in),
+ builder(builder_in) {
+ }
+ expressions::AttributeReferencePtr attribute;
+ expressions::AttributeReferencePtr source_attribute;
+ PhysicalPtr builder;
+ };
+ BloomFilterConfig() {}
+ BloomFilterConfig(const PhysicalPtr &builder_in)
+ : builder(builder_in) {
+ }
+ BloomFilterConfig(const PhysicalPtr &builder_in,
+ const std::vector<BuildSide> &build_side_bloom_filters_in,
+ const std::vector<ProbeSide> &probe_side_bloom_filters_in)
+ : builder(builder_in),
+ build_side_bloom_filters(build_side_bloom_filters_in),
+ probe_side_bloom_filters(probe_side_bloom_filters_in) {
+ }
+ void addBuildSideBloomFilter(const expressions::AttributeReferencePtr &attribute_in) {
+ for (const auto &build_bf : build_side_bloom_filters) {
+ if (attribute_in == build_bf.attribute) {
+ return;
+ }
+ }
+ build_side_bloom_filters.emplace_back(attribute_in);
+ }
+ void addProbeSideBloomFilter(const expressions::AttributeReferencePtr &attribute_in,
+ const expressions::AttributeReferencePtr &source_attribute_in,
+ const physical::PhysicalPtr &builder_in) {
+ probe_side_bloom_filters.emplace_back(attribute_in,
+ source_attribute_in,
+ builder_in);
+ }
+ PhysicalPtr builder;
+ std::vector<BuildSide> build_side_bloom_filters;
+ std::vector<ProbeSide> probe_side_bloom_filters;
+};
+
/**
* @brief Base class for physical plan nodes.
*/
@@ -84,6 +134,11 @@ class Physical : public OptimizerTree<Physical> {
const expressions::UnorderedNamedExpressionSet &referenced_expressions,
PhysicalPtr *output) const = 0;
+ virtual bool impliesUniqueAttributes(
+ const std::vector<expressions::AttributeReferencePtr> &attributes) const {
+ return false;
+ }
+
protected:
/**
* @brief Constructor.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/physical/Selection.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/Selection.cpp b/query_optimizer/physical/Selection.cpp
index 5e1a03f..f4cdd1a 100644
--- a/query_optimizer/physical/Selection.cpp
+++ b/query_optimizer/physical/Selection.cpp
@@ -80,6 +80,12 @@ bool Selection::maybeCopyWithPrunedExpressions(
return false;
}
+bool Selection::impliesUniqueAttributes(
+ const std::vector<expressions::AttributeReferencePtr> &attributes) const {
+ return input()->impliesUniqueAttributes(attributes);
+}
+
+
void Selection::getFieldStringItems(
std::vector<std::string> *inline_field_names,
std::vector<std::string> *inline_field_values,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/physical/Selection.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/Selection.hpp b/query_optimizer/physical/Selection.hpp
index d8c1319..68cae65 100644
--- a/query_optimizer/physical/Selection.hpp
+++ b/query_optimizer/physical/Selection.hpp
@@ -84,6 +84,9 @@ class Selection : public Physical {
const expressions::UnorderedNamedExpressionSet &referenced_attributes,
PhysicalPtr *output) const override;
+ bool impliesUniqueAttributes(
+ const std::vector<expressions::AttributeReferencePtr> &attributes) const override;
+
/**
* @brief Creates a Selection.
*
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/physical/TableReference.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/TableReference.cpp b/query_optimizer/physical/TableReference.cpp
index 4a66ddf..bc73046 100644
--- a/query_optimizer/physical/TableReference.cpp
+++ b/query_optimizer/physical/TableReference.cpp
@@ -18,6 +18,7 @@
#include "query_optimizer/physical/TableReference.hpp"
#include <string>
+#include <set>
#include <vector>
#include "catalog/CatalogRelation.hpp"
@@ -30,6 +31,23 @@ namespace physical {
namespace E = ::quickstep::optimizer::expressions;
+bool TableReference::impliesUniqueAttributes(
+ const std::vector<expressions::AttributeReferencePtr> &attributes) const {
+ std::set<E::ExprId> attr_ids;
+ for (const auto &attr : attributes) {
+ attr_ids.emplace(attr->id());
+ }
+
+ std::set<attribute_id> rel_attr_ids;
+ for (std::size_t i = 0; i < attribute_list_.size(); ++i) {
+ if (attr_ids.find(attribute_list_[i]->id()) != attr_ids.end()) {
+ rel_attr_ids.emplace(i);
+ }
+ }
+
+ return relation_->getConstraints().impliesUniqueAttributes(rel_attr_ids);
+}
+
void TableReference::getFieldStringItems(
std::vector<std::string> *inline_field_names,
std::vector<std::string> *inline_field_values,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/43ed533b/query_optimizer/physical/TableReference.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/TableReference.hpp b/query_optimizer/physical/TableReference.hpp
index bde9b97..bc07043 100644
--- a/query_optimizer/physical/TableReference.hpp
+++ b/query_optimizer/physical/TableReference.hpp
@@ -88,6 +88,9 @@ class TableReference : public Physical {
return false;
}
+ bool impliesUniqueAttributes(
+ const std::vector<expressions::AttributeReferencePtr> &attributes) const override;
+
/**
* @brief Creates a TableReference.
*
[07/13] incubator-quickstep git commit: QUICKSTEP-38. Add support for
python3 to utility scripts
Posted by ji...@apache.org.
QUICKSTEP-38. Add support for python3 to utility scripts
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/d4e714ce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/d4e714ce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/d4e714ce
Branch: refs/heads/LIP-for-tpch
Commit: d4e714ce32e195c95a4e603db7ec8302865f9418
Parents: a61b99e
Author: Caleb Welton <cw...@apache.org>
Authored: Tue Aug 2 10:44:41 2016 -0700
Committer: Zuyu Zhang <zu...@twitter.com>
Committed: Wed Aug 3 11:48:27 2016 -0700
----------------------------------------------------------------------
cyclic_dependency.py | 28 ++++++++++++++++------------
validate_cmakelists.py | 42 +++++++++++++++++++++++-------------------
2 files changed, 39 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d4e714ce/cyclic_dependency.py
----------------------------------------------------------------------
diff --git a/cyclic_dependency.py b/cyclic_dependency.py
index 8bf7d80..a5cca25 100755
--- a/cyclic_dependency.py
+++ b/cyclic_dependency.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
# Script to do analyze the dependencies in Quickstep particularly cycles in the
# dependency graph. This script can be used to find:
@@ -33,6 +33,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
import itertools
import networkx as nx
from optparse import OptionParser
@@ -110,18 +114,18 @@ def process_cmakelists_file(cmakelists_filename, qs_module_dirs):
# target mapping, and target to node mapping.
def create_graph(deps_in_cmake):
nodes = set()
- for source, dest_set in deps_in_cmake.iteritems():
+ for source, dest_set in iter(deps_in_cmake.items()):
nodes.add(source)
nodes.update(dest_set)
nodes_list = list(nodes)
nodes_map = {}
- for i, n in zip(xrange(len(nodes_list)), nodes_list):
+ for i, n in zip(range(len(nodes_list)), nodes_list):
nodes_map[n] = i
G = nx.DiGraph()
- for source, dest_set in deps_in_cmake.iteritems():
+ for source, dest_set in iter(deps_in_cmake.items()):
source_node = nodes_map[source]
for dest in dest_set:
if source == dest: continue
@@ -137,17 +141,17 @@ def find_strongly_connected_components(G, nodes_list):
if len(n) > 1:
components += 1
# Only output components bigger than 1.
- print [nodes_list[i] for i in n]
+ print([nodes_list[i] for i in n])
return components
# Lists cycles in the graph truncating to 100 cycles.
def find_cycles(G, nodes_list, truncate):
cycles = 0
for n in nx.simple_cycles(G):
- print [nodes_list[i] for i in n]
+ print([nodes_list[i] for i in n])
cycles += 1
if cycles >= truncate:
- print "Many cycles found. Truncating to {0} cycles.".format(truncate)
+ print("Many cycles found. Truncating to {0} cycles.".format(truncate))
break
return cycles
@@ -156,16 +160,16 @@ def find_path(G, nodes_list, nodes_map, source, target):
source_node = nodes_map[source]
target_node = nodes_map[target]
if nx.has_path(G, source_node, target_node):
- print [nodes_list[i] for i in nx.shortest_path(G,
+ print([nodes_list[i] for i in nx.shortest_path(G,
source_node,
- target_node)]
+ target_node)])
else:
- print 'No path.'
+ print('No path.')
def main():
if not os.getcwd().endswith("quickstep"):
- print ("WARNING: you don't appear to be running in the root quickstep "
- "source directory. Don't blame me if something goes wrong.")
+ print("WARNING: you don't appear to be running in the root quickstep "
+ "source directory. Don't blame me if something goes wrong.")
qs_module_dirs = []
for filename in os.listdir("."):
if (os.path.isdir(filename)
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d4e714ce/validate_cmakelists.py
----------------------------------------------------------------------
diff --git a/validate_cmakelists.py b/validate_cmakelists.py
index 7dd6fc5..cf25d28 100755
--- a/validate_cmakelists.py
+++ b/validate_cmakelists.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
"""Script to do basic sanity checking for target_link_libraries() commands in
CMakeLists.txt files.
@@ -31,6 +31,10 @@ TODO List / Known Issues & Limitations:
# See the License for the specific language governing permissions and
# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
import os
import sys
@@ -334,8 +338,8 @@ def process_cmakelists_file(cmakelists_filename, qs_module_dirs):
if "CMAKE_VALIDATE_IGNORE_END" in line:
scan_state = previous_state
elif "CMAKE_VALIDATE_IGNORE_BEGIN" in line:
- print "Nested IGNORE_BEGIN directives found in: "\
- + cmakelists_filename + ", exiting"
+ print("Nested IGNORE_BEGIN directives found in: "
+ + cmakelists_filename + ", exiting")
exit(-1)
else:
continue
@@ -397,25 +401,25 @@ def process_cmakelists_file(cmakelists_filename, qs_module_dirs):
stitched_string = ""
scan_state = CMAKE_SCANNING_NONE
# After scanning, report any missing dependencies.
- for target, include_deps in deps_from_includes.iteritems():
+ for target, include_deps in iter(deps_from_includes.items()):
if target in skipped_targets:
pass
elif len(include_deps) != 0:
if target not in deps_in_cmake:
if not (target in include_deps and len(include_deps) == 1):
validation_failed_targets.add(target)
- print "Missing target_link_libraries() for " + target + ":"
+ print("Missing target_link_libraries() for " + target + ":")
for dep in sorted(include_deps):
- print "\t" + dep
+ print("\t" + dep)
else:
missing_deps = (include_deps
- deps_in_cmake[target]
- IGNORED_DEPENDENCIES)
if len(missing_deps) != 0:
validation_failed_targets.add(target)
- print "Missing target_link_libraries() for " + target + ":"
+ print("Missing target_link_libraries() for " + target + ":")
for dep in sorted(missing_deps):
- print "\t" + dep
+ print("\t" + dep)
elif target == module_targetname:
# Special case hack for module all-in-one library
missing_deps = (frozenset(deps_from_includes.keys())
@@ -427,21 +431,21 @@ def process_cmakelists_file(cmakelists_filename, qs_module_dirs):
true_missing_deps.add(dep)
if len(true_missing_deps) != 0:
validation_failed_targets.add(target)
- print "Missing target_link_libraries() for " + target + ":"
+ print("Missing target_link_libraries() for " + target + ":")
for dep in sorted(true_missing_deps):
- print "\t" + dep
+ print("\t" + dep)
# Also report possibly superfluous extra dependencies.
- for target, cmake_deps in deps_in_cmake.iteritems():
+ for target, cmake_deps in iter(deps_in_cmake.items()):
if (target not in skipped_targets) and (target in deps_from_includes):
extra_deps = cmake_deps - deps_from_includes[target]
if target in extra_deps:
extra_deps.remove(target)
if len(extra_deps) != 0 and target != module_targetname:
validation_failed_targets.add(target)
- print ("Possibly superfluous target_link_libraries() for "
+ print("Possibly superfluous target_link_libraries() for "
+ target + ":")
for dep in sorted(extra_deps):
- print "\t" + dep
+ print("\t" + dep)
return (validation_failed_targets, skipped_targets, generated_targets)
def main(cmakelists_to_process):
@@ -461,8 +465,8 @@ def main(cmakelists_to_process):
missing or superfluous dependencies.
"""
if not os.getcwd().endswith("quickstep"):
- print ("WARNING: you don't appear to be running in the root quickstep "
- "source directory. Don't blame me if something goes wrong.")
+ print("WARNING: you don't appear to be running in the root quickstep "
+ "source directory. Don't blame me if something goes wrong.")
qs_module_dirs = []
for filename in os.listdir("."):
if (os.path.isdir(filename)
@@ -493,17 +497,17 @@ def main(cmakelists_to_process):
global_skipped_targets.update(local_skipped_targets)
global_generated_targets.update(local_generated_targets)
if len(global_skipped_targets) != 0:
- print ("WARNING: The following targets had multiple add_library() "
+ print("WARNING: The following targets had multiple add_library() "
+ "commands and were NOT checked by this script (they should "
+ "be manually checked):")
for target in sorted(global_skipped_targets):
- print "\t" + target
+ print("\t" + target)
if len(global_generated_targets) != 0:
- print ("INFO: The add_library() commands for the following targets "
+ print("INFO: The add_library() commands for the following targets "
+ "appear to reference generated sources, so they were not "
+ "checked):")
for target in sorted(global_generated_targets):
- print "\t" + target
+ print("\t" + target)
return len(global_validation_failed_targets)
if __name__ == "__main__":
[08/13] incubator-quickstep git commit: Removed the redundant query
id in the optimizer.
Posted by ji...@apache.org.
Removed the redundant query id in the optimizer.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/8cd5a56c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/8cd5a56c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/8cd5a56c
Branch: refs/heads/LIP-for-tpch
Commit: 8cd5a56c92f2e0e6c2acb2a979a2ae5fb2e54792
Parents: d4e714c
Author: Zuyu Zhang <zu...@twitter.com>
Authored: Mon Aug 1 23:02:37 2016 -0700
Committer: Zuyu Zhang <zu...@twitter.com>
Committed: Wed Aug 3 11:53:46 2016 -0700
----------------------------------------------------------------------
cli/tests/CommandExecutorTestRunner.cpp | 5 ++---
query_optimizer/CMakeLists.txt | 3 ---
query_optimizer/ExecutionGenerator.cpp | 4 +++-
query_optimizer/Optimizer.hpp | 9 ++-------
query_optimizer/OptimizerContext.hpp | 20 ++------------------
query_optimizer/QueryProcessor.cpp | 10 ++--------
query_optimizer/QueryProcessor.hpp | 7 +++++++
.../tests/ExecutionGeneratorTestRunner.cpp | 5 ++---
query_optimizer/tests/OptimizerTest.cpp | 3 +--
.../tests/OptimizerTextTestRunner.cpp | 3 +--
10 files changed, 22 insertions(+), 47 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8cd5a56c/cli/tests/CommandExecutorTestRunner.cpp
----------------------------------------------------------------------
diff --git a/cli/tests/CommandExecutorTestRunner.cpp b/cli/tests/CommandExecutorTestRunner.cpp
index bd7082f..dc7e43f 100644
--- a/cli/tests/CommandExecutorTestRunner.cpp
+++ b/cli/tests/CommandExecutorTestRunner.cpp
@@ -76,8 +76,7 @@ void CommandExecutorTestRunner::runTestCase(
while (true) {
ParseResult result = sql_parser_.getNextStatement();
- O::OptimizerContext optimizer_context(0 /* query_id */,
- test_database_loader_.catalog_database(),
+ O::OptimizerContext optimizer_context(test_database_loader_.catalog_database(),
test_database_loader_.storage_manager());
if (result.condition != ParseResult::kSuccess) {
@@ -99,7 +98,7 @@ void CommandExecutorTestRunner::runTestCase(
nullptr,
output_stream.file());
} else {
- QueryHandle query_handle(optimizer_context.query_id());
+ QueryHandle query_handle(0 /* query_id */);
O::LogicalGenerator logical_generator(&optimizer_context);
O::PhysicalGenerator physical_generator;
O::ExecutionGenerator execution_generator(&optimizer_context, &query_handle);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8cd5a56c/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index c55881f..04e54d3 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -185,7 +185,6 @@ target_link_libraries(quickstep_queryoptimizer_Optimizer
quickstep_queryoptimizer_PhysicalGenerator
quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_OptimizerContext
- quickstep_catalog_CatalogTypedefs
quickstep_queryoptimizer_expressions_ExprId
quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_OptimizerTree
@@ -219,8 +218,6 @@ target_link_libraries(quickstep_queryoptimizer_QueryPlan
quickstep_utility_Macros)
target_link_libraries(quickstep_queryoptimizer_QueryProcessor
quickstep_catalog_Catalog
- quickstep_catalog_CatalogDatabase
- quickstep_catalog_CatalogRelation
quickstep_catalog_Catalog_proto
quickstep_parser_ParseStatement
quickstep_queryoptimizer_Optimizer
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8cd5a56c/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index fb24489..2ae6a4b 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -26,6 +26,8 @@
#include <type_traits>
#include <unordered_map>
+#include "query_optimizer/QueryOptimizerConfig.h" // For QUICKSTEP_DISTRIBUTED.
+
#ifdef QUICKSTEP_DISTRIBUTED
#include <unordered_set>
#endif
@@ -299,7 +301,7 @@ void ExecutionGenerator::generatePlanInternal(
std::string ExecutionGenerator::getNewRelationName() {
std::ostringstream out;
out << OptimizerContext::kInternalTemporaryRelationNamePrefix
- << optimizer_context_->query_id() << "_" << rel_id_;
+ << query_handle_->query_id() << "_" << rel_id_;
++rel_id_;
return out.str();
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8cd5a56c/query_optimizer/Optimizer.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/Optimizer.hpp b/query_optimizer/Optimizer.hpp
index 9177414..18c927d 100644
--- a/query_optimizer/Optimizer.hpp
+++ b/query_optimizer/Optimizer.hpp
@@ -18,8 +18,6 @@
#ifndef QUICKSTEP_QUERY_OPTIMIZER_OPTIMIZER_HPP_
#define QUICKSTEP_QUERY_OPTIMIZER_OPTIMIZER_HPP_
-#include <cstddef>
-
#include "query_optimizer/OptimizerContext.hpp"
#include "utility/Macros.hpp"
@@ -45,15 +43,12 @@ class Optimizer {
/**
* @brief Constructor.
*
- * @param query_id The query id. Used to identify a query and create distinct
- * names for temporary relations.
* @param database The database that the query is executed on.
* @param storage_manager The storage manager for the database.
*/
- Optimizer(const std::size_t query_id,
- CatalogDatabase *database,
+ Optimizer(CatalogDatabase *database,
StorageManager *storage_manager)
- : optimizer_context_(query_id, database, storage_manager) {}
+ : optimizer_context_(database, storage_manager) {}
/**
* @brief Destructor.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8cd5a56c/query_optimizer/OptimizerContext.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/OptimizerContext.hpp b/query_optimizer/OptimizerContext.hpp
index abdc7f4..bcab9d0 100644
--- a/query_optimizer/OptimizerContext.hpp
+++ b/query_optimizer/OptimizerContext.hpp
@@ -18,18 +18,12 @@
#ifndef QUICKSTEP_QUERY_OPTIMIZER_OPTIMIZER_CONTEXT_HPP_
#define QUICKSTEP_QUERY_OPTIMIZER_OPTIMIZER_CONTEXT_HPP_
-#include <cstddef>
-#include <cstdlib>
-#include <string>
-
-#include "catalog/CatalogTypedefs.hpp"
#include "query_optimizer/expressions/ExprId.hpp"
#include "utility/Macros.hpp"
namespace quickstep {
class CatalogDatabase;
-class CatalogRelation;
class StorageManager;
namespace optimizer {
@@ -49,16 +43,13 @@ class OptimizerContext {
/**
* @brief Constructor.
*
- * @param query_id The query ID.
* @param catalog_database The catalog database where this query is executed.
* @param storage_manager The storage manager to use for allocating storage
* blocks.
*/
- OptimizerContext(const std::size_t query_id,
- CatalogDatabase *catalog_database,
+ OptimizerContext(CatalogDatabase *catalog_database,
StorageManager *storage_manager)
- : query_id_(query_id),
- current_expr_id_(-1),
+ : current_expr_id_(-1),
catalog_database_(catalog_database),
storage_manager_(storage_manager),
has_nested_queries_(false),
@@ -94,11 +85,6 @@ class OptimizerContext {
StorageManager* storage_manager() { return storage_manager_; }
/**
- * @return Query ID.
- */
- std::size_t query_id() const { return query_id_; }
-
- /**
* @brief Gets the next ExprId.
*
* @return A new ExprId.
@@ -133,8 +119,6 @@ class OptimizerContext {
bool is_catalog_changed() const { return is_catalog_changed_; }
private:
- const std::size_t query_id_;
-
expressions::ExprId current_expr_id_;
CatalogDatabase *catalog_database_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8cd5a56c/query_optimizer/QueryProcessor.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/QueryProcessor.cpp b/query_optimizer/QueryProcessor.cpp
index 6381d3a..8af4408 100644
--- a/query_optimizer/QueryProcessor.cpp
+++ b/query_optimizer/QueryProcessor.cpp
@@ -17,20 +17,15 @@
#include "query_optimizer/QueryProcessor.hpp"
-#include <cstdint>
-#include <cstdlib>
#include <fstream>
#include <memory>
#include <string>
#include "catalog/Catalog.hpp"
#include "catalog/Catalog.pb.h"
-#include "catalog/CatalogDatabase.hpp"
-#include "catalog/CatalogRelation.hpp"
#include "parser/ParseStatement.hpp"
#include "query_optimizer/Optimizer.hpp"
#include "query_optimizer/QueryHandle.hpp"
-#include "storage/StorageManager.hpp"
using std::ifstream;
using std::ofstream;
@@ -41,10 +36,9 @@ QueryHandle* QueryProcessor::generateQueryHandle(const ParseStatement &statement
std::unique_ptr<QueryHandle> query_handle(
new QueryHandle(query_id_, statement.getPriority()));
- optimizer::Optimizer optimizer(query_id_, getDefaultDatabase(), storage_manager_.get());
- optimizer.generateQueryHandle(statement, query_handle.get());
+ optimizer_->generateQueryHandle(statement, query_handle.get());
- if (optimizer.isCatalogChanged() && !catalog_altered_) {
+ if (optimizer_->isCatalogChanged() && !catalog_altered_) {
catalog_altered_ = true;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8cd5a56c/query_optimizer/QueryProcessor.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/QueryProcessor.hpp b/query_optimizer/QueryProcessor.hpp
index 32739dc..f3844a0 100644
--- a/query_optimizer/QueryProcessor.hpp
+++ b/query_optimizer/QueryProcessor.hpp
@@ -26,6 +26,7 @@
#include <string>
#include "catalog/Catalog.hpp"
+#include "query_optimizer/Optimizer.hpp"
#include "storage/StorageManager.hpp"
#include "utility/Macros.hpp"
@@ -139,6 +140,9 @@ class QueryProcessor {
query_id_(0) {
loadCatalog();
storage_manager_.reset(new StorageManager(storage_path));
+
+ // Construct after Catalog loads and StorageManager initializes.
+ optimizer_.reset(new optimizer::Optimizer(getDefaultDatabase(), storage_manager_.get()));
}
/**
@@ -190,6 +194,9 @@ class QueryProcessor {
std::unique_ptr<Catalog> catalog_;
std::unique_ptr<StorageManager> storage_manager_;
+
+ std::unique_ptr<optimizer::Optimizer> optimizer_;
+
bool catalog_altered_;
std::size_t query_id_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8cd5a56c/query_optimizer/tests/ExecutionGeneratorTestRunner.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/ExecutionGeneratorTestRunner.cpp b/query_optimizer/tests/ExecutionGeneratorTestRunner.cpp
index 563a777..fd1bb86 100644
--- a/query_optimizer/tests/ExecutionGeneratorTestRunner.cpp
+++ b/query_optimizer/tests/ExecutionGeneratorTestRunner.cpp
@@ -73,8 +73,7 @@ void ExecutionGeneratorTestRunner::runTestCase(
while (true) {
ParseResult result = sql_parser_.getNextStatement();
- OptimizerContext optimizer_context(0 /* query_id */,
- test_database_loader_.catalog_database(),
+ OptimizerContext optimizer_context(test_database_loader_.catalog_database(),
test_database_loader_.storage_manager());
if (result.condition != ParseResult::kSuccess) {
@@ -85,7 +84,7 @@ void ExecutionGeneratorTestRunner::runTestCase(
} else {
std::printf("%s\n", result.parsed_statement->toString().c_str());
try {
- QueryHandle query_handle(optimizer_context.query_id());
+ QueryHandle query_handle(0 /* query_id */);
LogicalGenerator logical_generator(&optimizer_context);
PhysicalGenerator physical_generator;
ExecutionGenerator execution_generator(&optimizer_context,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8cd5a56c/query_optimizer/tests/OptimizerTest.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/OptimizerTest.cpp b/query_optimizer/tests/OptimizerTest.cpp
index 3734719..57e2d67 100644
--- a/query_optimizer/tests/OptimizerTest.cpp
+++ b/query_optimizer/tests/OptimizerTest.cpp
@@ -59,8 +59,7 @@ OptimizerTest::OptimizerTest()
: catalog_(new Catalog),
catalog_database_(
new CatalogDatabase(catalog_.get(), "TestDatabase" /* name */, 0)),
- optimizer_context_(new OptimizerContext(0 /* query_id */,
- catalog_database_.get(),
+ optimizer_context_(new OptimizerContext(catalog_database_.get(),
nullptr /* storage_manager */)),
physical_generator_(new PhysicalGenerator()) {}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8cd5a56c/query_optimizer/tests/OptimizerTextTestRunner.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/OptimizerTextTestRunner.cpp b/query_optimizer/tests/OptimizerTextTestRunner.cpp
index 251b64b..d790c33 100644
--- a/query_optimizer/tests/OptimizerTextTestRunner.cpp
+++ b/query_optimizer/tests/OptimizerTextTestRunner.cpp
@@ -47,8 +47,7 @@ void OptimizerTextTestRunner::runTestCase(const std::string &input,
sql_parser_.feedNextBuffer(new std::string(input));
ParseResult result = sql_parser_.getNextStatement();
- OptimizerContext optimizer_context(0 /* query_id */,
- test_database_loader_.catalog_database(),
+ OptimizerContext optimizer_context(test_database_loader_.catalog_database(),
nullptr /* storage_manager */);
if (result.condition != ParseResult::kSuccess) {
*output = result.error_message;
[13/13] incubator-quickstep git commit: Updates
Posted by ji...@apache.org.
Updates
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/5e22b396
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/5e22b396
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/5e22b396
Branch: refs/heads/LIP-for-tpch
Commit: 5e22b396c6e26339b01f7bd891f2be6e91db2291
Parents: 43ed533
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Wed Aug 3 20:56:18 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Wed Aug 3 20:56:18 2016 -0500
----------------------------------------------------------------------
cli/QuickstepCli.cpp | 5 -----
query_optimizer/ExecutionGenerator.cpp | 2 --
2 files changed, 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5e22b396/cli/QuickstepCli.cpp
----------------------------------------------------------------------
diff --git a/cli/QuickstepCli.cpp b/cli/QuickstepCli.cpp
index 5811d2c..1a01a84 100644
--- a/cli/QuickstepCli.cpp
+++ b/cli/QuickstepCli.cpp
@@ -512,11 +512,6 @@ int main(int argc, char* argv[]) {
main_thread_client_id, &bus);
end = std::chrono::steady_clock::now();
- if (quickstep::FLAGS_visualize_dag) {
- quickstep::DAGVisualizer visualizer(*query_handle->getQueryPlanMutable());
- std::cerr << "\n" << visualizer.toDOT() << "\n";
- }
-
const CatalogRelation *query_result_relation = query_handle->getQueryResultRelation();
if (query_result_relation) {
PrintToScreen::PrintRelation(*query_result_relation,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5e22b396/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 4204174..d589f58 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -670,8 +670,6 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
key_types.push_back(&left_attribute_type);
}
- std::size_t build_cardinality = cost_model_->estimateCardinality(build_physical);
-
// Convert the residual predicate proto.
QueryContext::predicate_id residual_predicate_index = QueryContext::kInvalidPredicateId;
if (physical_plan->residual_predicate()) {