You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/04/21 17:01:06 UTC
[2/5] incubator-quickstep git commit: Adds support for left/right
outer join (#178)
Adds support for left/right outer join (#178)
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/ef383ef2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/ef383ef2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/ef383ef2
Branch: refs/heads/master
Commit: ef383ef280529a4eecb3cf522ec5217b05af1812
Parents: 2577cf7
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Wed Apr 20 12:39:03 2016 -0500
Committer: Jignesh Patel <pa...@users.noreply.github.com>
Committed: Wed Apr 20 12:39:03 2016 -0500
----------------------------------------------------------------------
query_optimizer/ExecutionGenerator.cpp | 16 ++
query_optimizer/expressions/ExpressionUtil.cpp | 19 ++
query_optimizer/expressions/ExpressionUtil.hpp | 39 ++++
query_optimizer/logical/CMakeLists.txt | 1 +
query_optimizer/logical/HashJoin.hpp | 38 +++-
query_optimizer/physical/HashJoin.hpp | 5 +-
query_optimizer/resolver/CMakeLists.txt | 2 +
query_optimizer/resolver/NameResolver.hpp | 24 +++
query_optimizer/resolver/Resolver.cpp | 44 +++-
query_optimizer/rules/CMakeLists.txt | 4 +
query_optimizer/rules/GenerateJoins.cpp | 90 +++++++++
query_optimizer/rules/PushDownFilter.cpp | 18 +-
query_optimizer/rules/PushDownSemiAntiJoin.cpp | 9 +-
query_optimizer/strategy/Join.cpp | 3 +
.../tests/execution_generator/Join.test | 99 +++++++++
.../tests/logical_generator/Join.test | 182 +++++++++++++++++
.../tests/physical_generator/Join.test | 164 +++++++++++++++
query_optimizer/tests/resolver/Join.test | 141 ++++++++++++-
relational_operators/CMakeLists.txt | 2 +-
relational_operators/HashJoinOperator.cpp | 198 +++++++++++++++++-
relational_operators/HashJoinOperator.hpp | 136 ++++++++++++-
relational_operators/WorkOrder.proto | 44 ++--
relational_operators/WorkOrderFactory.cpp | 202 +++++++++++++------
storage/HashTable.hpp | 30 +--
24 files changed, 1393 insertions(+), 117 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 38ab09e..7a8e996 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -607,6 +607,18 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
const CatalogRelationInfo *probe_operator_info =
findRelationInfoOutputByPhysical(probe_physical);
+ // Create a vector that indicates whether each project expression is using
+ // attributes from the build relation as input. This information is required
+ // by the current implementation of hash left outer join
+ std::unique_ptr<std::vector<bool>> is_selection_on_build;
+ if (physical_plan->join_type() == P::HashJoin::JoinType::kLeftOuterJoin) {
+ is_selection_on_build.reset(
+ new std::vector<bool>(
+ E::MarkExpressionsReferingAnyAttribute(
+ physical_plan->project_expressions(),
+ build_physical->getOutputAttributes())));
+ }
+
// FIXME(quickstep-team): Add support for self-join.
if (build_relation_info->relation == probe_operator_info->relation) {
THROW_SQL_ERROR() << "Self-join is not supported";
@@ -664,6 +676,9 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
case P::HashJoin::JoinType::kLeftAntiJoin:
join_type = HashJoinOperator::JoinType::kLeftAntiJoin;
break;
+ case P::HashJoin::JoinType::kLeftOuterJoin:
+ join_type = HashJoinOperator::JoinType::kLeftOuterJoin;
+ break;
default:
LOG(FATAL) << "Invalid physical::HashJoin::JoinType: "
<< static_cast<typename std::underlying_type<P::HashJoin::JoinType>::type>(
@@ -684,6 +699,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
join_hash_table_index,
residual_predicate_index,
project_expressions_group_index,
+ is_selection_on_build.get(),
join_type));
insert_destination_proto->set_relational_op_index(join_operator_index);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/expressions/ExpressionUtil.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/ExpressionUtil.cpp b/query_optimizer/expressions/ExpressionUtil.cpp
index 5cf7759..3ccd3c9 100644
--- a/query_optimizer/expressions/ExpressionUtil.cpp
+++ b/query_optimizer/expressions/ExpressionUtil.cpp
@@ -37,6 +37,25 @@ AttributeReferencePtr ToRef(const NamedExpressionPtr &expression) {
AttributeReferenceScope::kLocal);
}
+std::vector<AttributeReferencePtr> GetNullableAttributeVector(
+ const std::vector<AttributeReferencePtr> &attributes) {
+ std::vector<AttributeReferencePtr> nullable_attributes;
+ for (const auto &attr : attributes) {
+ if (!attr->getValueType().isNullable()) {
+ nullable_attributes.emplace_back(
+ AttributeReference::Create(attr->id(),
+ attr->attribute_name(),
+ attr->attribute_alias(),
+ attr->relation_name(),
+ attr->getValueType().getNullableVersion(),
+ attr->scope()));
+ } else {
+ nullable_attributes.emplace_back(attr);
+ }
+ }
+ return nullable_attributes;
+}
+
std::vector<AttributeReferencePtr> GetAttributeReferencesWithinScope(
const std::vector<AttributeReferencePtr> &attributes,
const AttributeReferenceScope scope) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/expressions/ExpressionUtil.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/ExpressionUtil.hpp b/query_optimizer/expressions/ExpressionUtil.hpp
index cc9fbbe..4c35719 100644
--- a/query_optimizer/expressions/ExpressionUtil.hpp
+++ b/query_optimizer/expressions/ExpressionUtil.hpp
@@ -117,6 +117,16 @@ bool SubsetOfExpressions(
}
/**
+ * @brief Make a copy of the input attribute vector with each attribute's type
+ * converted as nullable.
+ *
+ * @param attributes The input attribute vector.
+ * @return The nullable copy of the input attribute vector.
+ */
+std::vector<AttributeReferencePtr> GetNullableAttributeVector(
+ const std::vector<AttributeReferencePtr> &attributes);
+
+/**
* @brief Returns a reference to this named expression.
*
* @return An AttributeReference of this named expression.
@@ -124,6 +134,35 @@ bool SubsetOfExpressions(
AttributeReferencePtr ToRef(const NamedExpressionPtr &expression);
/**
+ * @brief Given a vector of expressions and a vector of attributes, return a
+ * bool vector that indicates whether each expression is refering to
+ * ANY attribute in the attribute vector.
+ *
+ * @param expressions The vector of expressions to be checked.
+ * @param attributes The vector of attributes to be checked.
+ * @return A vector of bools indicating whether each expression is refering to
+ * any attribute in the attribute vector. Note that the length of this
+ * bool vector equals the length of \p expressions.
+ */
+template <class NamedExpressionType>
+std::vector<bool> MarkExpressionsReferingAnyAttribute(
+ const std::vector<std::shared_ptr<const NamedExpressionType>> &expressions,
+ const std::vector<AttributeReferencePtr> &attributes) {
+ std::vector<bool> matches;
+ UnorderedAttributeSet attr_set(attributes.begin(), attributes.end());
+ for (std::size_t i = 0; i < expressions.size(); ++i) {
+ for (const auto referenced_attr : expressions[i]->getReferencedAttributes()) {
+ if (attr_set.find(referenced_attr) != attr_set.end()) {
+ matches.emplace_back(true);
+ } else {
+ matches.emplace_back(false);
+ }
+ }
+ }
+ return matches;
+}
+
+/**
* @brief Filter a vector of AttributeReferencePtr to get those which are in
* the specified scope.
*
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/logical/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/logical/CMakeLists.txt b/query_optimizer/logical/CMakeLists.txt
index 0467233..61c6234 100644
--- a/query_optimizer/logical/CMakeLists.txt
+++ b/query_optimizer/logical/CMakeLists.txt
@@ -125,6 +125,7 @@ target_link_libraries(quickstep_queryoptimizer_logical_HashJoin
glog
quickstep_queryoptimizer_OptimizerTree
quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExpressionUtil
quickstep_queryoptimizer_expressions_Predicate
quickstep_queryoptimizer_logical_BinaryJoin
quickstep_queryoptimizer_logical_Logical
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/logical/HashJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/logical/HashJoin.hpp b/query_optimizer/logical/HashJoin.hpp
index 2ae30cf..9052a23 100644
--- a/query_optimizer/logical/HashJoin.hpp
+++ b/query_optimizer/logical/HashJoin.hpp
@@ -27,6 +27,7 @@
#include "query_optimizer/OptimizerTree.hpp"
#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
#include "query_optimizer/expressions/Predicate.hpp"
#include "query_optimizer/logical/BinaryJoin.hpp"
#include "query_optimizer/logical/Logical.hpp"
@@ -57,10 +58,10 @@ class HashJoin : public BinaryJoin {
enum class JoinType {
kInnerJoin = 0,
kLeftSemiJoin,
- kLeftAntiJoin
+ kLeftAntiJoin,
+ kLeftOuterJoin
};
-
LogicalType getLogicalType() const override { return LogicalType::kHashJoin; }
std::string getName() const override {
@@ -71,6 +72,8 @@ class HashJoin : public BinaryJoin {
return "HashLeftSemiJoin";
case JoinType::kLeftAntiJoin:
return "HashLeftAntiJoin";
+ case JoinType::kLeftOuterJoin:
+ return "HashLeftOuterJoin";
default:
LOG(FATAL) << "Invalid JoinType: "
<< static_cast<typename std::underlying_type<JoinType>::type>(join_type_);
@@ -116,12 +119,36 @@ class HashJoin : public BinaryJoin {
join_type_);
}
+ std::vector<expressions::AttributeReferencePtr> getOutputAttributes() const override {
+ if (join_type_ != JoinType::kLeftOuterJoin) {
+ return BinaryJoin::getOutputAttributes();
+ }
+
+ // For left outer join, all the output attributes from the right relation
+ // must be nullable.
+ std::vector<expressions::AttributeReferencePtr> output_attributes =
+ left()->getOutputAttributes();
+ const std::vector<expressions::AttributeReferencePtr> right_nullable_output_attributes =
+ GetNullableAttributeVector(right()->getOutputAttributes());
+ output_attributes.insert(output_attributes.end(),
+ right_nullable_output_attributes.begin(),
+ right_nullable_output_attributes.end());
+ return output_attributes;
+ }
+
std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() const override {
std::vector<expressions::AttributeReferencePtr> referenced_attributes =
left_join_attributes_;
referenced_attributes.insert(referenced_attributes.end(),
right_join_attributes_.begin(),
right_join_attributes_.end());
+ if (residual_predicate_ != nullptr) {
+ const std::vector<expressions::AttributeReferencePtr> referenced_attributes_in_residual =
+ residual_predicate_->getReferencedAttributes();
+ referenced_attributes.insert(referenced_attributes.end(),
+ referenced_attributes_in_residual.begin(),
+ referenced_attributes_in_residual.end());
+ }
return referenced_attributes;
}
@@ -170,6 +197,11 @@ class HashJoin : public BinaryJoin {
container_child_field_names,
container_child_fields);
+ if (residual_predicate_ != nullptr) {
+ non_container_child_field_names->push_back("residual_predicate");
+ non_container_child_fields->push_back(residual_predicate_);
+ }
+
container_child_field_names->push_back("left_join_attributes");
container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(left_join_attributes_));
container_child_field_names->push_back("right_join_attributes");
@@ -189,7 +221,7 @@ class HashJoin : public BinaryJoin {
residual_predicate_(residual_predicate),
join_type_(join_type) {
DCHECK_EQ(left_join_attributes.size(), right_join_attributes.size());
- DCHECK(!left_join_attributes.empty());
+ DCHECK(!left_join_attributes.empty() || residual_predicate != nullptr);
if (residual_predicate_ != nullptr) {
addInputExpression(residual_predicate_);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/physical/HashJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.hpp b/query_optimizer/physical/HashJoin.hpp
index 80e7c2f..b904b5f 100644
--- a/query_optimizer/physical/HashJoin.hpp
+++ b/query_optimizer/physical/HashJoin.hpp
@@ -56,7 +56,8 @@ class HashJoin : public BinaryJoin {
enum class JoinType {
kInnerJoin = 0,
kLeftSemiJoin,
- kLeftAntiJoin
+ kLeftAntiJoin,
+ kLeftOuterJoin
};
PhysicalType getPhysicalType() const override { return PhysicalType::kHashJoin; }
@@ -69,6 +70,8 @@ class HashJoin : public BinaryJoin {
return "HashLeftSemiJoin";
case JoinType::kLeftAntiJoin:
return "HashLeftAntiJoin";
+ case JoinType::kLeftOuterJoin:
+ return "HashLeftOuterJoin";
default:
LOG(FATAL) << "Invalid JoinType: "
<< static_cast<typename std::underlying_type<JoinType>::type>(join_type_);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/resolver/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/resolver/CMakeLists.txt b/query_optimizer/resolver/CMakeLists.txt
index 854ace1..db2a8af 100644
--- a/query_optimizer/resolver/CMakeLists.txt
+++ b/query_optimizer/resolver/CMakeLists.txt
@@ -26,6 +26,7 @@ target_link_libraries(quickstep_queryoptimizer_resolver_NameResolver
quickstep_catalog_CatalogRelation
quickstep_parser_ParseString
quickstep_queryoptimizer_expressions_AttributeReference
+ quickstep_queryoptimizer_expressions_ExpressionUtil
quickstep_queryoptimizer_logical_Logical
quickstep_utility_Macros
quickstep_utility_SqlError
@@ -94,6 +95,7 @@ target_link_libraries(quickstep_queryoptimizer_resolver_Resolver
quickstep_queryoptimizer_logical_DeleteTuples
quickstep_queryoptimizer_logical_DropTable
quickstep_queryoptimizer_logical_Filter
+ quickstep_queryoptimizer_logical_HashJoin
quickstep_queryoptimizer_logical_InsertSelection
quickstep_queryoptimizer_logical_InsertTuple
quickstep_queryoptimizer_logical_Logical
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/resolver/NameResolver.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/resolver/NameResolver.hpp b/query_optimizer/resolver/NameResolver.hpp
index ea3f1e4..1618f4e 100644
--- a/query_optimizer/resolver/NameResolver.hpp
+++ b/query_optimizer/resolver/NameResolver.hpp
@@ -25,6 +25,7 @@
#include <vector>
#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
#include "query_optimizer/logical/Logical.hpp"
#include "utility/Macros.hpp"
#include "utility/StringUtil.hpp"
@@ -118,6 +119,29 @@ class NameResolver {
return std::string("$").append(name);
}
+ /**
+ * @return Get the index in the NameResolver for the next relation.
+ */
+ std::size_t nextScopedRelationPosition() const {
+ return relations_.size();
+ }
+
+ /**
+ * @brief Make the output attributes of all the relations with indices between
+ * start_relation_id and end_relation_id (excluded) to be nullable.
+ *
+ * @param start_relation_id The starting index of the relation for which
+ * the nullability of output attributes should be changed.
+ * @param end_relation_id The ending relation index.
+ */
+ void makeOutputAttributesNullable(std::size_t start_relation_id,
+ std::size_t end_relation_id) {
+ for (std::size_t idx = start_relation_id; idx < end_relation_id; ++idx) {
+ relations_[idx]->attributes =
+ expressions::GetNullableAttributeVector(relations_[idx]->attributes);
+ }
+ }
+
private:
/**
* @brief Info of a relation and its visible attributes in the name scope.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/resolver/Resolver.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/resolver/Resolver.cpp b/query_optimizer/resolver/Resolver.cpp
index 242a1f0..f70dae0 100644
--- a/query_optimizer/resolver/Resolver.cpp
+++ b/query_optimizer/resolver/Resolver.cpp
@@ -19,6 +19,7 @@
#include "query_optimizer/resolver/Resolver.hpp"
+#include <algorithm>
#include <map>
#include <memory>
#include <set>
@@ -90,6 +91,7 @@
#include "query_optimizer/logical/DeleteTuples.hpp"
#include "query_optimizer/logical/DropTable.hpp"
#include "query_optimizer/logical/Filter.hpp"
+#include "query_optimizer/logical/HashJoin.hpp"
#include "query_optimizer/logical/InsertSelection.hpp"
#include "query_optimizer/logical/InsertTuple.hpp"
#include "query_optimizer/logical/MultiwayCartesianJoin.hpp"
@@ -1606,24 +1608,52 @@ L::LogicalPtr Resolver::resolveGeneratorTableReference(
L::LogicalPtr Resolver::resolveJoinedTableReference(
const ParseJoinedTableReference &joined_table_reference,
NameResolver *name_resolver) {
- const L::LogicalPtr left_table =
+ std::size_t start_relation_idx_for_left = name_resolver->nextScopedRelationPosition();
+ L::LogicalPtr left_table =
resolveTableReference(*joined_table_reference.left_table(), name_resolver);
- const L::LogicalPtr right_table =
+
+ std::size_t start_relation_idx_for_right = name_resolver->nextScopedRelationPosition();
+ L::LogicalPtr right_table =
resolveTableReference(*joined_table_reference.right_table(), name_resolver);
+ std::size_t end_relation_idx = name_resolver->nextScopedRelationPosition();
+
ExpressionResolutionInfo resolution_info(*name_resolver,
"join clause" /* clause_name */,
nullptr /* select_list_info */);
const E::PredicatePtr on_predicate =
resolvePredicate(*joined_table_reference.join_predicate(), &resolution_info);
- if (joined_table_reference.join_type() == ParseJoinedTableReference::JoinType::kInnerJoin) {
- return L::Filter::Create(
- L::MultiwayCartesianJoin::Create({ left_table, right_table }),
- on_predicate);
+ switch (joined_table_reference.join_type()) {
+ case ParseJoinedTableReference::JoinType::kInnerJoin: {
+ return L::Filter::Create(
+ L::MultiwayCartesianJoin::Create({ left_table, right_table }),
+ on_predicate);
+ }
+ case ParseJoinedTableReference::JoinType::kRightOuterJoin: {
+ // Swap the two tables so it becomes a left outer join.
+ std::swap(left_table, right_table);
+ end_relation_idx = start_relation_idx_for_right;
+ start_relation_idx_for_right = start_relation_idx_for_left;
+ } // Fall through
+ case ParseJoinedTableReference::JoinType::kLeftOuterJoin: {
+ name_resolver->makeOutputAttributesNullable(start_relation_idx_for_right,
+ end_relation_idx);
+
+ // left_join_attributes and right_join_attributes will be identified by
+ // GenerateJoins during logical optimization.
+ return L::HashJoin::Create(left_table,
+ right_table,
+ {} /* left_join_attributes */,
+ {} /* right_join_attributes */,
+ on_predicate,
+ L::HashJoin::JoinType::kLeftOuterJoin);
+ }
+ default:
+ break;
}
- THROW_SQL_ERROR_AT(&joined_table_reference) << "Outer joins are not supported yet";
+ THROW_SQL_ERROR_AT(&joined_table_reference) << "Full outer join is not supported yet";
}
void Resolver::resolveSelectClause(
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 3461a5e..bc70d2d 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -51,7 +51,9 @@ target_link_libraries(quickstep_queryoptimizer_rules_GenerateJoins
quickstep_queryoptimizer_expressions_AttributeReference
quickstep_queryoptimizer_expressions_ComparisonExpression
quickstep_queryoptimizer_expressions_ExpressionUtil
+ quickstep_queryoptimizer_expressions_LogicalAnd
quickstep_queryoptimizer_expressions_PatternMatcher
+ quickstep_queryoptimizer_expressions_Predicate
quickstep_queryoptimizer_logical_Filter
quickstep_queryoptimizer_logical_HashJoin
quickstep_queryoptimizer_logical_Logical
@@ -64,6 +66,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_GenerateJoins
quickstep_types_operations_comparisons_ComparisonFactory
quickstep_types_operations_comparisons_ComparisonID
quickstep_utility_Macros
+ quickstep_utility_SqlError
quickstep_utility_VectorUtil)
target_link_libraries(quickstep_queryoptimizer_rules_PruneColumns
quickstep_queryoptimizer_expressions_AttributeReference
@@ -79,6 +82,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_PushDownFilter
quickstep_queryoptimizer_expressions_AttributeReference
quickstep_queryoptimizer_expressions_ExpressionUtil
quickstep_queryoptimizer_logical_Filter
+ quickstep_queryoptimizer_logical_HashJoin
quickstep_queryoptimizer_logical_Logical
quickstep_queryoptimizer_logical_PatternMatcher
quickstep_queryoptimizer_rules_Rule
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/rules/GenerateJoins.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/GenerateJoins.cpp b/query_optimizer/rules/GenerateJoins.cpp
index 05fa397..174cf03 100644
--- a/query_optimizer/rules/GenerateJoins.cpp
+++ b/query_optimizer/rules/GenerateJoins.cpp
@@ -19,6 +19,7 @@
#include "query_optimizer/rules/GenerateJoins.hpp"
+#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
@@ -26,7 +27,9 @@
#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ComparisonExpression.hpp"
#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/LogicalAnd.hpp"
#include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
#include "query_optimizer/logical/Filter.hpp"
#include "query_optimizer/logical/HashJoin.hpp"
#include "query_optimizer/logical/Logical.hpp"
@@ -37,6 +40,7 @@
#include "query_optimizer/rules/RuleHelper.hpp"
#include "types/operations/comparisons/ComparisonFactory.hpp"
#include "types/operations/comparisons/ComparisonID.hpp"
+#include "utility/SqlError.hpp"
#include "utility/VectorUtil.hpp"
#include "glog/logging.h"
@@ -245,6 +249,7 @@ void AddNestedLoopsJoin(const L::LogicalPtr left_logical,
L::LogicalPtr GenerateJoins::applyToNode(const L::LogicalPtr &input) {
L::FilterPtr filter;
L::MultiwayCartesianJoinPtr cartesian_join;
+ L::HashJoinPtr hash_join;
// Merge filter predicates with cartesian joins to generate NestedLoopsJoin or
// HashJoin.
@@ -391,6 +396,91 @@ L::LogicalPtr GenerateJoins::applyToNode(const L::LogicalPtr &input) {
L::LogicalPtr output = MaybeGenerateNestedLoopsCartesianJoin(cartesian_join->operands());
LOG_APPLYING_RULE(input, output);
return output;
+ } else if (L::SomeHashJoin::MatchesWithConditionalCast(input, &hash_join) &&
+ hash_join->join_type() == L::HashJoin::JoinType::kLeftOuterJoin) {
+ L::LogicalPtr left_logical = hash_join->left();
+ L::LogicalPtr right_logical = hash_join->right();
+
+ const std::vector<E::AttributeReferencePtr> left_relation_attributes =
+ left_logical->getOutputAttributes();
+ const std::vector<E::AttributeReferencePtr> right_relation_attributes =
+ right_logical->getOutputAttributes();
+
+ DCHECK(hash_join->residual_predicate() != nullptr);
+ const std::vector<E::PredicatePtr> on_predicate_items =
+ GetConjunctivePredicates(hash_join->residual_predicate());
+
+ std::vector<E::AttributeReferencePtr> left_join_attributes;
+ std::vector<E::AttributeReferencePtr> right_join_attributes;
+ std::vector<E::PredicatePtr> left_filter_predicates;
+ std::vector<E::PredicatePtr> right_filter_predicates;
+
+ for (const E::PredicatePtr &predicate_item : on_predicate_items) {
+ std::vector<E::AttributeReferencePtr> referenced_attrs =
+ predicate_item->getReferencedAttributes();
+
+ if (E::SubsetOfExpressions(referenced_attrs, left_relation_attributes)) {
+ // The predicate refers attributes only from the left relation.
+ left_filter_predicates.emplace_back(predicate_item);
+ } else if (E::SubsetOfExpressions(referenced_attrs, right_relation_attributes)) {
+ // The predicate refers attributes only from the right relation.
+ right_filter_predicates.emplace_back(predicate_item);
+ } else {
+ // The predicate refers attributes from both relations.
+ //
+ // NOTE(jianqiao): In this case, since currently in the backend we do not
+ // support residual predicates for hash outer joins, so the predicate can
+ // only be an equal comparison between two attributes (one from left and
+ // one from right).
+ E::ComparisonExpressionPtr comp_expr;
+ if (E::SomeComparisonExpression::MatchesWithConditionalCast(predicate_item,
+ &comp_expr)) {
+ E::AttributeReferencePtr left_attr;
+ E::AttributeReferencePtr right_attr;
+ if (comp_expr->isEqualityComparisonPredicate() &&
+ E::SomeAttributeReference::MatchesWithConditionalCast(comp_expr->left(), &left_attr) &&
+ E::SomeAttributeReference::MatchesWithConditionalCast(comp_expr->right(), &right_attr)) {
+ if (E::ContainsExpression(left_relation_attributes, left_attr)) {
+ DCHECK(E::ContainsExpression(right_relation_attributes, right_attr));
+
+ left_join_attributes.emplace_back(left_attr);
+ right_join_attributes.emplace_back(right_attr);
+ } else {
+ DCHECK(E::ContainsExpression(left_relation_attributes, right_attr));
+ DCHECK(E::ContainsExpression(right_relation_attributes, left_attr));
+
+ left_join_attributes.emplace_back(right_attr);
+ right_join_attributes.emplace_back(left_attr);
+ }
+ } else {
+ THROW_SQL_ERROR() << "Join predicate for outer joins must be an "
+ << "equality comparison between attributes";
+ }
+ } else {
+ THROW_SQL_ERROR() << "Non-equality join predicate is not allowed for outer joins";
+ }
+ }
+ }
+
+ if (!left_filter_predicates.empty()) {
+ // NOTE(jianqiao): Filter predicates on left table cannot be pushed down
+ // but can be treated as residual predicates once we have support for that.
+ THROW_SQL_ERROR() << "Filter predicates on left table is not allowed for outer joins";
+ }
+
+ if (!right_filter_predicates.empty()) {
+ const E::PredicatePtr right_predicate =
+ (right_filter_predicates.size() == 1u ? right_filter_predicates[0]
+ : E::LogicalAnd::Create(right_filter_predicates));
+ right_logical = L::Filter::Create(right_logical, right_predicate);
+ }
+
+ return L::HashJoin::Create(left_logical,
+ right_logical,
+ left_join_attributes,
+ right_join_attributes,
+ nullptr /* residual_predicate */,
+ L::HashJoin::JoinType::kLeftOuterJoin);
}
LOG_IGNORING_RULE(input);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/rules/PushDownFilter.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/PushDownFilter.cpp b/query_optimizer/rules/PushDownFilter.cpp
index 41c21e7..5bf5545 100644
--- a/query_optimizer/rules/PushDownFilter.cpp
+++ b/query_optimizer/rules/PushDownFilter.cpp
@@ -1,6 +1,8 @@
/**
* Copyright 2011-2015 Quickstep Technologies LLC.
* Copyright 2015 Pivotal Software, Inc.
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,11 +19,13 @@
#include "query_optimizer/rules/PushDownFilter.hpp"
+#include <cstddef>
#include <vector>
#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ExpressionUtil.hpp"
#include "query_optimizer/logical/Filter.hpp"
+#include "query_optimizer/logical/HashJoin.hpp"
#include "query_optimizer/logical/PatternMatcher.hpp"
#include "query_optimizer/rules/Rule.hpp"
#include "query_optimizer/rules/RuleHelper.hpp"
@@ -48,13 +52,23 @@ L::LogicalPtr PushDownFilter::applyToNode(const L::LogicalPtr &input) {
// We consider if the filter predicates can be pushed under the input of the
// Filter.
const L::LogicalPtr &input = filter->input();
- const std::vector<L::LogicalPtr> input_children = input->children();
+ const std::vector<L::LogicalPtr> &input_children = input->children();
// Store the predicates that can be pushed down to be upon each child node
// of the Filter input.
std::vector<std::vector<E::PredicatePtr>> predicates_to_be_pushed(
input_children.size());
if (!input_children.empty()) {
+ std::size_t last_input_index = input_children.size();
+
+ // Cannot push down a Filter down the right child of LeftOuterJoin.
+ L::HashJoinPtr hash_join;
+ if (L::SomeHashJoin::MatchesWithConditionalCast(input, &hash_join) &&
+ hash_join->join_type() == L::HashJoin::JoinType::kLeftOuterJoin) {
+ DCHECK_EQ(2u, input_children.size());
+ last_input_index = 1u;
+ }
+
for (const E::PredicatePtr &conjuntion_item : conjunction_items) {
bool can_be_pushed = false;
const std::vector<E::AttributeReferencePtr> referenced_attributes =
@@ -64,7 +78,7 @@ L::LogicalPtr PushDownFilter::applyToNode(const L::LogicalPtr &input) {
// referenced by the predicate is a subset of output attributes
// of a child.
for (std::vector<L::LogicalPtr>::size_type input_index = 0;
- input_index < input_children.size();
+ input_index < last_input_index;
++input_index) {
if (SubsetOfExpressions(
referenced_attributes,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/rules/PushDownSemiAntiJoin.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/PushDownSemiAntiJoin.cpp b/query_optimizer/rules/PushDownSemiAntiJoin.cpp
index 54af764..d53970e 100644
--- a/query_optimizer/rules/PushDownSemiAntiJoin.cpp
+++ b/query_optimizer/rules/PushDownSemiAntiJoin.cpp
@@ -64,7 +64,14 @@ L::LogicalPtr PushDownSemiAntiJoin::pushDownSemiAntiJoin(
std::vector<L::LogicalPtr> left_input_children = left_input->children();
if (!left_input_children.empty()) {
- const std::vector<L::LogicalPtr>::size_type last_input_index = left_input_children.size();
+ std::vector<L::LogicalPtr>::size_type last_input_index = left_input_children.size();
+ // Cannot push down a Filter down the right child of LeftOuterJoin.
+ L::HashJoinPtr hash_join;
+ if (L::SomeHashJoin::MatchesWithConditionalCast(left_input, &hash_join) &&
+ hash_join->join_type() == L::HashJoin::JoinType::kLeftOuterJoin) {
+ DCHECK_EQ(2u, left_input_children.size());
+ last_input_index = 1u;
+ }
std::vector<L::LogicalPtr>::size_type input_index = 0;
while (input_index < last_input_index) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/strategy/Join.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/strategy/Join.cpp b/query_optimizer/strategy/Join.cpp
index becea61..ceb3f4f 100644
--- a/query_optimizer/strategy/Join.cpp
+++ b/query_optimizer/strategy/Join.cpp
@@ -314,6 +314,9 @@ void Join::addHashJoin(const logical::ProjectPtr &logical_project,
case L::HashJoin::JoinType::kLeftAntiJoin:
join_type = P::HashJoin::JoinType::kLeftAntiJoin;
break;
+ case L::HashJoin::JoinType::kLeftOuterJoin:
+ join_type = P::HashJoin::JoinType::kLeftOuterJoin;
+ break;
default:
LOG(FATAL) << "Invalid logical::HashJoin::JoinType: "
<< static_cast<typename std::underlying_type<L::HashJoin::JoinType>::type>(
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/tests/execution_generator/Join.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/execution_generator/Join.test b/query_optimizer/tests/execution_generator/Join.test
index 6d6d2e1..011d365 100644
--- a/query_optimizer/tests/execution_generator/Join.test
+++ b/query_optimizer/tests/execution_generator/Join.test
@@ -130,3 +130,102 @@ WHERE a1.w = b1.w
| 6| 61| 601| C6|
| 18| 181| 1799| C18|
+-----------+--------------------+------------------------+----------------+
+==
+
+SELECT a.w AS "a.w", b.x AS "b.x", c.y AS "c.y", d.z AS "d.z"
+FROM a LEFT JOIN b ON a.w = b.w
+ LEFT JOIN c ON a.x = c.x
+ LEFT JOIN d ON a.y = d.y
+ORDER BY a.w;
+--
++-----------+--------------------+------------------------+----------------+
+|a.w |b.x |c.y |d.z |
++-----------+--------------------+------------------------+----------------+
+| 0| 0| -1| C0|
+| 1| NULL| NULL| C1|
+| 2| 21| NULL| C2|
+| 3| NULL| 300| C3|
+| 4| 40| NULL| C4|
+| 5| NULL| NULL| C5|
+| 6| 61| 601| C6|
+| 7| NULL| NULL| C7|
+| 8| 80| NULL| C8|
+| 9| NULL| 899| C9|
+| 10| 101| NULL| C10|
+| 11| NULL| NULL| C11|
+| 12| 120| 1200| C12|
+| 13| NULL| NULL| C13|
+| 14| 141| NULL| C14|
+| 15| NULL| 1501| C15|
+| 16| 160| NULL| C16|
+| 17| NULL| NULL| C17|
+| 18| 181| 1799| C18|
+| 19| NULL| NULL| C19|
++-----------+--------------------+------------------------+----------------+
+==
+
+SELECT a.w AS "a.w", b.x AS "b.x", c.y AS "c.y", d.z AS "d.z"
+FROM a LEFT JOIN b ON a.w = b.w
+ LEFT JOIN c ON b.x = c.x
+ LEFT JOIN d ON c.y = d.y
+ORDER BY a.w;
+--
++-----------+--------------------+------------------------+----------------+
+|a.w |b.x |c.y |d.z |
++-----------+--------------------+------------------------+----------------+
+| 0| 0| -1| NULL|
+| 1| NULL| NULL| NULL|
+| 2| 21| NULL| NULL|
+| 3| NULL| NULL| NULL|
+| 4| 40| NULL| NULL|
+| 5| NULL| NULL| NULL|
+| 6| 61| NULL| NULL|
+| 7| NULL| NULL| NULL|
+| 8| 80| NULL| NULL|
+| 9| NULL| NULL| NULL|
+| 10| 101| NULL| NULL|
+| 11| NULL| NULL| NULL|
+| 12| 120| 1200| C12|
+| 13| NULL| NULL| NULL|
+| 14| 141| NULL| NULL|
+| 15| NULL| NULL| NULL|
+| 16| 160| NULL| NULL|
+| 17| NULL| NULL| NULL|
+| 18| 181| NULL| NULL|
+| 19| NULL| NULL| NULL|
++-----------+--------------------+------------------------+----------------+
+==
+
+SELECT b.x AS "b.x", c1.x AS "c1.x", c2.x AS "c2.x"
+FROM b LEFT JOIN c c1 ON (b.x = c1.x)
+ LEFT JOIN c c2 ON (b.x = c2.x AND c2.x > 10)
+ORDER BY b.x;
+--
++--------------------+--------------------+--------------------+
+|b.x |c1.x |c2.x |
++--------------------+--------------------+--------------------+
+| 0| 0| NULL|
+| 21| NULL| NULL|
+| 40| NULL| NULL|
+| 61| NULL| NULL|
+| 80| NULL| NULL|
+| 101| NULL| NULL|
+| 120| 120| 120|
+| 141| NULL| NULL|
+| 160| NULL| NULL|
+| 181| NULL| NULL|
++--------------------+--------------------+--------------------+
+==
+
+SELECT a.w AS "a.w", b.x AS "b.x", c.y AS "c.y", d.z AS "d.z"
+FROM a RIGHT JOIN b ON a.w = b.w
+ JOIN c ON a.x = c.x
+ LEFT JOIN d ON a.y = d.y
+WHERE b.x > 10
+ AND c.y < 1000;
+--
++-----------+--------------------+------------------------+----------------+
+|a.w |b.x |c.y |d.z |
++-----------+--------------------+------------------------+----------------+
+| 6| 61| 601| C6|
++-----------+--------------------+------------------------+----------------+
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/tests/logical_generator/Join.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/logical_generator/Join.test b/query_optimizer/tests/logical_generator/Join.test
index d48df4c..a6452cc 100644
--- a/query_optimizer/tests/logical_generator/Join.test
+++ b/query_optimizer/tests/logical_generator/Join.test
@@ -215,3 +215,185 @@ TopLevelPlan
| +-AttributeReference[id=3,name=z,relation=a1,type=Int]
+-output_attributes=
+-AttributeReference[id=3,name=z,relation=a1,type=Int]
+==
+
+
+# Outer joins
+SELECT a.w, b.x, c.y, d.z
+FROM a LEFT JOIN b ON a.w = b.w
+ LEFT JOIN c ON a.x = c.x
+ LEFT JOIN d ON a.y = d.y;
+--
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=HashLeftOuterJoin
+| | | +-left=HashLeftOuterJoin
+| | | | +-left=TableReference[relation_name=a]
+| | | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | | | +-AttributeReference[id=3,name=z,relation=a,type=Int]
+| | | | +-right=TableReference[relation_name=b]
+| | | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | +-left_join_attributes=
+| | | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | +-right_join_attributes=
+| | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | +-right=TableReference[relation_name=c]
+| | | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | | +-left_join_attributes=
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | +-right_join_attributes=
+| | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | +-right=TableReference[relation_name=d]
+| | | +-AttributeReference[id=8,name=y,relation=d,type=Int]
+| | | +-AttributeReference[id=9,name=z,relation=d,type=Int]
+| | +-left_join_attributes=
+| | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | +-right_join_attributes=
+| | +-AttributeReference[id=8,name=y,relation=d,type=Int]
+| +-project_list=
+| +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+| +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+| +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
++-output_attributes=
+ +-AttributeReference[id=0,name=w,relation=a,type=Int]
+ +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+ +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+ +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
+==
+
+# Push down filtering
+SELECT a.w, b.x, c.y, d.z
+FROM a RIGHT JOIN b ON a.w = b.w
+ JOIN c ON a.x = c.x
+ LEFT JOIN d ON a.y = d.y
+WHERE b.x > 10
+ AND c.y > 20;
+--
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=HashJoin
+| | | +-left=HashLeftOuterJoin
+| | | | +-left=Filter
+| | | | | +-input=TableReference[relation_name=b]
+| | | | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | | +-filter_predicate=Greater
+| | | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | | +-Literal[value=10,type=Int]
+| | | | +-right=TableReference[relation_name=a]
+| | | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | | | +-AttributeReference[id=3,name=z,relation=a,type=Int]
+| | | | +-left_join_attributes=
+| | | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | +-right_join_attributes=
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | +-right=Filter
+| | | | +-input=TableReference[relation_name=c]
+| | | | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | | | +-filter_predicate=Greater
+| | | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | | | +-Literal[value=20,type=Int]
+| | | +-left_join_attributes=
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int NULL]
+| | | +-right_join_attributes=
+| | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | +-right=TableReference[relation_name=d]
+| | | +-AttributeReference[id=8,name=y,relation=d,type=Int]
+| | | +-AttributeReference[id=9,name=z,relation=d,type=Int]
+| | +-left_join_attributes=
+| | | +-AttributeReference[id=2,name=y,relation=a,type=Int NULL]
+| | +-right_join_attributes=
+| | +-AttributeReference[id=8,name=y,relation=d,type=Int]
+| +-project_list=
+| +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+| +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
++-output_attributes=
+ +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+ +-AttributeReference[id=5,name=x,relation=b,type=Int]
+ +-AttributeReference[id=7,name=y,relation=c,type=Int]
+ +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
+==
+
+SELECT *
+FROM b LEFT JOIN c ON (b.x = c.x AND c.x > 10);
+--
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=TableReference[relation_name=b]
+| | | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | +-right=Filter
+| | | +-input=TableReference[relation_name=c]
+| | | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | | +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| | | +-filter_predicate=Greater
+| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | +-Literal[value=10,type=Int]
+| | +-left_join_attributes=
+| | | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | +-right_join_attributes=
+| | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| +-project_list=
+| +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+| +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
++-output_attributes=
+ +-AttributeReference[id=0,name=w,relation=b,type=Int]
+ +-AttributeReference[id=1,name=x,relation=b,type=Int]
+ +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+ +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
+==
+
+# Consider the semantics of this query, a value of b.x WILL BE in the output even if it is no greater than 10.
+SELECT *
+FROM b LEFT JOIN c ON (b.x = c.x AND b.x > 10);
+--
+ERROR: Filter predicates on left table is not allowed for outer joins
+==
+
+# Consider the semantics of this query, a value of b.x WILL NOT BE in the output if it is no greater than 10.
+SELECT *
+FROM b LEFT JOIN c ON b.x = c.x
+WHERE b.x > 10;
+--
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=Filter
+| | | +-input=TableReference[relation_name=b]
+| | | | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | | | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | | +-filter_predicate=Greater
+| | | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | | +-Literal[value=10,type=Int]
+| | +-right=TableReference[relation_name=c]
+| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| | +-left_join_attributes=
+| | | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | +-right_join_attributes=
+| | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| +-project_list=
+| +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+| +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
++-output_attributes=
+ +-AttributeReference[id=0,name=w,relation=b,type=Int]
+ +-AttributeReference[id=1,name=x,relation=b,type=Int]
+ +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+ +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/tests/physical_generator/Join.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/physical_generator/Join.test b/query_optimizer/tests/physical_generator/Join.test
index c1f49b0..45c9cbd 100644
--- a/query_optimizer/tests/physical_generator/Join.test
+++ b/query_optimizer/tests/physical_generator/Join.test
@@ -235,3 +235,167 @@ TopLevelPlan
| +-AttributeReference[id=9,name=z,relation=d1,type=Int]
+-output_attributes=
+-AttributeReference[id=3,name=z,relation=a1,type=Int]
+==
+
+SELECT a.w, b.x, c.y, d.z
+FROM a LEFT JOIN b ON a.w = b.w
+ LEFT JOIN c ON a.x = c.x
+ LEFT JOIN d ON a.y = d.y;
+--
+TopLevelPlan
++-plan=HashLeftOuterJoin
+| +-left=HashLeftOuterJoin
+| | +-left=HashLeftOuterJoin
+| | | +-left=TableReference[relation=a]
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | | +-AttributeReference[id=3,name=z,relation=a,type=Int]
+| | | +-right=TableReference[relation=b]
+| | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | +-project_expressions=
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | | +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+| | | +-left_join_attributes=
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | +-right_join_attributes=
+| | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | +-right=TableReference[relation=c]
+| | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | +-project_expressions=
+| | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+| | | +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+| | +-left_join_attributes=
+| | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | +-right_join_attributes=
+| | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| +-right=TableReference[relation=d]
+| | +-AttributeReference[id=8,name=y,relation=d,type=Int]
+| | +-AttributeReference[id=9,name=z,relation=d,type=Int]
+| +-project_expressions=
+| | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+| | +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+| | +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
+| +-left_join_attributes=
+| | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| +-right_join_attributes=
+| +-AttributeReference[id=8,name=y,relation=d,type=Int]
++-output_attributes=
+ +-AttributeReference[id=0,name=w,relation=a,type=Int]
+ +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+ +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+ +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
+==
+
+SELECT a.w, b.x, c.y, d.z
+FROM a RIGHT JOIN b ON a.w = b.w
+ JOIN c ON a.x = c.x
+ LEFT JOIN d ON a.y = d.y
+WHERE b.x > 10
+ AND c.y > 20;
+--
+TopLevelPlan
++-plan=HashLeftOuterJoin
+| +-left=HashJoin
+| | +-left=HashLeftOuterJoin
+| | | +-left=Selection
+| | | | +-input=TableReference[relation=b]
+| | | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | +-filter_predicate=Greater
+| | | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | | +-Literal[value=10,type=Int]
+| | | | +-project_expressions=
+| | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | +-right=TableReference[relation=a]
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | | +-AttributeReference[id=3,name=z,relation=a,type=Int]
+| | | +-project_expressions=
+| | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int NULL]
+| | | | +-AttributeReference[id=2,name=y,relation=a,type=Int NULL]
+| | | +-left_join_attributes=
+| | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | +-right_join_attributes=
+| | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | +-right=Selection
+| | | +-input=TableReference[relation=c]
+| | | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | | +-filter_predicate=Greater
+| | | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | | | +-Literal[value=20,type=Int]
+| | | +-project_expressions=
+| | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | +-project_expressions=
+| | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+| | | +-AttributeReference[id=2,name=y,relation=a,type=Int NULL]
+| | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | +-left_join_attributes=
+| | | +-AttributeReference[id=1,name=x,relation=a,type=Int NULL]
+| | +-right_join_attributes=
+| | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| +-right=TableReference[relation=d]
+| | +-AttributeReference[id=8,name=y,relation=d,type=Int]
+| | +-AttributeReference[id=9,name=z,relation=d,type=Int]
+| +-project_expressions=
+| | +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+| | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
+| +-left_join_attributes=
+| | +-AttributeReference[id=2,name=y,relation=a,type=Int NULL]
+| +-right_join_attributes=
+| +-AttributeReference[id=8,name=y,relation=d,type=Int]
++-output_attributes=
+ +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+ +-AttributeReference[id=5,name=x,relation=b,type=Int]
+ +-AttributeReference[id=7,name=y,relation=c,type=Int]
+ +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
+==
+
+SELECT *
+FROM b LEFT JOIN c ON (b.x = c.x AND c.x > 10);
+--
+TopLevelPlan
++-plan=HashLeftOuterJoin
+| +-left=TableReference[relation=b]
+| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| +-right=Selection
+| | +-input=TableReference[relation=c]
+| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| | +-filter_predicate=Greater
+| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | +-Literal[value=10,type=Int]
+| | +-project_expressions=
+| | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| +-project_expressions=
+| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+| | +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
+| +-left_join_attributes=
+| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| +-right_join_attributes=
+| +-AttributeReference[id=2,name=x,relation=c,type=Int]
++-output_attributes=
+ +-AttributeReference[id=0,name=w,relation=b,type=Int]
+ +-AttributeReference[id=1,name=x,relation=b,type=Int]
+ +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+ +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/tests/resolver/Join.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/resolver/Join.test b/query_optimizer/tests/resolver/Join.test
index f501f97..3443e2b 100644
--- a/query_optimizer/tests/resolver/Join.test
+++ b/query_optimizer/tests/resolver/Join.test
@@ -156,17 +156,144 @@ TopLevelPlan
+-AttributeReference[id=3,name=z,relation=a1,type=Int]
==
+
+# Outer joins
SELECT *
-FROM b LEFT JOIN c ON b.x = c.x JOIN d ON c.y = d.y;
+FROM b LEFT JOIN c ON (b.x = c.x AND c.x > 10);
--
-ERROR: Outer joins are not supported yet (2 : 13)
-FROM b LEFT JOIN c ON b.x = c.x JOIN d ON ...
- ^
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=TableReference[relation_name=b]
+| | | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | +-right=TableReference[relation_name=c]
+| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| | +-residual_predicate=And
+| | | +-Equal
+| | | | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | +-Greater
+| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | +-Literal[value=10,type=Int]
+| | +-left_join_attributes=
+| | | +-[]
+| | +-right_join_attributes=
+| | +-[]
+| +-project_list=
+| +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+| +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
++-output_attributes=
+ +-AttributeReference[id=0,name=w,relation=b,type=Int]
+ +-AttributeReference[id=1,name=x,relation=b,type=Int]
+ +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+ +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
+==
+
+SELECT a.w, b.x, c.y, a.w + b.x + c.y
+FROM a LEFT JOIN b ON a.w = b.w RIGHT JOIN c ON b.x = c.x;
+--
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=TableReference[relation_name=c]
+| | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | +-right=HashLeftOuterJoin
+| | | +-left=TableReference[relation_name=a]
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | | +-AttributeReference[id=3,name=z,relation=a,type=Int]
+| | | +-right=TableReference[relation_name=b]
+| | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | +-residual_predicate=Equal
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | +-left_join_attributes=
+| | | | +-[]
+| | | +-right_join_attributes=
+| | | +-[]
+| | +-residual_predicate=Equal
+| | | +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+| | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | +-left_join_attributes=
+| | | +-[]
+| | +-right_join_attributes=
+| | +-[]
+| +-project_list=
+| +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+| +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+| +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| +-Alias[id=8,name=,alias=((a.w+b.x)+c.y),relation=,type=Int NULL]
+| +-Add
+| +-Add
+| | +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+| | +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+| +-AttributeReference[id=7,name=y,relation=c,type=Int]
++-output_attributes=
+ +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+ +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+ +-AttributeReference[id=7,name=y,relation=c,type=Int]
+ +-AttributeReference[id=8,name=,alias=((a.w+b.x)+c.y),relation=,type=Int NULL]
+==
+
+SELECT a.w, b.x, c.y, a.w + b.x + c.y
+FROM a LEFT JOIN (b RIGHT JOIN c ON b.x = c.x) ON a.w = b.w;
+--
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=TableReference[relation_name=a]
+| | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | +-AttributeReference[id=3,name=z,relation=a,type=Int]
+| | +-right=HashLeftOuterJoin
+| | | +-left=TableReference[relation_name=c]
+| | | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | | +-right=TableReference[relation_name=b]
+| | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | +-residual_predicate=Equal
+| | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | +-left_join_attributes=
+| | | | +-[]
+| | | +-right_join_attributes=
+| | | +-[]
+| | +-residual_predicate=Equal
+| | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | +-AttributeReference[id=4,name=w,relation=b,type=Int NULL]
+| | +-left_join_attributes=
+| | | +-[]
+| | +-right_join_attributes=
+| | +-[]
+| +-project_list=
+| +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+| +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+| +-Alias[id=8,name=,alias=((a.w+b.x)+c.y),relation=,type=Int NULL]
+| +-Add
+| +-Add
+| | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+| +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
++-output_attributes=
+ +-AttributeReference[id=0,name=w,relation=a,type=Int]
+ +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+ +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+ +-AttributeReference[id=8,name=,alias=((a.w+b.x)+c.y),relation=,type=Int NULL]
==
SELECT *
-FROM b LEFT JOIN (c JOIN d ON c.y = d.y) ON b.x = c.x;
+FROM b FULL JOIN c ON b.x = c.x;
--
-ERROR: Outer joins are not supported yet (2 : 13)
-FROM b LEFT JOIN (c JOIN d ON c.y = d.y) O...
+ERROR: Full outer join is not supported yet (2 : 13)
+FROM b FULL JOIN c ON b.x = c.x;
^
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index b02bc6b..f204d67 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -177,12 +177,12 @@ target_link_libraries(quickstep_relationaloperators_HashJoinOperator
quickstep_storage_StorageBlockInfo
quickstep_storage_StorageManager
quickstep_storage_SubBlocksReference
+ quickstep_storage_TupleIdSequence
quickstep_storage_TupleReference
quickstep_storage_TupleStorageSubBlock
quickstep_storage_ValueAccessor
quickstep_types_containers_ColumnVectorsValueAccessor
quickstep_utility_Macros
- quickstep_utility_PtrList
tmb)
target_link_libraries(quickstep_relationaloperators_InsertOperator
glog
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index e0076e3..82f6b2a 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -1,6 +1,8 @@
/**
* Copyright 2011-2015 Quickstep Technologies LLC.
* Copyright 2015-2016 Pivotal Software, Inc.
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -36,6 +38,7 @@
#include "storage/StorageBlockInfo.hpp"
#include "storage/StorageManager.hpp"
#include "storage/SubBlocksReference.hpp"
+#include "storage/TupleIdSequence.hpp"
#include "storage/TupleReference.hpp"
#include "storage/TupleStorageSubBlock.hpp"
#include "storage/ValueAccessor.hpp"
@@ -203,6 +206,38 @@ class SemiAntiJoinTupleCollector {
std::unique_ptr<TupleIdSequence> filter_;
};
+class OuterJoinTupleCollector {
+ public:
+ explicit OuterJoinTupleCollector(const TupleStorageSubBlock &tuple_store) {
+ filter_.reset(tuple_store.getExistenceMap());
+ }
+
+ template <typename ValueAccessorT>
+ inline void operator()(const ValueAccessorT &accessor,
+ const TupleReference &tref) {
+ joined_tuples_[tref.block].emplace_back(tref.tuple, accessor.getCurrentPosition());
+ }
+
+ template <typename ValueAccessorT>
+ inline void recordMatch(const ValueAccessorT &accessor) {
+ filter_->set(accessor.getCurrentPosition(), false);
+ }
+
+ inline std::unordered_map<block_id, std::vector<std::pair<tuple_id, tuple_id>>>*
+ getJoinedTupleMap() {
+ return &joined_tuples_;
+ }
+
+ const TupleIdSequence* filter() const {
+ return filter_.get();
+ }
+
+ private:
+ std::unordered_map<block_id, std::vector<std::pair<tuple_id, tuple_id>>> joined_tuples_;
+ // BitVector on the probe relation. 1 if the corresponding tuple has no match.
+ std::unique_ptr<TupleIdSequence> filter_;
+};
+
} // namespace
bool HashJoinOperator::getAllWorkOrders(
@@ -221,9 +256,13 @@ bool HashJoinOperator::getAllWorkOrders(
case JoinType::kLeftAntiJoin:
return getAllNonOuterJoinWorkOrders<HashAntiJoinWorkOrder>(
container, query_context, storage_manager);
+ case JoinType::kLeftOuterJoin:
+ return getAllOuterJoinWorkOrders(
+ container, query_context, storage_manager);
default:
LOG(FATAL) << "Unknown join type in HashJoinOperator::getAllWorkOrders()";
}
+ return false;
}
template <class JoinWorkOrderClass>
@@ -285,6 +324,65 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
return false;
}
+bool HashJoinOperator::getAllOuterJoinWorkOrders(
+ WorkOrdersContainer *container,
+ QueryContext *query_context,
+ StorageManager *storage_manager) {
+ // We wait until the building of global hash table is complete.
+ if (blocking_dependencies_met_) {
+ DCHECK(query_context != nullptr);
+
+ const vector<unique_ptr<const Scalar>> &selection =
+ query_context->getScalarGroup(selection_index_);
+
+ InsertDestination *output_destination =
+ query_context->getInsertDestination(output_destination_index_);
+ const JoinHashTable &hash_table =
+ *(query_context->getJoinHashTable(hash_table_index_));
+
+ if (probe_relation_is_stored_) {
+ if (!started_) {
+ for (const block_id probe_block_id : probe_relation_block_ids_) {
+ container->addNormalWorkOrder(
+ new HashOuterJoinWorkOrder(
+ build_relation_,
+ probe_relation_,
+ join_key_attributes_,
+ any_join_key_attributes_nullable_,
+ probe_block_id,
+ selection,
+ is_selection_on_build_,
+ hash_table,
+ output_destination,
+ storage_manager),
+ op_index_);
+ }
+ started_ = true;
+ }
+ return started_;
+ } else {
+ while (num_workorders_generated_ < probe_relation_block_ids_.size()) {
+ container->addNormalWorkOrder(
+ new HashOuterJoinWorkOrder(
+ build_relation_,
+ probe_relation_,
+ join_key_attributes_,
+ any_join_key_attributes_nullable_,
+ probe_relation_block_ids_[num_workorders_generated_],
+ selection,
+ is_selection_on_build_,
+ hash_table,
+ output_destination,
+ storage_manager),
+ op_index_);
+ ++num_workorders_generated_;
+ }
+ return done_feeding_input_relation_;
+ } // end else (probe_relation_is_stored_)
+ } // end if (blocking_dependencies_met_)
+ return false;
+}
+
void HashInnerJoinWorkOrder::execute() {
if (FLAGS_vector_based_joined_tuple_collector) {
executeWithCollectorType<VectorBasedJoinedTupleCollector>();
@@ -641,11 +739,107 @@ void HashAntiJoinWorkOrder::executeWithResidualPredicate() {
for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
selection_it != selection_.end();
++selection_it) {
- temp_result.addColumn((*selection_it)->getAllValues(probe_accessor_with_filter.get(),
- &sub_blocks_ref));
+ temp_result.addColumn(
+ (*selection_it)->getAllValues(probe_accessor_with_filter.get(),
+ &sub_blocks_ref));
}
output_destination_->bulkInsertTuples(&temp_result);
}
+void HashOuterJoinWorkOrder::execute() {
+ const relation_id build_relation_id = build_relation_.getID();
+ const relation_id probe_relation_id = probe_relation_.getID();
+
+ const BlockReference probe_block = storage_manager_->getBlock(block_id_,
+ probe_relation_);
+ const TupleStorageSubBlock &probe_store = probe_block->getTupleStorageSubBlock();
+
+ std::unique_ptr<ValueAccessor> probe_accessor(probe_store.createValueAccessor());
+ OuterJoinTupleCollector collector(probe_store);
+ if (join_key_attributes_.size() == 1) {
+ hash_table_.getAllFromValueAccessorWithExtraWorkForFirstMatch(
+ probe_accessor.get(),
+ join_key_attributes_.front(),
+ any_join_key_attributes_nullable_,
+ &collector);
+ } else {
+ hash_table_.getAllFromValueAccessorCompositeKeyWithExtraWorkForFirstMatch(
+ probe_accessor.get(),
+ join_key_attributes_,
+ any_join_key_attributes_nullable_,
+ &collector);
+ }
+
+ // Populate the output tuples for matches.
+ for (const std::pair<const block_id, std::vector<std::pair<tuple_id, tuple_id>>>
+ &build_block_entry : *collector.getJoinedTupleMap()) {
+ const BlockReference build_block =
+ storage_manager_->getBlock(build_block_entry.first, build_relation_);
+ const TupleStorageSubBlock &build_store =
+ build_block->getTupleStorageSubBlock();
+
+ std::unique_ptr<ValueAccessor> build_accessor(
+ build_store.createValueAccessor());
+ ColumnVectorsValueAccessor temp_result;
+ for (auto selection_it = selection_.begin();
+ selection_it != selection_.end();
+ ++selection_it) {
+ temp_result.addColumn(
+ (*selection_it)->getAllValuesForJoin(
+ build_relation_id,
+ build_accessor.get(),
+ probe_relation_id,
+ probe_accessor.get(),
+ build_block_entry.second));
+ }
+ output_destination_->bulkInsertTuples(&temp_result);
+ }
+
+ SubBlocksReference sub_blocks_ref(probe_store,
+ probe_block->getIndices(),
+ probe_block->getIndicesConsistent());
+
+ // Populate the output tuples for non-matches.
+ const TupleIdSequence *filter = collector.filter();
+ const TupleIdSequence::size_type num_tuples_without_matches = filter->size();
+ if (num_tuples_without_matches > 0) {
+ std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
+ probe_store.createValueAccessor(filter));
+ ColumnVectorsValueAccessor temp_result;
+
+ for (std::size_t i = 0; i < selection_.size(); ++i) {
+ if (is_selection_on_build_[i]) {
+ // NOTE(harshad, jianqiao): The assumption here is that any operation
+ // involving NULL as operands will return NULL result. This assumption
+ // will become invalid if later we add support for functions that can
+ // produce non-NULL result with NULL operands, e.g.
+ // CASE WHEN x IS NOT NULL THEN x ELSE 0
+ // or equivalently
+ // COALESCE(x, 0)
+ // where x is an attribute of the build relation.
+ // In that case, this HashOuterJoinWorkOrder needs to be updated to
+ // correctly handle the selections.
+ const Type& column_type = selection_[i]->getType().getNullableVersion();
+ if (NativeColumnVector::UsableForType(column_type)) {
+ NativeColumnVector *result = new NativeColumnVector(
+ column_type, num_tuples_without_matches);
+ result->fillWithNulls();
+ temp_result.addColumn(result);
+ } else {
+ IndirectColumnVector *result = new IndirectColumnVector(
+ column_type, num_tuples_without_matches);
+ result->fillWithValue(TypedValue(column_type.getTypeID()));
+ temp_result.addColumn(result);
+ }
+ } else {
+ temp_result.addColumn(
+ selection_[i]->getAllValues(probe_accessor_with_filter.get(),
+ &sub_blocks_ref));
+ }
+ }
+ output_destination_->bulkInsertTuples(&temp_result);
+ }
+}
+
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/relational_operators/HashJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index c22f435..fcc087a 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -1,6 +1,8 @@
/**
* Copyright 2011-2015 Quickstep Technologies LLC.
* Copyright 2015-2016 Pivotal Software, Inc.
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -31,7 +33,6 @@
#include "storage/HashTable.hpp"
#include "storage/StorageBlockInfo.hpp"
#include "utility/Macros.hpp"
-#include "utility/PtrList.hpp"
#include "glog/logging.h"
@@ -61,7 +62,8 @@ class HashJoinOperator : public RelationalOperator {
enum class JoinType {
kInnerJoin = 0,
kLeftSemiJoin,
- kLeftAntiJoin
+ kLeftAntiJoin,
+ kLeftOuterJoin
};
/**
@@ -101,10 +103,14 @@ class HashJoinOperator : public RelationalOperator {
* additional filter to pairs of tuples that match the hash-join (i.e.
* key equality) predicate. Effectively, this makes the join predicate
* the conjunction of the key-equality predicate and residual predicate.
+ * Note that this field does not apply to outer joins.
* @param selection_index The group index of Scalars in QueryContext,
* corresponding to the attributes of the relation referred by
* output_relation_id. Each Scalar is evaluated for the joined tuples,
* and the resulting value is inserted into the join result.
+ * @param is_selection_on_build Whether each selection Scalar is using attributes
+ * from the build relation as input. Can be NULL for inner/semi/anti
+ * joins since this information is not utilized by these joins.
* @param join_type The type of join corresponding to this operator.
**/
HashJoinOperator(const CatalogRelation &build_relation,
@@ -117,6 +123,7 @@ class HashJoinOperator : public RelationalOperator {
const QueryContext::join_hash_table_id hash_table_index,
const QueryContext::predicate_id residual_predicate_index,
const QueryContext::scalar_group_id selection_index,
+ const std::vector<bool> *is_selection_on_build = nullptr,
const JoinType join_type = JoinType::kInnerJoin)
: build_relation_(build_relation),
probe_relation_(probe_relation),
@@ -128,12 +135,19 @@ class HashJoinOperator : public RelationalOperator {
hash_table_index_(hash_table_index),
residual_predicate_index_(residual_predicate_index),
selection_index_(selection_index),
+ is_selection_on_build_(is_selection_on_build == nullptr
+ ? std::vector<bool>()
+ : *is_selection_on_build),
join_type_(join_type),
probe_relation_block_ids_(probe_relation_is_stored
? probe_relation.getBlocksSnapshot()
: std::vector<block_id>()),
num_workorders_generated_(0),
- started_(false) {}
+ started_(false) {
+ DCHECK(join_type != JoinType::kLeftOuterJoin ||
+ (is_selection_on_build != nullptr &&
+ residual_predicate_index == QueryContext::kInvalidPredicateId));
+ }
~HashJoinOperator() override {}
@@ -180,6 +194,10 @@ class HashJoinOperator : public RelationalOperator {
QueryContext *query_context,
StorageManager *storage_manager);
+ bool getAllOuterJoinWorkOrders(WorkOrdersContainer *container,
+ QueryContext *query_context,
+ StorageManager *storage_manager);
+
const CatalogRelation &build_relation_;
const CatalogRelation &probe_relation_;
const bool probe_relation_is_stored_;
@@ -190,6 +208,7 @@ class HashJoinOperator : public RelationalOperator {
const QueryContext::join_hash_table_id hash_table_index_;
const QueryContext::predicate_id residual_predicate_index_;
const QueryContext::scalar_group_id selection_index_;
+ const std::vector<bool> is_selection_on_build_;
const JoinType join_type_;
std::vector<block_id> probe_relation_block_ids_;
@@ -521,7 +540,7 @@ class HashAntiJoinWorkOrder : public WorkOrder {
StorageManager *storage_manager)
: build_relation_(build_relation),
probe_relation_(probe_relation),
- join_key_attributes_(join_key_attributes),
+ join_key_attributes_(std::move(join_key_attributes)),
any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
block_id_(lookup_block_id),
residual_predicate_(residual_predicate),
@@ -560,6 +579,115 @@ class HashAntiJoinWorkOrder : public WorkOrder {
DISALLOW_COPY_AND_ASSIGN(HashAntiJoinWorkOrder);
};
+/**
+ * @brief A left outer join WorkOrder produced by the HashJoinOperator.
+ **/
+class HashOuterJoinWorkOrder : public WorkOrder {
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * @param build_relation The relation that the hash table was originally built
+ * on (i.e. the inner relation in the join).
+ * @param probe_relation The relation to probe the hash table with (i.e. the
+ * outer relation in the join).
+ * @param join_key_attributes The IDs of equijoin attributes in \c
+ * probe_relation.
+ * @param any_join_key_attributes_nullable If any attribute is nullable.
+ * @param hash_table The JoinHashTable to use.
+ * @param selection A list of Scalars corresponding to the relation attributes
+ * in \c output_destination. Each Scalar is evaluated for the joined
+ * tuples, and the resulting value is inserted into the join result.
+ * @param is_selection_on_build Whether each Scalar in the \p selection vector
+ * is using attributes from the build relation as input. Note that the
+ * length of this vector should equal the length of \p selection.
+ * @param lookup_block_id The block id of the probe_relation.
+ * @param output_destination The InsertDestination to insert the join results.
+ * @param storage_manager The StorageManager to use.
+ **/
+ HashOuterJoinWorkOrder(const CatalogRelationSchema &build_relation,
+ const CatalogRelationSchema &probe_relation,
+ const std::vector<attribute_id> &join_key_attributes,
+ const bool any_join_key_attributes_nullable,
+ const block_id lookup_block_id,
+ const std::vector<std::unique_ptr<const Scalar>> &selection,
+ const std::vector<bool> &is_selection_on_build,
+ const JoinHashTable &hash_table,
+ InsertDestination *output_destination,
+ StorageManager *storage_manager)
+ : build_relation_(build_relation),
+ probe_relation_(probe_relation),
+ join_key_attributes_(join_key_attributes),
+ any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
+ block_id_(lookup_block_id),
+ selection_(selection),
+ is_selection_on_build_(is_selection_on_build),
+ hash_table_(hash_table),
+ output_destination_(output_destination),
+ storage_manager_(storage_manager) {}
+
+ /**
+ * @brief Constructor for the distributed version.
+ *
+ * @param build_relation The relation that the hash table was originally built
+ * on (i.e. the inner relation in the join).
+ * @param probe_relation The relation to probe the hash table with (i.e. the
+ * outer relation in the join).
+ * @param join_key_attributes The IDs of equijoin attributes in \c
+ * probe_relation.
+ * @param any_join_key_attributes_nullable If any attribute is nullable.
+ * @param hash_table The JoinHashTable to use.
+ * @param selection A list of Scalars corresponding to the relation attributes
+ * in \c output_destination. Each Scalar is evaluated for the joined
+ * tuples, and the resulting value is inserted into the join result.
+ * @param is_selection_on_build Whether each Scalar in the \p selection vector
+ * is using attributes from the build relation as input. Note that the
+ * length of this vector should equal the length of \p selection.
+ * @param lookup_block_id The block id of the probe_relation.
+ * @param output_destination The InsertDestination to insert the join results.
+ * @param storage_manager The StorageManager to use.
+ **/
+ HashOuterJoinWorkOrder(const CatalogRelationSchema &build_relation,
+ const CatalogRelationSchema &probe_relation,
+ std::vector<attribute_id> &&join_key_attributes,
+ const bool any_join_key_attributes_nullable,
+ const block_id lookup_block_id,
+ const std::vector<std::unique_ptr<const Scalar>> &selection,
+ std::vector<bool> &&is_selection_on_build,
+ const JoinHashTable &hash_table,
+ InsertDestination *output_destination,
+ StorageManager *storage_manager)
+ : build_relation_(build_relation),
+ probe_relation_(probe_relation),
+ join_key_attributes_(std::move(join_key_attributes)),
+ any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
+ block_id_(lookup_block_id),
+ selection_(selection),
+ is_selection_on_build_(std::move(is_selection_on_build)),
+ hash_table_(hash_table),
+ output_destination_(output_destination),
+ storage_manager_(storage_manager) {}
+
+ ~HashOuterJoinWorkOrder() override {}
+
+ void execute() override;
+
+ private:
+ const CatalogRelationSchema &build_relation_;
+ const CatalogRelationSchema &probe_relation_;
+ const std::vector<attribute_id> join_key_attributes_;
+ const bool any_join_key_attributes_nullable_;
+ const block_id block_id_;
+ const std::vector<std::unique_ptr<const Scalar>> &selection_;
+ const std::vector<bool> is_selection_on_build_;
+ const JoinHashTable &hash_table_;
+
+ InsertDestination *output_destination_;
+ StorageManager *storage_manager_;
+
+ DISALLOW_COPY_AND_ASSIGN(HashOuterJoinWorkOrder);
+};
+
/** @} */
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/relational_operators/WorkOrder.proto
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrder.proto b/relational_operators/WorkOrder.proto
index 1a0bcd1..8ed2080 100644
--- a/relational_operators/WorkOrder.proto
+++ b/relational_operators/WorkOrder.proto
@@ -29,20 +29,21 @@ enum WorkOrderType {
DESTROY_HASH = 6;
DROP_TABLE = 7;
FINALIZE_AGGREGATION = 8;
- HASH_INNER_JOIN = 9;
- HASH_SEMI_JOIN = 10;
- HASH_ANTI_JOIN = 11;
- INSERT = 12;
- NESTED_LOOP_JOIN = 13;
- SAMPLE = 14;
- SAVE_BLOCKS = 15;
- SELECT = 16;
- SORT_MERGE_RUN = 17;
- SORT_RUN_GENERATION = 18;
- TABLE_GENERATOR = 19;
- TEXT_SCAN = 20;
- TEXT_SPLIT = 21;
- UPDATE = 22;
+ HASH_ANTI_JOIN = 9;
+ HASH_INNER_JOIN = 10;
+ HASH_OUTER_JOIN = 11;
+ HASH_SEMI_JOIN = 12;
+ INSERT = 13;
+ NESTED_LOOP_JOIN = 14;
+ SAMPLE = 15;
+ SAVE_BLOCKS = 16;
+ SELECT = 17;
+ SORT_MERGE_RUN = 18;
+ SORT_RUN_GENERATION = 19;
+ TABLE_GENERATOR = 20;
+ TEXT_SCAN = 21;
+ TEXT_SPLIT = 22;
+ UPDATE = 23;
}
message WorkOrder {
@@ -151,6 +152,21 @@ message HashSemiJoinWorkOrder {
}
}
+message HashOuterJoinWorkOrder {
+ extend WorkOrder {
+ // All required.
+ optional int32 build_relation_id = 370;
+ optional int32 probe_relation_id = 371;
+ repeated int32 join_key_attributes = 372;
+ optional bool any_join_key_attributes_nullable = 373;
+ optional int32 insert_destination_index = 374;
+ optional uint32 join_hash_table_index = 375;
+ optional int32 selection_index = 376;
+ repeated bool is_selection_on_build = 377;
+ optional fixed64 block_id = 378;
+ }
+}
+
message InsertWorkOrder {
extend WorkOrder {
// All required.