You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/04/21 17:01:05 UTC

[1/5] incubator-quickstep git commit: Adds support for left/right outer join (#178) [Forced Update!]

Repository: incubator-quickstep
Updated Branches:
  refs/heads/master f9b48a8ab -> cd93fd743 (forced update)


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/relational_operators/WorkOrderFactory.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrderFactory.cpp b/relational_operators/WorkOrderFactory.cpp
index 92c1140..964c11c 100644
--- a/relational_operators/WorkOrderFactory.cpp
+++ b/relational_operators/WorkOrderFactory.cpp
@@ -135,11 +135,39 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder
           query_context->getInsertDestination(
               proto.GetExtension(serialization::FinalizeAggregationWorkOrder::insert_destination_index)));
     }
+    case serialization::HASH_ANTI_JOIN: {
+      LOG(INFO) << "Creating HashAntiJoinWorkOrder";
+      vector<attribute_id> join_key_attributes;
+      const int join_key_attributes_size =
+          proto.ExtensionSize(serialization::HashAntiJoinWorkOrder::join_key_attributes);
+      for (int i = 0; i < join_key_attributes_size; ++i) {
+        join_key_attributes.push_back(
+            proto.GetExtension(serialization::HashAntiJoinWorkOrder::join_key_attributes, i));
+      }
+
+      return new HashAntiJoinWorkOrder(
+          catalog_database->getRelationSchemaById(
+              proto.GetExtension(serialization::HashAntiJoinWorkOrder::build_relation_id)),
+          catalog_database->getRelationSchemaById(
+              proto.GetExtension(serialization::HashAntiJoinWorkOrder::probe_relation_id)),
+          move(join_key_attributes),
+          proto.GetExtension(serialization::HashAntiJoinWorkOrder::any_join_key_attributes_nullable),
+          proto.GetExtension(serialization::HashAntiJoinWorkOrder::block_id),
+          query_context->getPredicate(
+              proto.GetExtension(serialization::HashAntiJoinWorkOrder::residual_predicate_index)),
+          query_context->getScalarGroup(
+              proto.GetExtension(serialization::HashAntiJoinWorkOrder::selection_index)),
+          *query_context->getJoinHashTable(
+              proto.GetExtension(serialization::HashAntiJoinWorkOrder::join_hash_table_index)),
+          query_context->getInsertDestination(
+              proto.GetExtension(serialization::HashAntiJoinWorkOrder::insert_destination_index)),
+          storage_manager);
+    }
     case serialization::HASH_INNER_JOIN: {
       LOG(INFO) << "Creating HashInnerJoinWorkOrder";
       vector<attribute_id> join_key_attributes;
       const int join_key_attributes_size =
-          proto.ExtensionSize(serialization::HashAntiJoinWorkOrder::join_key_attributes);
+          proto.ExtensionSize(serialization::HashInnerJoinWorkOrder::join_key_attributes);
       for (int i = 0; i < join_key_attributes_size; ++i) {
         join_key_attributes.push_back(
             proto.GetExtension(serialization::HashInnerJoinWorkOrder::join_key_attributes, i));
@@ -163,60 +191,66 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder
               proto.GetExtension(serialization::HashInnerJoinWorkOrder::insert_destination_index)),
           storage_manager);
     }
-    case serialization::HASH_SEMI_JOIN: {
-      LOG(INFO) << "Creating HashSemiJoinWorkOrder";
+    case serialization::HASH_OUTER_JOIN: {
+      LOG(INFO) << "Creating HashOuterJoinWorkOrder";
       vector<attribute_id> join_key_attributes;
       const int join_key_attributes_size =
-          proto.ExtensionSize(serialization::HashAntiJoinWorkOrder::join_key_attributes);
+          proto.ExtensionSize(serialization::HashOuterJoinWorkOrder::join_key_attributes);
       for (int i = 0; i < join_key_attributes_size; ++i) {
         join_key_attributes.push_back(
-            proto.GetExtension(serialization::HashSemiJoinWorkOrder::join_key_attributes, i));
+            proto.GetExtension(serialization::HashOuterJoinWorkOrder::join_key_attributes, i));
+      }
+      vector<bool> is_selection_on_build;
+      const int is_selection_on_build_size =
+          proto.ExtensionSize(serialization::HashOuterJoinWorkOrder::is_selection_on_build);
+      for (int i = 0; i < is_selection_on_build_size; ++i) {
+        is_selection_on_build.push_back(
+            proto.GetExtension(serialization::HashOuterJoinWorkOrder::is_selection_on_build, i));
       }
 
-      return new HashSemiJoinWorkOrder(
+      return new HashOuterJoinWorkOrder(
           catalog_database->getRelationSchemaById(
-              proto.GetExtension(serialization::HashSemiJoinWorkOrder::build_relation_id)),
+              proto.GetExtension(serialization::HashOuterJoinWorkOrder::build_relation_id)),
           catalog_database->getRelationSchemaById(
-              proto.GetExtension(serialization::HashSemiJoinWorkOrder::probe_relation_id)),
+              proto.GetExtension(serialization::HashOuterJoinWorkOrder::probe_relation_id)),
           move(join_key_attributes),
-          proto.GetExtension(serialization::HashSemiJoinWorkOrder::any_join_key_attributes_nullable),
-          proto.GetExtension(serialization::HashSemiJoinWorkOrder::block_id),
-          query_context->getPredicate(
-              proto.GetExtension(serialization::HashSemiJoinWorkOrder::residual_predicate_index)),
+          proto.GetExtension(serialization::HashOuterJoinWorkOrder::any_join_key_attributes_nullable),
+          proto.GetExtension(serialization::HashOuterJoinWorkOrder::block_id),
           query_context->getScalarGroup(
-              proto.GetExtension(serialization::HashSemiJoinWorkOrder::selection_index)),
+              proto.GetExtension(serialization::HashOuterJoinWorkOrder::selection_index)),
+          move(is_selection_on_build),
           *query_context->getJoinHashTable(
-              proto.GetExtension(serialization::HashSemiJoinWorkOrder::join_hash_table_index)),
+              proto.GetExtension(serialization::HashOuterJoinWorkOrder::join_hash_table_index)),
           query_context->getInsertDestination(
-              proto.GetExtension(serialization::HashSemiJoinWorkOrder::insert_destination_index)),
+              proto.GetExtension(serialization::HashOuterJoinWorkOrder::insert_destination_index)),
           storage_manager);
     }
-    case serialization::HASH_ANTI_JOIN: {
-      LOG(INFO) << "Creating HashAntiJoinWorkOrder";
+    case serialization::HASH_SEMI_JOIN: {
+      LOG(INFO) << "Creating HashSemiJoinWorkOrder";
       vector<attribute_id> join_key_attributes;
       const int join_key_attributes_size =
-          proto.ExtensionSize(serialization::HashAntiJoinWorkOrder::join_key_attributes);
+          proto.ExtensionSize(serialization::HashSemiJoinWorkOrder::join_key_attributes);
       for (int i = 0; i < join_key_attributes_size; ++i) {
         join_key_attributes.push_back(
-            proto.GetExtension(serialization::HashAntiJoinWorkOrder::join_key_attributes, i));
+            proto.GetExtension(serialization::HashSemiJoinWorkOrder::join_key_attributes, i));
       }
 
-      return new HashAntiJoinWorkOrder(
+      return new HashSemiJoinWorkOrder(
           catalog_database->getRelationSchemaById(
-              proto.GetExtension(serialization::HashAntiJoinWorkOrder::build_relation_id)),
+              proto.GetExtension(serialization::HashSemiJoinWorkOrder::build_relation_id)),
           catalog_database->getRelationSchemaById(
-              proto.GetExtension(serialization::HashAntiJoinWorkOrder::probe_relation_id)),
+              proto.GetExtension(serialization::HashSemiJoinWorkOrder::probe_relation_id)),
           move(join_key_attributes),
-          proto.GetExtension(serialization::HashAntiJoinWorkOrder::any_join_key_attributes_nullable),
-          proto.GetExtension(serialization::HashAntiJoinWorkOrder::block_id),
+          proto.GetExtension(serialization::HashSemiJoinWorkOrder::any_join_key_attributes_nullable),
+          proto.GetExtension(serialization::HashSemiJoinWorkOrder::block_id),
           query_context->getPredicate(
-              proto.GetExtension(serialization::HashAntiJoinWorkOrder::residual_predicate_index)),
+              proto.GetExtension(serialization::HashSemiJoinWorkOrder::residual_predicate_index)),
           query_context->getScalarGroup(
-              proto.GetExtension(serialization::HashAntiJoinWorkOrder::selection_index)),
+              proto.GetExtension(serialization::HashSemiJoinWorkOrder::selection_index)),
           *query_context->getJoinHashTable(
-              proto.GetExtension(serialization::HashAntiJoinWorkOrder::join_hash_table_index)),
+              proto.GetExtension(serialization::HashSemiJoinWorkOrder::join_hash_table_index)),
           query_context->getInsertDestination(
-              proto.GetExtension(serialization::HashAntiJoinWorkOrder::insert_destination_index)),
+              proto.GetExtension(serialization::HashSemiJoinWorkOrder::insert_destination_index)),
           storage_manager);
     }
     case serialization::INSERT: {
@@ -452,6 +486,50 @@ bool WorkOrderFactory::ProtoIsValid(const serialization::WorkOrder &proto,
              query_context.isValidInsertDestinationId(
                  proto.GetExtension(serialization::FinalizeAggregationWorkOrder::insert_destination_index));
     }
+    case serialization::HASH_ANTI_JOIN: {
+      if (!proto.HasExtension(serialization::HashAntiJoinWorkOrder::build_relation_id) ||
+          !proto.HasExtension(serialization::HashAntiJoinWorkOrder::probe_relation_id)) {
+        return false;
+      }
+
+      const relation_id build_relation_id =
+          proto.GetExtension(serialization::HashAntiJoinWorkOrder::build_relation_id);
+      if (!catalog_database.hasRelationWithId(build_relation_id)) {
+        return false;
+      }
+
+      const relation_id probe_relation_id =
+          proto.GetExtension(serialization::HashAntiJoinWorkOrder::probe_relation_id);
+      if (!catalog_database.hasRelationWithId(probe_relation_id)) {
+        return false;
+      }
+
+      const CatalogRelationSchema &build_relation = catalog_database.getRelationSchemaById(build_relation_id);
+      const CatalogRelationSchema &probe_relation = catalog_database.getRelationSchemaById(probe_relation_id);
+      for (int i = 0; i < proto.ExtensionSize(serialization::HashAntiJoinWorkOrder::join_key_attributes); ++i) {
+        const attribute_id attr_id =
+            proto.GetExtension(serialization::HashAntiJoinWorkOrder::join_key_attributes, i);
+        if (!build_relation.hasAttributeWithId(attr_id) ||
+            !probe_relation.hasAttributeWithId(attr_id)) {
+          return false;
+        }
+      }
+
+      return proto.HasExtension(serialization::HashAntiJoinWorkOrder::any_join_key_attributes_nullable) &&
+             proto.HasExtension(serialization::HashAntiJoinWorkOrder::insert_destination_index) &&
+             query_context.isValidInsertDestinationId(
+                 proto.GetExtension(serialization::HashAntiJoinWorkOrder::insert_destination_index)) &&
+             proto.HasExtension(serialization::HashAntiJoinWorkOrder::join_hash_table_index) &&
+             query_context.isValidJoinHashTableId(
+                 proto.GetExtension(serialization::HashAntiJoinWorkOrder::join_hash_table_index)) &&
+             proto.HasExtension(serialization::HashAntiJoinWorkOrder::residual_predicate_index) &&
+             query_context.isValidPredicate(
+                 proto.GetExtension(serialization::HashAntiJoinWorkOrder::residual_predicate_index)) &&
+             proto.HasExtension(serialization::HashAntiJoinWorkOrder::selection_index) &&
+             query_context.isValidScalarGroupId(
+                 proto.GetExtension(serialization::HashAntiJoinWorkOrder::selection_index)) &&
+             proto.HasExtension(serialization::HashAntiJoinWorkOrder::block_id);
+    }
     case serialization::HASH_INNER_JOIN: {
       if (!proto.HasExtension(serialization::HashInnerJoinWorkOrder::build_relation_id) ||
           !proto.HasExtension(serialization::HashInnerJoinWorkOrder::probe_relation_id)) {
@@ -496,93 +574,91 @@ bool WorkOrderFactory::ProtoIsValid(const serialization::WorkOrder &proto,
                  proto.GetExtension(serialization::HashInnerJoinWorkOrder::selection_index)) &&
              proto.HasExtension(serialization::HashInnerJoinWorkOrder::block_id);
     }
-    case serialization::HASH_SEMI_JOIN: {
-      if (!proto.HasExtension(serialization::HashSemiJoinWorkOrder::build_relation_id) ||
-          !proto.HasExtension(serialization::HashSemiJoinWorkOrder::probe_relation_id)) {
+    case serialization::HASH_OUTER_JOIN: {
+      if (!proto.HasExtension(serialization::HashOuterJoinWorkOrder::build_relation_id) ||
+          !proto.HasExtension(serialization::HashOuterJoinWorkOrder::probe_relation_id)) {
         return false;
       }
 
       const relation_id build_relation_id =
-          proto.GetExtension(serialization::HashSemiJoinWorkOrder::build_relation_id);
+          proto.GetExtension(serialization::HashOuterJoinWorkOrder::build_relation_id);
       if (!catalog_database.hasRelationWithId(build_relation_id)) {
         return false;
       }
 
       const relation_id probe_relation_id =
-          proto.GetExtension(serialization::HashSemiJoinWorkOrder::probe_relation_id);
+          proto.GetExtension(serialization::HashOuterJoinWorkOrder::probe_relation_id);
       if (!catalog_database.hasRelationWithId(probe_relation_id)) {
         return false;
       }
 
       const CatalogRelationSchema &build_relation = catalog_database.getRelationSchemaById(build_relation_id);
       const CatalogRelationSchema &probe_relation = catalog_database.getRelationSchemaById(probe_relation_id);
-      for (int i = 0; i < proto.ExtensionSize(serialization::HashSemiJoinWorkOrder::join_key_attributes); ++i) {
+      for (int i = 0; i < proto.ExtensionSize(serialization::HashOuterJoinWorkOrder::join_key_attributes); ++i) {
         const attribute_id attr_id =
-            proto.GetExtension(serialization::HashSemiJoinWorkOrder::join_key_attributes, i);
+            proto.GetExtension(serialization::HashOuterJoinWorkOrder::join_key_attributes, i);
         if (!build_relation.hasAttributeWithId(attr_id) ||
             !probe_relation.hasAttributeWithId(attr_id)) {
           return false;
         }
       }
 
-      return proto.HasExtension(serialization::HashSemiJoinWorkOrder::any_join_key_attributes_nullable) &&
-             proto.HasExtension(serialization::HashSemiJoinWorkOrder::insert_destination_index) &&
+      return proto.HasExtension(serialization::HashOuterJoinWorkOrder::any_join_key_attributes_nullable) &&
+             proto.HasExtension(serialization::HashOuterJoinWorkOrder::insert_destination_index) &&
              query_context.isValidInsertDestinationId(
-                 proto.GetExtension(serialization::HashSemiJoinWorkOrder::insert_destination_index)) &&
-             proto.HasExtension(serialization::HashSemiJoinWorkOrder::join_hash_table_index) &&
+                 proto.GetExtension(serialization::HashOuterJoinWorkOrder::insert_destination_index)) &&
+             proto.HasExtension(serialization::HashOuterJoinWorkOrder::join_hash_table_index) &&
              query_context.isValidJoinHashTableId(
-                 proto.GetExtension(serialization::HashSemiJoinWorkOrder::join_hash_table_index)) &&
-             proto.HasExtension(serialization::HashSemiJoinWorkOrder::residual_predicate_index) &&
-             query_context.isValidPredicate(
-                 proto.GetExtension(serialization::HashSemiJoinWorkOrder::residual_predicate_index)) &&
-             proto.HasExtension(serialization::HashSemiJoinWorkOrder::selection_index) &&
+                 proto.GetExtension(serialization::HashOuterJoinWorkOrder::join_hash_table_index)) &&
+             proto.HasExtension(serialization::HashOuterJoinWorkOrder::selection_index) &&
              query_context.isValidScalarGroupId(
-                 proto.GetExtension(serialization::HashSemiJoinWorkOrder::selection_index)) &&
-             proto.HasExtension(serialization::HashSemiJoinWorkOrder::block_id);
+                 proto.GetExtension(serialization::HashOuterJoinWorkOrder::selection_index)) &&
+             proto.HasExtension(serialization::HashOuterJoinWorkOrder::is_selection_on_build) &&
+             proto.HasExtension(serialization::HashOuterJoinWorkOrder::block_id);
     }
-    case serialization::HASH_ANTI_JOIN: {
-      if (!proto.HasExtension(serialization::HashAntiJoinWorkOrder::build_relation_id) ||
-          !proto.HasExtension(serialization::HashAntiJoinWorkOrder::probe_relation_id)) {
+    case serialization::HASH_SEMI_JOIN: {
+      if (!proto.HasExtension(serialization::HashSemiJoinWorkOrder::build_relation_id) ||
+          !proto.HasExtension(serialization::HashSemiJoinWorkOrder::probe_relation_id)) {
         return false;
       }
 
       const relation_id build_relation_id =
-          proto.GetExtension(serialization::HashAntiJoinWorkOrder::build_relation_id);
+          proto.GetExtension(serialization::HashSemiJoinWorkOrder::build_relation_id);
       if (!catalog_database.hasRelationWithId(build_relation_id)) {
         return false;
       }
 
       const relation_id probe_relation_id =
-          proto.GetExtension(serialization::HashAntiJoinWorkOrder::probe_relation_id);
+          proto.GetExtension(serialization::HashSemiJoinWorkOrder::probe_relation_id);
       if (!catalog_database.hasRelationWithId(probe_relation_id)) {
         return false;
       }
 
       const CatalogRelationSchema &build_relation = catalog_database.getRelationSchemaById(build_relation_id);
       const CatalogRelationSchema &probe_relation = catalog_database.getRelationSchemaById(probe_relation_id);
-      for (int i = 0; i < proto.ExtensionSize(serialization::HashAntiJoinWorkOrder::join_key_attributes); ++i) {
+      for (int i = 0; i < proto.ExtensionSize(serialization::HashSemiJoinWorkOrder::join_key_attributes); ++i) {
         const attribute_id attr_id =
-            proto.GetExtension(serialization::HashAntiJoinWorkOrder::join_key_attributes, i);
+            proto.GetExtension(serialization::HashSemiJoinWorkOrder::join_key_attributes, i);
         if (!build_relation.hasAttributeWithId(attr_id) ||
             !probe_relation.hasAttributeWithId(attr_id)) {
           return false;
         }
       }
 
-      return proto.HasExtension(serialization::HashAntiJoinWorkOrder::any_join_key_attributes_nullable) &&
-             proto.HasExtension(serialization::HashAntiJoinWorkOrder::insert_destination_index) &&
+      return proto.HasExtension(serialization::HashSemiJoinWorkOrder::any_join_key_attributes_nullable) &&
+             proto.HasExtension(serialization::HashSemiJoinWorkOrder::insert_destination_index) &&
              query_context.isValidInsertDestinationId(
-                 proto.GetExtension(serialization::HashAntiJoinWorkOrder::insert_destination_index)) &&
-             proto.HasExtension(serialization::HashAntiJoinWorkOrder::join_hash_table_index) &&
+                 proto.GetExtension(serialization::HashSemiJoinWorkOrder::insert_destination_index)) &&
+             proto.HasExtension(serialization::HashSemiJoinWorkOrder::join_hash_table_index) &&
              query_context.isValidJoinHashTableId(
-                 proto.GetExtension(serialization::HashAntiJoinWorkOrder::join_hash_table_index)) &&
-             proto.HasExtension(serialization::HashAntiJoinWorkOrder::residual_predicate_index) &&
+                 proto.GetExtension(serialization::HashSemiJoinWorkOrder::join_hash_table_index)) &&
+             proto.HasExtension(serialization::HashSemiJoinWorkOrder::residual_predicate_index) &&
              query_context.isValidPredicate(
-                 proto.GetExtension(serialization::HashAntiJoinWorkOrder::residual_predicate_index)) &&
-             proto.HasExtension(serialization::HashAntiJoinWorkOrder::selection_index) &&
+                 proto.GetExtension(serialization::HashSemiJoinWorkOrder::residual_predicate_index)) &&
+             proto.HasExtension(serialization::HashSemiJoinWorkOrder::selection_index) &&
              query_context.isValidScalarGroupId(
-                 proto.GetExtension(serialization::HashAntiJoinWorkOrder::selection_index)) &&
-             proto.HasExtension(serialization::HashAntiJoinWorkOrder::block_id);
+                 proto.GetExtension(serialization::HashSemiJoinWorkOrder::selection_index)) &&
+             proto.HasExtension(serialization::HashSemiJoinWorkOrder::block_id);
     }
     case serialization::INSERT: {
       return proto.HasExtension(serialization::InsertWorkOrder::insert_destination_index) &&

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/storage/HashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index ef79d11..667848e 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -1,6 +1,8 @@
 /**
  *   Copyright 2011-2015 Quickstep Technologies LLC.
  *   Copyright 2015-2016 Pivotal Software, Inc.
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin—Madison.
  *
  *   Licensed under the Apache License, Version 2.0 (the "License");
  *   you may not use this file except in compliance with the License.
@@ -707,8 +709,8 @@ class HashTable : public HashTableBase<resizable,
                                FunctorT *functor) const;
 
   /**
-   * @brief Lookup (multiple) keys from a ValueAccessor, apply a functor to
-   *        the matching values and additionally call a hasMatch() function of
+   * @brief Lookup (multiple) keys from a ValueAccessor, apply a functor to the
+   *        matching values and additionally call a recordMatch() function of
    *        the functor when the first match for a key is found.
    * @warning This method assumes that no concurrent calls to put(),
    *          putCompositeKey(), putValueAccessor(),
@@ -749,8 +751,8 @@ class HashTable : public HashTableBase<resizable,
       FunctorT *functor) const;
 
   /**
-   * @brief Lookup (multiple) keys from a ValueAccessor, apply a functor to
-   *        the matching values and additionally call a hasMatch() function of
+   * @brief Lookup (multiple) keys from a ValueAccessor, apply a functor to the
+   *        matching values and additionally call a recordMatch() function of
    *        the functor when the first match for a key is found. Composite key
    *        version.
    * @warning This method assumes that no concurrent calls to put(),
@@ -1905,17 +1907,19 @@ void HashTable<ValueT,
       if (check_for_null_keys && key.isNull()) {
         continue;
       }
-      const std::size_t hash_code = adjust_hashes_ ? AdjustHash(key.getHash())
-                                                   : key.getHash();
+      const std::size_t hash_code =
+          adjust_hashes_ ? HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_keys>
+                               ::AdjustHash(key.getHash())
+                         : key.getHash();
       std::size_t entry_num = 0;
       const ValueT *value;
-      if (getNextEntryForKey(key, hash_code, &value, &entry_num)) {
+      if (this->getNextEntryForKey(key, hash_code, &value, &entry_num)) {
         functor->recordMatch(*accessor);
         (*functor)(*accessor, *value);
         if (!allow_duplicate_keys) {
            continue;
         }
-        while (getNextEntryForKey(key, hash_code, &value, &entry_num)) {
+        while (this->getNextEntryForKey(key, hash_code, &value, &entry_num)) {
           (*functor)(*accessor, *value);
         }
       }
@@ -1956,17 +1960,19 @@ void HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_
         continue;
       }
 
-      const std::size_t hash_code = adjust_hashes_ ? AdjustHash(hashCompositeKey(key_vector))
-                                                   : hashCompositeKey(key_vector);
+      const std::size_t hash_code =
+          adjust_hashes_ ? HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_keys>
+                               ::AdjustHash(this->hashCompositeKey(key_vector))
+                         : this->hashCompositeKey(key_vector);
       std::size_t entry_num = 0;
       const ValueT *value;
-      if (getNextEntryForCompositeKey(key_vector, hash_code, &value, &entry_num)) {
+      if (this->getNextEntryForCompositeKey(key_vector, hash_code, &value, &entry_num)) {
         functor->recordMatch(*accessor);
         (*functor)(*accessor, *value);
         if (!allow_duplicate_keys) {
           continue;
         }
-        while (getNextEntryForCompositeKey(key_vector, hash_code, &value, &entry_num)) {
+        while (this->getNextEntryForCompositeKey(key_vector, hash_code, &value, &entry_num)) {
           (*functor)(*accessor, *value);
         }
       }


[2/5] incubator-quickstep git commit: Adds support for left/right outer join (#178)

Posted by ji...@apache.org.
Adds support for left/right outer join (#178)

Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/ef383ef2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/ef383ef2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/ef383ef2

Branch: refs/heads/master
Commit: ef383ef280529a4eecb3cf522ec5217b05af1812
Parents: 2577cf7
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Wed Apr 20 12:39:03 2016 -0500
Committer: Jignesh Patel <pa...@users.noreply.github.com>
Committed: Wed Apr 20 12:39:03 2016 -0500

----------------------------------------------------------------------
 query_optimizer/ExecutionGenerator.cpp          |  16 ++
 query_optimizer/expressions/ExpressionUtil.cpp  |  19 ++
 query_optimizer/expressions/ExpressionUtil.hpp  |  39 ++++
 query_optimizer/logical/CMakeLists.txt          |   1 +
 query_optimizer/logical/HashJoin.hpp            |  38 +++-
 query_optimizer/physical/HashJoin.hpp           |   5 +-
 query_optimizer/resolver/CMakeLists.txt         |   2 +
 query_optimizer/resolver/NameResolver.hpp       |  24 +++
 query_optimizer/resolver/Resolver.cpp           |  44 +++-
 query_optimizer/rules/CMakeLists.txt            |   4 +
 query_optimizer/rules/GenerateJoins.cpp         |  90 +++++++++
 query_optimizer/rules/PushDownFilter.cpp        |  18 +-
 query_optimizer/rules/PushDownSemiAntiJoin.cpp  |   9 +-
 query_optimizer/strategy/Join.cpp               |   3 +
 .../tests/execution_generator/Join.test         |  99 +++++++++
 .../tests/logical_generator/Join.test           | 182 +++++++++++++++++
 .../tests/physical_generator/Join.test          | 164 +++++++++++++++
 query_optimizer/tests/resolver/Join.test        | 141 ++++++++++++-
 relational_operators/CMakeLists.txt             |   2 +-
 relational_operators/HashJoinOperator.cpp       | 198 +++++++++++++++++-
 relational_operators/HashJoinOperator.hpp       | 136 ++++++++++++-
 relational_operators/WorkOrder.proto            |  44 ++--
 relational_operators/WorkOrderFactory.cpp       | 202 +++++++++++++------
 storage/HashTable.hpp                           |  30 +--
 24 files changed, 1393 insertions(+), 117 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 38ab09e..7a8e996 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -607,6 +607,18 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
   const CatalogRelationInfo *probe_operator_info =
       findRelationInfoOutputByPhysical(probe_physical);
 
+  // Create a vector that indicates whether each project expression is using
+  // attributes from the build relation as input. This information is required
+  // by the current implementation of hash left outer join
+  std::unique_ptr<std::vector<bool>> is_selection_on_build;
+  if (physical_plan->join_type() == P::HashJoin::JoinType::kLeftOuterJoin) {
+    is_selection_on_build.reset(
+        new std::vector<bool>(
+            E::MarkExpressionsReferingAnyAttribute(
+                physical_plan->project_expressions(),
+                build_physical->getOutputAttributes())));
+  }
+
   // FIXME(quickstep-team): Add support for self-join.
   if (build_relation_info->relation == probe_operator_info->relation) {
     THROW_SQL_ERROR() << "Self-join is not supported";
@@ -664,6 +676,9 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
     case P::HashJoin::JoinType::kLeftAntiJoin:
       join_type = HashJoinOperator::JoinType::kLeftAntiJoin;
       break;
+    case P::HashJoin::JoinType::kLeftOuterJoin:
+      join_type = HashJoinOperator::JoinType::kLeftOuterJoin;
+      break;
     default:
       LOG(FATAL) << "Invalid physical::HashJoin::JoinType: "
                  << static_cast<typename std::underlying_type<P::HashJoin::JoinType>::type>(
@@ -684,6 +699,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
               join_hash_table_index,
               residual_predicate_index,
               project_expressions_group_index,
+              is_selection_on_build.get(),
               join_type));
   insert_destination_proto->set_relational_op_index(join_operator_index);
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/expressions/ExpressionUtil.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/ExpressionUtil.cpp b/query_optimizer/expressions/ExpressionUtil.cpp
index 5cf7759..3ccd3c9 100644
--- a/query_optimizer/expressions/ExpressionUtil.cpp
+++ b/query_optimizer/expressions/ExpressionUtil.cpp
@@ -37,6 +37,25 @@ AttributeReferencePtr ToRef(const NamedExpressionPtr &expression) {
                                     AttributeReferenceScope::kLocal);
 }
 
+std::vector<AttributeReferencePtr> GetNullableAttributeVector(
+    const std::vector<AttributeReferencePtr> &attributes) {
+  std::vector<AttributeReferencePtr> nullable_attributes;
+  for (const auto &attr : attributes) {
+    if (!attr->getValueType().isNullable()) {
+      nullable_attributes.emplace_back(
+          AttributeReference::Create(attr->id(),
+                                     attr->attribute_name(),
+                                     attr->attribute_alias(),
+                                     attr->relation_name(),
+                                     attr->getValueType().getNullableVersion(),
+                                     attr->scope()));
+    } else {
+      nullable_attributes.emplace_back(attr);
+    }
+  }
+  return nullable_attributes;
+}
+
 std::vector<AttributeReferencePtr> GetAttributeReferencesWithinScope(
     const std::vector<AttributeReferencePtr> &attributes,
     const AttributeReferenceScope scope) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/expressions/ExpressionUtil.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/ExpressionUtil.hpp b/query_optimizer/expressions/ExpressionUtil.hpp
index cc9fbbe..4c35719 100644
--- a/query_optimizer/expressions/ExpressionUtil.hpp
+++ b/query_optimizer/expressions/ExpressionUtil.hpp
@@ -117,6 +117,16 @@ bool SubsetOfExpressions(
 }
 
 /**
+ * @brief Make a copy of the input attribute vector with each attribute's type
+ *        converted as nullable.
+ *
+ * @param attributes The input attribute vector.
+ * @return The nullable copy of the input attribute vector.
+ */
+std::vector<AttributeReferencePtr> GetNullableAttributeVector(
+    const std::vector<AttributeReferencePtr> &attributes);
+
+/**
  * @brief Returns a reference to this named expression.
  *
  * @return An AttributeReference of this named expression.
@@ -124,6 +134,35 @@ bool SubsetOfExpressions(
 AttributeReferencePtr ToRef(const NamedExpressionPtr &expression);
 
 /**
+ * @brief Given a vector of expressions and a vector of attributes, return a
+ *        bool vector that indicates whether each expression is refering to
+ *        ANY attribute in the attribute vector.
+ *
+ * @param expressions The vector of expressions to be checked.
+ * @param attributes The vector of attributes to be checked.
+ * @return A vector of bools indicating whether each expression is refering to
+ *         any attribute in the attribute vector. Note that the length of this
+ *         bool vector equals the length of \p expressions.
+ */
+template <class NamedExpressionType>
+std::vector<bool> MarkExpressionsReferingAnyAttribute(
+    const std::vector<std::shared_ptr<const NamedExpressionType>> &expressions,
+    const std::vector<AttributeReferencePtr> &attributes) {
+  std::vector<bool> matches;
+  UnorderedAttributeSet attr_set(attributes.begin(), attributes.end());
+  for (std::size_t i = 0; i < expressions.size(); ++i) {
+    for (const auto referenced_attr : expressions[i]->getReferencedAttributes()) {
+      if (attr_set.find(referenced_attr) != attr_set.end()) {
+        matches.emplace_back(true);
+      } else {
+        matches.emplace_back(false);
+      }
+    }
+  }
+  return matches;
+}
+
+/**
  * @brief Filter a vector of AttributeReferencePtr to get those which are in
  *        the specified scope.
  *

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/logical/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/logical/CMakeLists.txt b/query_optimizer/logical/CMakeLists.txt
index 0467233..61c6234 100644
--- a/query_optimizer/logical/CMakeLists.txt
+++ b/query_optimizer/logical/CMakeLists.txt
@@ -125,6 +125,7 @@ target_link_libraries(quickstep_queryoptimizer_logical_HashJoin
                       glog
                       quickstep_queryoptimizer_OptimizerTree
                       quickstep_queryoptimizer_expressions_AttributeReference
+                      quickstep_queryoptimizer_expressions_ExpressionUtil
                       quickstep_queryoptimizer_expressions_Predicate
                       quickstep_queryoptimizer_logical_BinaryJoin
                       quickstep_queryoptimizer_logical_Logical

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/logical/HashJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/logical/HashJoin.hpp b/query_optimizer/logical/HashJoin.hpp
index 2ae30cf..9052a23 100644
--- a/query_optimizer/logical/HashJoin.hpp
+++ b/query_optimizer/logical/HashJoin.hpp
@@ -27,6 +27,7 @@
 
 #include "query_optimizer/OptimizerTree.hpp"
 #include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
 #include "query_optimizer/expressions/Predicate.hpp"
 #include "query_optimizer/logical/BinaryJoin.hpp"
 #include "query_optimizer/logical/Logical.hpp"
@@ -57,10 +58,10 @@ class HashJoin : public BinaryJoin {
   enum class JoinType {
     kInnerJoin = 0,
     kLeftSemiJoin,
-    kLeftAntiJoin
+    kLeftAntiJoin,
+    kLeftOuterJoin
   };
 
-
   LogicalType getLogicalType() const override { return LogicalType::kHashJoin; }
 
   std::string getName() const override {
@@ -71,6 +72,8 @@ class HashJoin : public BinaryJoin {
         return "HashLeftSemiJoin";
       case JoinType::kLeftAntiJoin:
         return "HashLeftAntiJoin";
+      case JoinType::kLeftOuterJoin:
+        return "HashLeftOuterJoin";
       default:
         LOG(FATAL) << "Invalid JoinType: "
                    << static_cast<typename std::underlying_type<JoinType>::type>(join_type_);
@@ -116,12 +119,36 @@ class HashJoin : public BinaryJoin {
                             join_type_);
   }
 
+  std::vector<expressions::AttributeReferencePtr> getOutputAttributes() const override {
+    if (join_type_ != JoinType::kLeftOuterJoin) {
+      return BinaryJoin::getOutputAttributes();
+    }
+
+    // For left outer join, all the output attributes from the right relation
+    // must be nullable.
+    std::vector<expressions::AttributeReferencePtr> output_attributes =
+        left()->getOutputAttributes();
+    const std::vector<expressions::AttributeReferencePtr> right_nullable_output_attributes =
+        GetNullableAttributeVector(right()->getOutputAttributes());
+    output_attributes.insert(output_attributes.end(),
+                             right_nullable_output_attributes.begin(),
+                             right_nullable_output_attributes.end());
+    return output_attributes;
+  }
+
   std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() const override {
     std::vector<expressions::AttributeReferencePtr> referenced_attributes =
         left_join_attributes_;
     referenced_attributes.insert(referenced_attributes.end(),
                                  right_join_attributes_.begin(),
                                  right_join_attributes_.end());
+    if (residual_predicate_ != nullptr) {
+      const std::vector<expressions::AttributeReferencePtr> referenced_attributes_in_residual =
+          residual_predicate_->getReferencedAttributes();
+      referenced_attributes.insert(referenced_attributes.end(),
+                                   referenced_attributes_in_residual.begin(),
+                                   referenced_attributes_in_residual.end());
+    }
     return referenced_attributes;
   }
 
@@ -170,6 +197,11 @@ class HashJoin : public BinaryJoin {
                                     container_child_field_names,
                                     container_child_fields);
 
+    if (residual_predicate_ != nullptr) {
+      non_container_child_field_names->push_back("residual_predicate");
+      non_container_child_fields->push_back(residual_predicate_);
+    }
+
     container_child_field_names->push_back("left_join_attributes");
     container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(left_join_attributes_));
     container_child_field_names->push_back("right_join_attributes");
@@ -189,7 +221,7 @@ class HashJoin : public BinaryJoin {
         residual_predicate_(residual_predicate),
         join_type_(join_type) {
     DCHECK_EQ(left_join_attributes.size(), right_join_attributes.size());
-    DCHECK(!left_join_attributes.empty());
+    DCHECK(!left_join_attributes.empty() || residual_predicate != nullptr);
 
     if (residual_predicate_ != nullptr) {
       addInputExpression(residual_predicate_);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/physical/HashJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.hpp b/query_optimizer/physical/HashJoin.hpp
index 80e7c2f..b904b5f 100644
--- a/query_optimizer/physical/HashJoin.hpp
+++ b/query_optimizer/physical/HashJoin.hpp
@@ -56,7 +56,8 @@ class HashJoin : public BinaryJoin {
   enum class JoinType {
     kInnerJoin = 0,
     kLeftSemiJoin,
-    kLeftAntiJoin
+    kLeftAntiJoin,
+    kLeftOuterJoin
   };
 
   PhysicalType getPhysicalType() const override { return PhysicalType::kHashJoin; }
@@ -69,6 +70,8 @@ class HashJoin : public BinaryJoin {
         return "HashLeftSemiJoin";
       case JoinType::kLeftAntiJoin:
         return "HashLeftAntiJoin";
+      case JoinType::kLeftOuterJoin:
+        return "HashLeftOuterJoin";
       default:
         LOG(FATAL) << "Invalid JoinType: "
                    << static_cast<typename std::underlying_type<JoinType>::type>(join_type_);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/resolver/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/resolver/CMakeLists.txt b/query_optimizer/resolver/CMakeLists.txt
index 854ace1..db2a8af 100644
--- a/query_optimizer/resolver/CMakeLists.txt
+++ b/query_optimizer/resolver/CMakeLists.txt
@@ -26,6 +26,7 @@ target_link_libraries(quickstep_queryoptimizer_resolver_NameResolver
                       quickstep_catalog_CatalogRelation
                       quickstep_parser_ParseString
                       quickstep_queryoptimizer_expressions_AttributeReference
+                      quickstep_queryoptimizer_expressions_ExpressionUtil
                       quickstep_queryoptimizer_logical_Logical
                       quickstep_utility_Macros
                       quickstep_utility_SqlError
@@ -94,6 +95,7 @@ target_link_libraries(quickstep_queryoptimizer_resolver_Resolver
                       quickstep_queryoptimizer_logical_DeleteTuples
                       quickstep_queryoptimizer_logical_DropTable
                       quickstep_queryoptimizer_logical_Filter
+                      quickstep_queryoptimizer_logical_HashJoin
                       quickstep_queryoptimizer_logical_InsertSelection
                       quickstep_queryoptimizer_logical_InsertTuple
                       quickstep_queryoptimizer_logical_Logical

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/resolver/NameResolver.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/resolver/NameResolver.hpp b/query_optimizer/resolver/NameResolver.hpp
index ea3f1e4..1618f4e 100644
--- a/query_optimizer/resolver/NameResolver.hpp
+++ b/query_optimizer/resolver/NameResolver.hpp
@@ -25,6 +25,7 @@
 #include <vector>
 
 #include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
 #include "query_optimizer/logical/Logical.hpp"
 #include "utility/Macros.hpp"
 #include "utility/StringUtil.hpp"
@@ -118,6 +119,29 @@ class NameResolver {
     return std::string("$").append(name);
   }
 
+  /**
+   * @return Get the index in the NameResolver for the next relation.
+   */
+  std::size_t nextScopedRelationPosition() const {
+    return relations_.size();
+  }
+
+  /**
+   * @brief Make the output attributes of all the relations with indices between
+   *        start_relation_id and end_relation_id (excluded) to be nullable.
+   *
+   * @param start_relation_id The starting index of the relation for which
+   *        the nullability of output attributes should be changed.
+   * @param end_relation_id The ending relation index.
+   */
+  void makeOutputAttributesNullable(std::size_t start_relation_id,
+                                    std::size_t end_relation_id) {
+    for (std::size_t idx = start_relation_id; idx < end_relation_id; ++idx) {
+      relations_[idx]->attributes =
+          expressions::GetNullableAttributeVector(relations_[idx]->attributes);
+    }
+  }
+
  private:
   /**
    * @brief Info of a relation and its visible attributes in the name scope.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/resolver/Resolver.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/resolver/Resolver.cpp b/query_optimizer/resolver/Resolver.cpp
index 242a1f0..f70dae0 100644
--- a/query_optimizer/resolver/Resolver.cpp
+++ b/query_optimizer/resolver/Resolver.cpp
@@ -19,6 +19,7 @@
 
 #include "query_optimizer/resolver/Resolver.hpp"
 
+#include <algorithm>
 #include <map>
 #include <memory>
 #include <set>
@@ -90,6 +91,7 @@
 #include "query_optimizer/logical/DeleteTuples.hpp"
 #include "query_optimizer/logical/DropTable.hpp"
 #include "query_optimizer/logical/Filter.hpp"
+#include "query_optimizer/logical/HashJoin.hpp"
 #include "query_optimizer/logical/InsertSelection.hpp"
 #include "query_optimizer/logical/InsertTuple.hpp"
 #include "query_optimizer/logical/MultiwayCartesianJoin.hpp"
@@ -1606,24 +1608,52 @@ L::LogicalPtr Resolver::resolveGeneratorTableReference(
 L::LogicalPtr Resolver::resolveJoinedTableReference(
     const ParseJoinedTableReference &joined_table_reference,
     NameResolver *name_resolver) {
-  const L::LogicalPtr left_table =
+  std::size_t start_relation_idx_for_left = name_resolver->nextScopedRelationPosition();
+  L::LogicalPtr left_table =
       resolveTableReference(*joined_table_reference.left_table(), name_resolver);
-  const L::LogicalPtr right_table =
+
+  std::size_t start_relation_idx_for_right = name_resolver->nextScopedRelationPosition();
+  L::LogicalPtr right_table =
       resolveTableReference(*joined_table_reference.right_table(), name_resolver);
 
+  std::size_t end_relation_idx = name_resolver->nextScopedRelationPosition();
+
   ExpressionResolutionInfo resolution_info(*name_resolver,
                                            "join clause" /* clause_name */,
                                            nullptr /* select_list_info */);
   const E::PredicatePtr on_predicate =
       resolvePredicate(*joined_table_reference.join_predicate(), &resolution_info);
 
-  if (joined_table_reference.join_type() == ParseJoinedTableReference::JoinType::kInnerJoin) {
-    return L::Filter::Create(
-        L::MultiwayCartesianJoin::Create({ left_table, right_table }),
-        on_predicate);
+  switch (joined_table_reference.join_type()) {
+    case ParseJoinedTableReference::JoinType::kInnerJoin: {
+      return L::Filter::Create(
+          L::MultiwayCartesianJoin::Create({ left_table, right_table }),
+          on_predicate);
+    }
+    case ParseJoinedTableReference::JoinType::kRightOuterJoin: {
+      // Swap the two tables so it becomes a left outer join.
+      std::swap(left_table, right_table);
+      end_relation_idx = start_relation_idx_for_right;
+      start_relation_idx_for_right = start_relation_idx_for_left;
+    }  // Fall through
+    case ParseJoinedTableReference::JoinType::kLeftOuterJoin: {
+      name_resolver->makeOutputAttributesNullable(start_relation_idx_for_right,
+                                                  end_relation_idx);
+
+      // left_join_attributes and right_join_attributes will be identified by
+      // GenerateJoins during logical optimization.
+      return L::HashJoin::Create(left_table,
+                                 right_table,
+                                 {} /* left_join_attributes */,
+                                 {} /* right_join_attributes */,
+                                 on_predicate,
+                                 L::HashJoin::JoinType::kLeftOuterJoin);
+    }
+    default:
+      break;
   }
 
-  THROW_SQL_ERROR_AT(&joined_table_reference) << "Outer joins are not supported yet";
+  THROW_SQL_ERROR_AT(&joined_table_reference) << "Full outer join is not supported yet";
 }
 
 void Resolver::resolveSelectClause(

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 3461a5e..bc70d2d 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -51,7 +51,9 @@ target_link_libraries(quickstep_queryoptimizer_rules_GenerateJoins
                       quickstep_queryoptimizer_expressions_AttributeReference
                       quickstep_queryoptimizer_expressions_ComparisonExpression
                       quickstep_queryoptimizer_expressions_ExpressionUtil
+                      quickstep_queryoptimizer_expressions_LogicalAnd
                       quickstep_queryoptimizer_expressions_PatternMatcher
+                      quickstep_queryoptimizer_expressions_Predicate
                       quickstep_queryoptimizer_logical_Filter
                       quickstep_queryoptimizer_logical_HashJoin
                       quickstep_queryoptimizer_logical_Logical
@@ -64,6 +66,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_GenerateJoins
                       quickstep_types_operations_comparisons_ComparisonFactory
                       quickstep_types_operations_comparisons_ComparisonID
                       quickstep_utility_Macros
+                      quickstep_utility_SqlError
                       quickstep_utility_VectorUtil)
 target_link_libraries(quickstep_queryoptimizer_rules_PruneColumns
                       quickstep_queryoptimizer_expressions_AttributeReference
@@ -79,6 +82,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_PushDownFilter
                       quickstep_queryoptimizer_expressions_AttributeReference
                       quickstep_queryoptimizer_expressions_ExpressionUtil
                       quickstep_queryoptimizer_logical_Filter
+                      quickstep_queryoptimizer_logical_HashJoin
                       quickstep_queryoptimizer_logical_Logical
                       quickstep_queryoptimizer_logical_PatternMatcher
                       quickstep_queryoptimizer_rules_Rule

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/rules/GenerateJoins.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/GenerateJoins.cpp b/query_optimizer/rules/GenerateJoins.cpp
index 05fa397..174cf03 100644
--- a/query_optimizer/rules/GenerateJoins.cpp
+++ b/query_optimizer/rules/GenerateJoins.cpp
@@ -19,6 +19,7 @@
 
 #include "query_optimizer/rules/GenerateJoins.hpp"
 
+#include <memory>
 #include <unordered_map>
 #include <utility>
 #include <vector>
@@ -26,7 +27,9 @@
 #include "query_optimizer/expressions/AttributeReference.hpp"
 #include "query_optimizer/expressions/ComparisonExpression.hpp"
 #include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/LogicalAnd.hpp"
 #include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
 #include "query_optimizer/logical/Filter.hpp"
 #include "query_optimizer/logical/HashJoin.hpp"
 #include "query_optimizer/logical/Logical.hpp"
@@ -37,6 +40,7 @@
 #include "query_optimizer/rules/RuleHelper.hpp"
 #include "types/operations/comparisons/ComparisonFactory.hpp"
 #include "types/operations/comparisons/ComparisonID.hpp"
+#include "utility/SqlError.hpp"
 #include "utility/VectorUtil.hpp"
 
 #include "glog/logging.h"
@@ -245,6 +249,7 @@ void AddNestedLoopsJoin(const L::LogicalPtr left_logical,
 L::LogicalPtr GenerateJoins::applyToNode(const L::LogicalPtr &input) {
   L::FilterPtr filter;
   L::MultiwayCartesianJoinPtr cartesian_join;
+  L::HashJoinPtr hash_join;
 
   // Merge filter predicates with cartesian joins to generate NestedLoopsJoin or
   // HashJoin.
@@ -391,6 +396,91 @@ L::LogicalPtr GenerateJoins::applyToNode(const L::LogicalPtr &input) {
     L::LogicalPtr output = MaybeGenerateNestedLoopsCartesianJoin(cartesian_join->operands());
     LOG_APPLYING_RULE(input, output);
     return output;
+  } else if (L::SomeHashJoin::MatchesWithConditionalCast(input, &hash_join) &&
+             hash_join->join_type() == L::HashJoin::JoinType::kLeftOuterJoin) {
+    L::LogicalPtr left_logical = hash_join->left();
+    L::LogicalPtr right_logical = hash_join->right();
+
+    const std::vector<E::AttributeReferencePtr> left_relation_attributes =
+        left_logical->getOutputAttributes();
+    const std::vector<E::AttributeReferencePtr> right_relation_attributes =
+        right_logical->getOutputAttributes();
+
+    DCHECK(hash_join->residual_predicate() != nullptr);
+    const std::vector<E::PredicatePtr> on_predicate_items =
+        GetConjunctivePredicates(hash_join->residual_predicate());
+
+    std::vector<E::AttributeReferencePtr> left_join_attributes;
+    std::vector<E::AttributeReferencePtr> right_join_attributes;
+    std::vector<E::PredicatePtr> left_filter_predicates;
+    std::vector<E::PredicatePtr> right_filter_predicates;
+
+    for (const E::PredicatePtr &predicate_item : on_predicate_items) {
+      std::vector<E::AttributeReferencePtr> referenced_attrs =
+          predicate_item->getReferencedAttributes();
+
+      if (E::SubsetOfExpressions(referenced_attrs, left_relation_attributes)) {
+        // The predicate refers attributes only from the left relation.
+        left_filter_predicates.emplace_back(predicate_item);
+      } else if (E::SubsetOfExpressions(referenced_attrs, right_relation_attributes)) {
+        // The predicate refers attributes only from the right relation.
+        right_filter_predicates.emplace_back(predicate_item);
+      } else {
+        // The predicate refers attributes from both relations.
+        //
+        // NOTE(jianqiao): In this case, since currently in the backend we do not
+        // support residual predicates for hash outer joins, so the predicate can
+        // only be an equal comparison between two attributes (one from left and
+        // one from right).
+        E::ComparisonExpressionPtr comp_expr;
+        if (E::SomeComparisonExpression::MatchesWithConditionalCast(predicate_item,
+                                                                    &comp_expr)) {
+          E::AttributeReferencePtr left_attr;
+          E::AttributeReferencePtr right_attr;
+          if (comp_expr->isEqualityComparisonPredicate() &&
+              E::SomeAttributeReference::MatchesWithConditionalCast(comp_expr->left(), &left_attr) &&
+              E::SomeAttributeReference::MatchesWithConditionalCast(comp_expr->right(), &right_attr)) {
+            if (E::ContainsExpression(left_relation_attributes, left_attr)) {
+              DCHECK(E::ContainsExpression(right_relation_attributes, right_attr));
+
+              left_join_attributes.emplace_back(left_attr);
+              right_join_attributes.emplace_back(right_attr);
+            } else {
+              DCHECK(E::ContainsExpression(left_relation_attributes, right_attr));
+              DCHECK(E::ContainsExpression(right_relation_attributes, left_attr));
+
+              left_join_attributes.emplace_back(right_attr);
+              right_join_attributes.emplace_back(left_attr);
+            }
+          } else {
+            THROW_SQL_ERROR() << "Join predicate for outer joins must be an "
+                              << "equality comparison between attributes";
+          }
+        } else {
+          THROW_SQL_ERROR() << "Non-equality join predicate is not allowed for outer joins";
+        }
+      }
+    }
+
+    if (!left_filter_predicates.empty()) {
+        // NOTE(jianqiao): Filter predicates on left table cannot be pushed down
+        // but can be treated as residual predicates once we have support for that.
+        THROW_SQL_ERROR() << "Filter predicates on left table is not allowed for outer joins";
+    }
+
+    if (!right_filter_predicates.empty()) {
+      const E::PredicatePtr right_predicate =
+          (right_filter_predicates.size() == 1u ? right_filter_predicates[0]
+                                                : E::LogicalAnd::Create(right_filter_predicates));
+      right_logical = L::Filter::Create(right_logical, right_predicate);
+    }
+
+    return L::HashJoin::Create(left_logical,
+                               right_logical,
+                               left_join_attributes,
+                               right_join_attributes,
+                               nullptr /* residual_predicate */,
+                               L::HashJoin::JoinType::kLeftOuterJoin);
   }
 
   LOG_IGNORING_RULE(input);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/rules/PushDownFilter.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/PushDownFilter.cpp b/query_optimizer/rules/PushDownFilter.cpp
index 41c21e7..5bf5545 100644
--- a/query_optimizer/rules/PushDownFilter.cpp
+++ b/query_optimizer/rules/PushDownFilter.cpp
@@ -1,6 +1,8 @@
 /**
  *   Copyright 2011-2015 Quickstep Technologies LLC.
  *   Copyright 2015 Pivotal Software, Inc.
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin—Madison.
  *
  *   Licensed under the Apache License, Version 2.0 (the "License");
  *   you may not use this file except in compliance with the License.
@@ -17,11 +19,13 @@
 
 #include "query_optimizer/rules/PushDownFilter.hpp"
 
+#include <cstddef>
 #include <vector>
 
 #include "query_optimizer/expressions/AttributeReference.hpp"
 #include "query_optimizer/expressions/ExpressionUtil.hpp"
 #include "query_optimizer/logical/Filter.hpp"
+#include "query_optimizer/logical/HashJoin.hpp"
 #include "query_optimizer/logical/PatternMatcher.hpp"
 #include "query_optimizer/rules/Rule.hpp"
 #include "query_optimizer/rules/RuleHelper.hpp"
@@ -48,13 +52,23 @@ L::LogicalPtr PushDownFilter::applyToNode(const L::LogicalPtr &input) {
     // We consider if the filter predicates can be pushed under the input of the
     // Filter.
     const L::LogicalPtr &input = filter->input();
-    const std::vector<L::LogicalPtr> input_children = input->children();
+    const std::vector<L::LogicalPtr> &input_children = input->children();
 
     // Store the predicates that can be pushed down to be upon each child node
     // of the Filter input.
     std::vector<std::vector<E::PredicatePtr>> predicates_to_be_pushed(
         input_children.size());
     if (!input_children.empty()) {
+      std::size_t last_input_index = input_children.size();
+
+      // Cannot push down a Filter down the right child of LeftOuterJoin.
+      L::HashJoinPtr hash_join;
+      if (L::SomeHashJoin::MatchesWithConditionalCast(input, &hash_join) &&
+          hash_join->join_type() == L::HashJoin::JoinType::kLeftOuterJoin) {
+        DCHECK_EQ(2u, input_children.size());
+        last_input_index = 1u;
+      }
+
       for (const E::PredicatePtr &conjuntion_item : conjunction_items) {
         bool can_be_pushed = false;
         const std::vector<E::AttributeReferencePtr> referenced_attributes =
@@ -64,7 +78,7 @@ L::LogicalPtr PushDownFilter::applyToNode(const L::LogicalPtr &input) {
         // referenced by the predicate is a subset of output attributes
         // of a child.
         for (std::vector<L::LogicalPtr>::size_type input_index = 0;
-             input_index < input_children.size();
+             input_index < last_input_index;
              ++input_index) {
           if (SubsetOfExpressions(
                   referenced_attributes,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/rules/PushDownSemiAntiJoin.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/PushDownSemiAntiJoin.cpp b/query_optimizer/rules/PushDownSemiAntiJoin.cpp
index 54af764..d53970e 100644
--- a/query_optimizer/rules/PushDownSemiAntiJoin.cpp
+++ b/query_optimizer/rules/PushDownSemiAntiJoin.cpp
@@ -64,7 +64,14 @@ L::LogicalPtr PushDownSemiAntiJoin::pushDownSemiAntiJoin(
   std::vector<L::LogicalPtr> left_input_children = left_input->children();
 
   if (!left_input_children.empty()) {
-    const std::vector<L::LogicalPtr>::size_type last_input_index = left_input_children.size();
+    std::vector<L::LogicalPtr>::size_type last_input_index = left_input_children.size();
+    // Cannot push down a Filter down the right child of LeftOuterJoin.
+    L::HashJoinPtr hash_join;
+    if (L::SomeHashJoin::MatchesWithConditionalCast(left_input, &hash_join) &&
+        hash_join->join_type() == L::HashJoin::JoinType::kLeftOuterJoin) {
+      DCHECK_EQ(2u, left_input_children.size());
+      last_input_index = 1u;
+    }
 
     std::vector<L::LogicalPtr>::size_type input_index = 0;
     while (input_index < last_input_index) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/strategy/Join.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/strategy/Join.cpp b/query_optimizer/strategy/Join.cpp
index becea61..ceb3f4f 100644
--- a/query_optimizer/strategy/Join.cpp
+++ b/query_optimizer/strategy/Join.cpp
@@ -314,6 +314,9 @@ void Join::addHashJoin(const logical::ProjectPtr &logical_project,
     case L::HashJoin::JoinType::kLeftAntiJoin:
       join_type = P::HashJoin::JoinType::kLeftAntiJoin;
       break;
+    case L::HashJoin::JoinType::kLeftOuterJoin:
+      join_type = P::HashJoin::JoinType::kLeftOuterJoin;
+      break;
     default:
       LOG(FATAL) << "Invalid logical::HashJoin::JoinType: "
                  << static_cast<typename std::underlying_type<L::HashJoin::JoinType>::type>(

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/tests/execution_generator/Join.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/execution_generator/Join.test b/query_optimizer/tests/execution_generator/Join.test
index 6d6d2e1..011d365 100644
--- a/query_optimizer/tests/execution_generator/Join.test
+++ b/query_optimizer/tests/execution_generator/Join.test
@@ -130,3 +130,102 @@ WHERE a1.w = b1.w
 |          6|                  61|                     601|              C6|
 |         18|                 181|                    1799|             C18|
 +-----------+--------------------+------------------------+----------------+
+==
+
+SELECT a.w AS "a.w", b.x AS "b.x", c.y AS "c.y", d.z AS "d.z"
+FROM a LEFT JOIN b ON a.w = b.w
+       LEFT JOIN c ON a.x = c.x
+       LEFT JOIN d ON a.y = d.y
+ORDER BY a.w;
+--
++-----------+--------------------+------------------------+----------------+
+|a.w        |b.x                 |c.y                     |d.z             |
++-----------+--------------------+------------------------+----------------+
+|          0|                   0|                      -1|              C0|
+|          1|                NULL|                    NULL|              C1|
+|          2|                  21|                    NULL|              C2|
+|          3|                NULL|                     300|              C3|
+|          4|                  40|                    NULL|              C4|
+|          5|                NULL|                    NULL|              C5|
+|          6|                  61|                     601|              C6|
+|          7|                NULL|                    NULL|              C7|
+|          8|                  80|                    NULL|              C8|
+|          9|                NULL|                     899|              C9|
+|         10|                 101|                    NULL|             C10|
+|         11|                NULL|                    NULL|             C11|
+|         12|                 120|                    1200|             C12|
+|         13|                NULL|                    NULL|             C13|
+|         14|                 141|                    NULL|             C14|
+|         15|                NULL|                    1501|             C15|
+|         16|                 160|                    NULL|             C16|
+|         17|                NULL|                    NULL|             C17|
+|         18|                 181|                    1799|             C18|
+|         19|                NULL|                    NULL|             C19|
++-----------+--------------------+------------------------+----------------+
+==
+
+SELECT a.w AS "a.w", b.x AS "b.x", c.y AS "c.y", d.z AS "d.z"
+FROM a LEFT JOIN b ON a.w = b.w
+       LEFT JOIN c ON b.x = c.x
+       LEFT JOIN d ON c.y = d.y
+ORDER BY a.w;
+--
++-----------+--------------------+------------------------+----------------+
+|a.w        |b.x                 |c.y                     |d.z             |
++-----------+--------------------+------------------------+----------------+
+|          0|                   0|                      -1|            NULL|
+|          1|                NULL|                    NULL|            NULL|
+|          2|                  21|                    NULL|            NULL|
+|          3|                NULL|                    NULL|            NULL|
+|          4|                  40|                    NULL|            NULL|
+|          5|                NULL|                    NULL|            NULL|
+|          6|                  61|                    NULL|            NULL|
+|          7|                NULL|                    NULL|            NULL|
+|          8|                  80|                    NULL|            NULL|
+|          9|                NULL|                    NULL|            NULL|
+|         10|                 101|                    NULL|            NULL|
+|         11|                NULL|                    NULL|            NULL|
+|         12|                 120|                    1200|             C12|
+|         13|                NULL|                    NULL|            NULL|
+|         14|                 141|                    NULL|            NULL|
+|         15|                NULL|                    NULL|            NULL|
+|         16|                 160|                    NULL|            NULL|
+|         17|                NULL|                    NULL|            NULL|
+|         18|                 181|                    NULL|            NULL|
+|         19|                NULL|                    NULL|            NULL|
++-----------+--------------------+------------------------+----------------+
+==
+
+SELECT b.x AS "b.x", c1.x AS "c1.x", c2.x AS "c2.x"
+FROM b LEFT JOIN c c1 ON (b.x = c1.x)
+       LEFT JOIN c c2 ON (b.x = c2.x AND c2.x > 10)
+ORDER BY b.x;
+--
++--------------------+--------------------+--------------------+
+|b.x                 |c1.x                |c2.x                |
++--------------------+--------------------+--------------------+
+|                   0|                   0|                NULL|
+|                  21|                NULL|                NULL|
+|                  40|                NULL|                NULL|
+|                  61|                NULL|                NULL|
+|                  80|                NULL|                NULL|
+|                 101|                NULL|                NULL|
+|                 120|                 120|                 120|
+|                 141|                NULL|                NULL|
+|                 160|                NULL|                NULL|
+|                 181|                NULL|                NULL|
++--------------------+--------------------+--------------------+
+==
+
+SELECT a.w AS "a.w", b.x AS "b.x", c.y AS "c.y", d.z AS "d.z"
+FROM a RIGHT JOIN b ON a.w = b.w
+       JOIN c ON a.x = c.x
+       LEFT JOIN d ON a.y = d.y
+WHERE b.x > 10
+  AND c.y < 1000;
+--
++-----------+--------------------+------------------------+----------------+
+|a.w        |b.x                 |c.y                     |d.z             |
++-----------+--------------------+------------------------+----------------+
+|          6|                  61|                     601|              C6|
++-----------+--------------------+------------------------+----------------+

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/tests/logical_generator/Join.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/logical_generator/Join.test b/query_optimizer/tests/logical_generator/Join.test
index d48df4c..a6452cc 100644
--- a/query_optimizer/tests/logical_generator/Join.test
+++ b/query_optimizer/tests/logical_generator/Join.test
@@ -215,3 +215,185 @@ TopLevelPlan
 |   +-AttributeReference[id=3,name=z,relation=a1,type=Int]
 +-output_attributes=
   +-AttributeReference[id=3,name=z,relation=a1,type=Int]
+==
+
+
+# Outer joins
+SELECT a.w, b.x, c.y, d.z
+FROM a LEFT JOIN b ON a.w = b.w
+       LEFT JOIN c ON a.x = c.x
+       LEFT JOIN d ON a.y = d.y;
+--
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=HashLeftOuterJoin
+| | | +-left=HashLeftOuterJoin
+| | | | +-left=TableReference[relation_name=a]
+| | | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | | | +-AttributeReference[id=3,name=z,relation=a,type=Int]
+| | | | +-right=TableReference[relation_name=b]
+| | | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | +-left_join_attributes=
+| | | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | +-right_join_attributes=
+| | | |   +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | +-right=TableReference[relation_name=c]
+| | | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | | +-left_join_attributes=
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | +-right_join_attributes=
+| | |   +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | +-right=TableReference[relation_name=d]
+| | | +-AttributeReference[id=8,name=y,relation=d,type=Int]
+| | | +-AttributeReference[id=9,name=z,relation=d,type=Int]
+| | +-left_join_attributes=
+| | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | +-right_join_attributes=
+| |   +-AttributeReference[id=8,name=y,relation=d,type=Int]
+| +-project_list=
+|   +-AttributeReference[id=0,name=w,relation=a,type=Int]
+|   +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+|   +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+|   +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
++-output_attributes=
+  +-AttributeReference[id=0,name=w,relation=a,type=Int]
+  +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+  +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+  +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
+==
+
+# Push down filtering
+SELECT a.w, b.x, c.y, d.z
+FROM a RIGHT JOIN b ON a.w = b.w
+       JOIN c ON a.x = c.x
+       LEFT JOIN d ON a.y = d.y
+WHERE b.x > 10
+  AND c.y > 20;
+--
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=HashJoin
+| | | +-left=HashLeftOuterJoin
+| | | | +-left=Filter
+| | | | | +-input=TableReference[relation_name=b]
+| | | | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | | +-filter_predicate=Greater
+| | | | |   +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | |   +-Literal[value=10,type=Int]
+| | | | +-right=TableReference[relation_name=a]
+| | | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | | | +-AttributeReference[id=3,name=z,relation=a,type=Int]
+| | | | +-left_join_attributes=
+| | | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | +-right_join_attributes=
+| | | |   +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | +-right=Filter
+| | | | +-input=TableReference[relation_name=c]
+| | | | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | | | +-filter_predicate=Greater
+| | | |   +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | | |   +-Literal[value=20,type=Int]
+| | | +-left_join_attributes=
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int NULL]
+| | | +-right_join_attributes=
+| | |   +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | +-right=TableReference[relation_name=d]
+| | | +-AttributeReference[id=8,name=y,relation=d,type=Int]
+| | | +-AttributeReference[id=9,name=z,relation=d,type=Int]
+| | +-left_join_attributes=
+| | | +-AttributeReference[id=2,name=y,relation=a,type=Int NULL]
+| | +-right_join_attributes=
+| |   +-AttributeReference[id=8,name=y,relation=d,type=Int]
+| +-project_list=
+|   +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+|   +-AttributeReference[id=5,name=x,relation=b,type=Int]
+|   +-AttributeReference[id=7,name=y,relation=c,type=Int]
+|   +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
++-output_attributes=
+  +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+  +-AttributeReference[id=5,name=x,relation=b,type=Int]
+  +-AttributeReference[id=7,name=y,relation=c,type=Int]
+  +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
+==
+
+SELECT *
+FROM b LEFT JOIN c ON (b.x = c.x AND c.x > 10);
+--
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=TableReference[relation_name=b]
+| | | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | +-right=Filter
+| | | +-input=TableReference[relation_name=c]
+| | | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | | +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| | | +-filter_predicate=Greater
+| | |   +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | |   +-Literal[value=10,type=Int]
+| | +-left_join_attributes=
+| | | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | +-right_join_attributes=
+| |   +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| +-project_list=
+|   +-AttributeReference[id=0,name=w,relation=b,type=Int]
+|   +-AttributeReference[id=1,name=x,relation=b,type=Int]
+|   +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+|   +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
++-output_attributes=
+  +-AttributeReference[id=0,name=w,relation=b,type=Int]
+  +-AttributeReference[id=1,name=x,relation=b,type=Int]
+  +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+  +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
+==
+
+# Consider the semantics of this query, a value of b.x WILL BE in the output even if it is no greater than 10.
+SELECT *
+FROM b LEFT JOIN c ON (b.x = c.x AND b.x > 10);
+--
+ERROR: Filter predicates on left table is not allowed for outer joins
+==
+
+# Consider the semantics of this query, a value of b.x WILL NOT BE in the output if it is no greater than 10.
+SELECT *
+FROM b LEFT JOIN c ON b.x = c.x
+WHERE b.x > 10;
+--
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=Filter
+| | | +-input=TableReference[relation_name=b]
+| | | | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | | | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | | +-filter_predicate=Greater
+| | |   +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | |   +-Literal[value=10,type=Int]
+| | +-right=TableReference[relation_name=c]
+| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| | +-left_join_attributes=
+| | | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | +-right_join_attributes=
+| |   +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| +-project_list=
+|   +-AttributeReference[id=0,name=w,relation=b,type=Int]
+|   +-AttributeReference[id=1,name=x,relation=b,type=Int]
+|   +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+|   +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
++-output_attributes=
+  +-AttributeReference[id=0,name=w,relation=b,type=Int]
+  +-AttributeReference[id=1,name=x,relation=b,type=Int]
+  +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+  +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/tests/physical_generator/Join.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/physical_generator/Join.test b/query_optimizer/tests/physical_generator/Join.test
index c1f49b0..45c9cbd 100644
--- a/query_optimizer/tests/physical_generator/Join.test
+++ b/query_optimizer/tests/physical_generator/Join.test
@@ -235,3 +235,167 @@ TopLevelPlan
 |   +-AttributeReference[id=9,name=z,relation=d1,type=Int]
 +-output_attributes=
   +-AttributeReference[id=3,name=z,relation=a1,type=Int]
+==
+
+SELECT a.w, b.x, c.y, d.z
+FROM a LEFT JOIN b ON a.w = b.w
+       LEFT JOIN c ON a.x = c.x
+       LEFT JOIN d ON a.y = d.y;
+--
+TopLevelPlan
++-plan=HashLeftOuterJoin
+| +-left=HashLeftOuterJoin
+| | +-left=HashLeftOuterJoin
+| | | +-left=TableReference[relation=a]
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | | +-AttributeReference[id=3,name=z,relation=a,type=Int]
+| | | +-right=TableReference[relation=b]
+| | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | +-project_expressions=
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | | +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+| | | +-left_join_attributes=
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | +-right_join_attributes=
+| | |   +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | +-right=TableReference[relation=c]
+| | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | +-project_expressions=
+| | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+| | | +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+| | +-left_join_attributes=
+| | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | +-right_join_attributes=
+| |   +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| +-right=TableReference[relation=d]
+| | +-AttributeReference[id=8,name=y,relation=d,type=Int]
+| | +-AttributeReference[id=9,name=z,relation=d,type=Int]
+| +-project_expressions=
+| | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+| | +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+| | +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
+| +-left_join_attributes=
+| | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| +-right_join_attributes=
+|   +-AttributeReference[id=8,name=y,relation=d,type=Int]
++-output_attributes=
+  +-AttributeReference[id=0,name=w,relation=a,type=Int]
+  +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+  +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+  +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
+==
+
+SELECT a.w, b.x, c.y, d.z
+FROM a RIGHT JOIN b ON a.w = b.w
+       JOIN c ON a.x = c.x
+       LEFT JOIN d ON a.y = d.y
+WHERE b.x > 10
+  AND c.y > 20;
+--
+TopLevelPlan
++-plan=HashLeftOuterJoin
+| +-left=HashJoin
+| | +-left=HashLeftOuterJoin
+| | | +-left=Selection
+| | | | +-input=TableReference[relation=b]
+| | | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | +-filter_predicate=Greater
+| | | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | | +-Literal[value=10,type=Int]
+| | | | +-project_expressions=
+| | | |   +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | |   +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | +-right=TableReference[relation=a]
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | | +-AttributeReference[id=3,name=z,relation=a,type=Int]
+| | | +-project_expressions=
+| | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int NULL]
+| | | | +-AttributeReference[id=2,name=y,relation=a,type=Int NULL]
+| | | +-left_join_attributes=
+| | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | +-right_join_attributes=
+| | |   +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | +-right=Selection
+| | | +-input=TableReference[relation=c]
+| | | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | | +-filter_predicate=Greater
+| | | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | | | +-Literal[value=20,type=Int]
+| | | +-project_expressions=
+| | |   +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | |   +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | +-project_expressions=
+| | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+| | | +-AttributeReference[id=2,name=y,relation=a,type=Int NULL]
+| | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | +-left_join_attributes=
+| | | +-AttributeReference[id=1,name=x,relation=a,type=Int NULL]
+| | +-right_join_attributes=
+| |   +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| +-right=TableReference[relation=d]
+| | +-AttributeReference[id=8,name=y,relation=d,type=Int]
+| | +-AttributeReference[id=9,name=z,relation=d,type=Int]
+| +-project_expressions=
+| | +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+| | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
+| +-left_join_attributes=
+| | +-AttributeReference[id=2,name=y,relation=a,type=Int NULL]
+| +-right_join_attributes=
+|   +-AttributeReference[id=8,name=y,relation=d,type=Int]
++-output_attributes=
+  +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+  +-AttributeReference[id=5,name=x,relation=b,type=Int]
+  +-AttributeReference[id=7,name=y,relation=c,type=Int]
+  +-AttributeReference[id=9,name=z,relation=d,type=Int NULL]
+==
+
+SELECT *
+FROM b LEFT JOIN c ON (b.x = c.x AND c.x > 10);
+--
+TopLevelPlan
++-plan=HashLeftOuterJoin
+| +-left=TableReference[relation=b]
+| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| +-right=Selection
+| | +-input=TableReference[relation=c]
+| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| | +-filter_predicate=Greater
+| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | +-Literal[value=10,type=Int]
+| | +-project_expressions=
+| |   +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| |   +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| +-project_expressions=
+| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+| | +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
+| +-left_join_attributes=
+| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| +-right_join_attributes=
+|   +-AttributeReference[id=2,name=x,relation=c,type=Int]
++-output_attributes=
+  +-AttributeReference[id=0,name=w,relation=b,type=Int]
+  +-AttributeReference[id=1,name=x,relation=b,type=Int]
+  +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+  +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/query_optimizer/tests/resolver/Join.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/resolver/Join.test b/query_optimizer/tests/resolver/Join.test
index f501f97..3443e2b 100644
--- a/query_optimizer/tests/resolver/Join.test
+++ b/query_optimizer/tests/resolver/Join.test
@@ -156,17 +156,144 @@ TopLevelPlan
   +-AttributeReference[id=3,name=z,relation=a1,type=Int]
 ==
 
+
+# Outer joins
 SELECT *
-FROM b LEFT JOIN c ON b.x = c.x JOIN d ON c.y = d.y;
+FROM b LEFT JOIN c ON (b.x = c.x AND c.x > 10);
 --
-ERROR: Outer joins are not supported yet (2 : 13)
-FROM b LEFT JOIN c ON b.x = c.x JOIN d ON ...
-            ^
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=TableReference[relation_name=b]
+| | | +-AttributeReference[id=0,name=w,relation=b,type=Int]
+| | | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | +-right=TableReference[relation_name=c]
+| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| | +-residual_predicate=And
+| | | +-Equal
+| | | | +-AttributeReference[id=1,name=x,relation=b,type=Int]
+| | | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | | +-Greater
+| | |   +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | |   +-Literal[value=10,type=Int]
+| | +-left_join_attributes=
+| | | +-[]
+| | +-right_join_attributes=
+| |   +-[]
+| +-project_list=
+|   +-AttributeReference[id=0,name=w,relation=b,type=Int]
+|   +-AttributeReference[id=1,name=x,relation=b,type=Int]
+|   +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+|   +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
++-output_attributes=
+  +-AttributeReference[id=0,name=w,relation=b,type=Int]
+  +-AttributeReference[id=1,name=x,relation=b,type=Int]
+  +-AttributeReference[id=2,name=x,relation=c,type=Int NULL]
+  +-AttributeReference[id=3,name=y,relation=c,type=Int NULL]
+==
+
+SELECT a.w, b.x, c.y, a.w + b.x + c.y
+FROM a LEFT JOIN b ON a.w = b.w RIGHT JOIN c ON b.x = c.x;
+--
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=TableReference[relation_name=c]
+| | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | +-right=HashLeftOuterJoin
+| | | +-left=TableReference[relation_name=a]
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | | +-AttributeReference[id=3,name=z,relation=a,type=Int]
+| | | +-right=TableReference[relation_name=b]
+| | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | +-residual_predicate=Equal
+| | | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | +-left_join_attributes=
+| | | | +-[]
+| | | +-right_join_attributes=
+| | |   +-[]
+| | +-residual_predicate=Equal
+| | | +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+| | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | +-left_join_attributes=
+| | | +-[]
+| | +-right_join_attributes=
+| |   +-[]
+| +-project_list=
+|   +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+|   +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+|   +-AttributeReference[id=7,name=y,relation=c,type=Int]
+|   +-Alias[id=8,name=,alias=((a.w+b.x)+c.y),relation=,type=Int NULL]
+|     +-Add
+|       +-Add
+|       | +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+|       | +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+|       +-AttributeReference[id=7,name=y,relation=c,type=Int]
++-output_attributes=
+  +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
+  +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+  +-AttributeReference[id=7,name=y,relation=c,type=Int]
+  +-AttributeReference[id=8,name=,alias=((a.w+b.x)+c.y),relation=,type=Int NULL]
+==
+
+SELECT a.w, b.x, c.y, a.w + b.x + c.y
+FROM a LEFT JOIN (b RIGHT JOIN c ON b.x = c.x) ON a.w = b.w;
+--
+TopLevelPlan
++-plan=Project
+| +-input=HashLeftOuterJoin
+| | +-left=TableReference[relation_name=a]
+| | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | +-AttributeReference[id=1,name=x,relation=a,type=Int]
+| | | +-AttributeReference[id=2,name=y,relation=a,type=Int]
+| | | +-AttributeReference[id=3,name=z,relation=a,type=Int]
+| | +-right=HashLeftOuterJoin
+| | | +-left=TableReference[relation_name=c]
+| | | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | | +-right=TableReference[relation_name=b]
+| | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
+| | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | +-residual_predicate=Equal
+| | | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
+| | | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | +-left_join_attributes=
+| | | | +-[]
+| | | +-right_join_attributes=
+| | |   +-[]
+| | +-residual_predicate=Equal
+| | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+| | | +-AttributeReference[id=4,name=w,relation=b,type=Int NULL]
+| | +-left_join_attributes=
+| | | +-[]
+| | +-right_join_attributes=
+| |   +-[]
+| +-project_list=
+|   +-AttributeReference[id=0,name=w,relation=a,type=Int]
+|   +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+|   +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+|   +-Alias[id=8,name=,alias=((a.w+b.x)+c.y),relation=,type=Int NULL]
+|     +-Add
+|       +-Add
+|       | +-AttributeReference[id=0,name=w,relation=a,type=Int]
+|       | +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+|       +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
++-output_attributes=
+  +-AttributeReference[id=0,name=w,relation=a,type=Int]
+  +-AttributeReference[id=5,name=x,relation=b,type=Int NULL]
+  +-AttributeReference[id=7,name=y,relation=c,type=Int NULL]
+  +-AttributeReference[id=8,name=,alias=((a.w+b.x)+c.y),relation=,type=Int NULL]
 ==
 
 SELECT *
-FROM b LEFT JOIN (c JOIN d ON c.y = d.y) ON b.x = c.x;
+FROM b FULL JOIN c ON b.x = c.x;
 --
-ERROR: Outer joins are not supported yet (2 : 13)
-FROM b LEFT JOIN (c JOIN d ON c.y = d.y) O...
+ERROR: Full outer join is not supported yet (2 : 13)
+FROM b FULL JOIN c ON b.x = c.x;
             ^

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index b02bc6b..f204d67 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -177,12 +177,12 @@ target_link_libraries(quickstep_relationaloperators_HashJoinOperator
                       quickstep_storage_StorageBlockInfo
                       quickstep_storage_StorageManager
                       quickstep_storage_SubBlocksReference
+                      quickstep_storage_TupleIdSequence
                       quickstep_storage_TupleReference
                       quickstep_storage_TupleStorageSubBlock
                       quickstep_storage_ValueAccessor
                       quickstep_types_containers_ColumnVectorsValueAccessor
                       quickstep_utility_Macros
-                      quickstep_utility_PtrList
                       tmb)
 target_link_libraries(quickstep_relationaloperators_InsertOperator
                       glog

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index e0076e3..82f6b2a 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -1,6 +1,8 @@
 /**
  *   Copyright 2011-2015 Quickstep Technologies LLC.
  *   Copyright 2015-2016 Pivotal Software, Inc.
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin—Madison.
  *
  *   Licensed under the Apache License, Version 2.0 (the "License");
  *   you may not use this file except in compliance with the License.
@@ -36,6 +38,7 @@
 #include "storage/StorageBlockInfo.hpp"
 #include "storage/StorageManager.hpp"
 #include "storage/SubBlocksReference.hpp"
+#include "storage/TupleIdSequence.hpp"
 #include "storage/TupleReference.hpp"
 #include "storage/TupleStorageSubBlock.hpp"
 #include "storage/ValueAccessor.hpp"
@@ -203,6 +206,38 @@ class SemiAntiJoinTupleCollector {
   std::unique_ptr<TupleIdSequence> filter_;
 };
 
+class OuterJoinTupleCollector {
+ public:
+  explicit OuterJoinTupleCollector(const TupleStorageSubBlock &tuple_store) {
+    filter_.reset(tuple_store.getExistenceMap());
+  }
+
+  template <typename ValueAccessorT>
+  inline void operator()(const ValueAccessorT &accessor,
+                         const TupleReference &tref) {
+    joined_tuples_[tref.block].emplace_back(tref.tuple, accessor.getCurrentPosition());
+  }
+
+  template <typename ValueAccessorT>
+  inline void recordMatch(const ValueAccessorT &accessor) {
+    filter_->set(accessor.getCurrentPosition(), false);
+  }
+
+  inline std::unordered_map<block_id, std::vector<std::pair<tuple_id, tuple_id>>>*
+      getJoinedTupleMap() {
+    return &joined_tuples_;
+  }
+
+  const TupleIdSequence* filter() const {
+    return filter_.get();
+  }
+
+ private:
+  std::unordered_map<block_id, std::vector<std::pair<tuple_id, tuple_id>>> joined_tuples_;
+  // BitVector on the probe relation. 1 if the corresponding tuple has no match.
+  std::unique_ptr<TupleIdSequence> filter_;
+};
+
 }  // namespace
 
 bool HashJoinOperator::getAllWorkOrders(
@@ -221,9 +256,13 @@ bool HashJoinOperator::getAllWorkOrders(
     case JoinType::kLeftAntiJoin:
       return getAllNonOuterJoinWorkOrders<HashAntiJoinWorkOrder>(
           container, query_context, storage_manager);
+    case JoinType::kLeftOuterJoin:
+      return getAllOuterJoinWorkOrders(
+          container, query_context, storage_manager);
     default:
       LOG(FATAL) << "Unknown join type in HashJoinOperator::getAllWorkOrders()";
   }
+  return false;
 }
 
 template <class JoinWorkOrderClass>
@@ -285,6 +324,65 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
   return false;
 }
 
+bool HashJoinOperator::getAllOuterJoinWorkOrders(
+    WorkOrdersContainer *container,
+    QueryContext *query_context,
+    StorageManager *storage_manager) {
+  // We wait until the building of global hash table is complete.
+  if (blocking_dependencies_met_) {
+    DCHECK(query_context != nullptr);
+
+    const vector<unique_ptr<const Scalar>> &selection =
+        query_context->getScalarGroup(selection_index_);
+
+    InsertDestination *output_destination =
+        query_context->getInsertDestination(output_destination_index_);
+    const JoinHashTable &hash_table =
+        *(query_context->getJoinHashTable(hash_table_index_));
+
+    if (probe_relation_is_stored_) {
+      if (!started_) {
+        for (const block_id probe_block_id : probe_relation_block_ids_) {
+          container->addNormalWorkOrder(
+              new HashOuterJoinWorkOrder(
+                  build_relation_,
+                  probe_relation_,
+                  join_key_attributes_,
+                  any_join_key_attributes_nullable_,
+                  probe_block_id,
+                  selection,
+                  is_selection_on_build_,
+                  hash_table,
+                  output_destination,
+                  storage_manager),
+              op_index_);
+        }
+        started_ = true;
+      }
+      return started_;
+    } else {
+      while (num_workorders_generated_ < probe_relation_block_ids_.size()) {
+        container->addNormalWorkOrder(
+            new HashOuterJoinWorkOrder(
+                build_relation_,
+                probe_relation_,
+                join_key_attributes_,
+                any_join_key_attributes_nullable_,
+                probe_relation_block_ids_[num_workorders_generated_],
+                selection,
+                is_selection_on_build_,
+                hash_table,
+                output_destination,
+                storage_manager),
+            op_index_);
+        ++num_workorders_generated_;
+      }
+      return done_feeding_input_relation_;
+    }  // end else (probe_relation_is_stored_)
+  }  // end if (blocking_dependencies_met_)
+  return false;
+}
+
 void HashInnerJoinWorkOrder::execute() {
   if (FLAGS_vector_based_joined_tuple_collector) {
     executeWithCollectorType<VectorBasedJoinedTupleCollector>();
@@ -641,11 +739,107 @@ void HashAntiJoinWorkOrder::executeWithResidualPredicate() {
   for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
        selection_it != selection_.end();
        ++selection_it) {
-    temp_result.addColumn((*selection_it)->getAllValues(probe_accessor_with_filter.get(),
-                                                     &sub_blocks_ref));
+    temp_result.addColumn(
+        (*selection_it)->getAllValues(probe_accessor_with_filter.get(),
+                                      &sub_blocks_ref));
   }
 
   output_destination_->bulkInsertTuples(&temp_result);
 }
 
+void HashOuterJoinWorkOrder::execute() {
+  const relation_id build_relation_id = build_relation_.getID();
+  const relation_id probe_relation_id = probe_relation_.getID();
+
+  const BlockReference probe_block = storage_manager_->getBlock(block_id_,
+                                                                probe_relation_);
+  const TupleStorageSubBlock &probe_store = probe_block->getTupleStorageSubBlock();
+
+  std::unique_ptr<ValueAccessor> probe_accessor(probe_store.createValueAccessor());
+  OuterJoinTupleCollector collector(probe_store);
+  if (join_key_attributes_.size() == 1) {
+    hash_table_.getAllFromValueAccessorWithExtraWorkForFirstMatch(
+        probe_accessor.get(),
+        join_key_attributes_.front(),
+        any_join_key_attributes_nullable_,
+        &collector);
+  } else {
+    hash_table_.getAllFromValueAccessorCompositeKeyWithExtraWorkForFirstMatch(
+        probe_accessor.get(),
+        join_key_attributes_,
+        any_join_key_attributes_nullable_,
+        &collector);
+  }
+
+  // Populate the output tuples for matches.
+  for (const std::pair<const block_id, std::vector<std::pair<tuple_id, tuple_id>>>
+           &build_block_entry : *collector.getJoinedTupleMap()) {
+    const BlockReference build_block =
+        storage_manager_->getBlock(build_block_entry.first, build_relation_);
+    const TupleStorageSubBlock &build_store =
+        build_block->getTupleStorageSubBlock();
+
+    std::unique_ptr<ValueAccessor> build_accessor(
+        build_store.createValueAccessor());
+    ColumnVectorsValueAccessor temp_result;
+    for (auto selection_it = selection_.begin();
+         selection_it != selection_.end();
+         ++selection_it) {
+      temp_result.addColumn(
+          (*selection_it)->getAllValuesForJoin(
+              build_relation_id,
+              build_accessor.get(),
+              probe_relation_id,
+              probe_accessor.get(),
+              build_block_entry.second));
+    }
+    output_destination_->bulkInsertTuples(&temp_result);
+  }
+
+  SubBlocksReference sub_blocks_ref(probe_store,
+                                    probe_block->getIndices(),
+                                    probe_block->getIndicesConsistent());
+
+  // Populate the output tuples for non-matches.
+  const TupleIdSequence *filter = collector.filter();
+  const TupleIdSequence::size_type num_tuples_without_matches = filter->size();
+  if (num_tuples_without_matches > 0) {
+    std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
+        probe_store.createValueAccessor(filter));
+    ColumnVectorsValueAccessor temp_result;
+
+    for (std::size_t i = 0; i < selection_.size(); ++i) {
+      if (is_selection_on_build_[i]) {
+        // NOTE(harshad, jianqiao): The assumption here is that any operation
+        // involving NULL as operands will return NULL result. This assumption
+        // will become invalid if later we add support for functions that can
+        // produce non-NULL result with NULL operands, e.g.
+        //   CASE WHEN x IS NOT NULL THEN x ELSE 0
+        // or equivalently
+        //   COALESCE(x, 0)
+        // where x is an attribute of the build relation.
+        // In that case, this HashOuterJoinWorkOrder needs to be updated to
+        // correctly handle the selections.
+        const Type& column_type = selection_[i]->getType().getNullableVersion();
+        if (NativeColumnVector::UsableForType(column_type)) {
+          NativeColumnVector *result = new NativeColumnVector(
+              column_type, num_tuples_without_matches);
+          result->fillWithNulls();
+          temp_result.addColumn(result);
+        } else {
+          IndirectColumnVector *result = new IndirectColumnVector(
+              column_type, num_tuples_without_matches);
+          result->fillWithValue(TypedValue(column_type.getTypeID()));
+          temp_result.addColumn(result);
+        }
+      } else {
+        temp_result.addColumn(
+            selection_[i]->getAllValues(probe_accessor_with_filter.get(),
+                                        &sub_blocks_ref));
+      }
+    }
+    output_destination_->bulkInsertTuples(&temp_result);
+  }
+}
+
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/relational_operators/HashJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index c22f435..fcc087a 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -1,6 +1,8 @@
 /**
  *   Copyright 2011-2015 Quickstep Technologies LLC.
  *   Copyright 2015-2016 Pivotal Software, Inc.
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin—Madison.
  *
  *   Licensed under the Apache License, Version 2.0 (the "License");
  *   you may not use this file except in compliance with the License.
@@ -31,7 +33,6 @@
 #include "storage/HashTable.hpp"
 #include "storage/StorageBlockInfo.hpp"
 #include "utility/Macros.hpp"
-#include "utility/PtrList.hpp"
 
 #include "glog/logging.h"
 
@@ -61,7 +62,8 @@ class HashJoinOperator : public RelationalOperator {
   enum class JoinType {
     kInnerJoin = 0,
     kLeftSemiJoin,
-    kLeftAntiJoin
+    kLeftAntiJoin,
+    kLeftOuterJoin
   };
 
   /**
@@ -101,10 +103,14 @@ class HashJoinOperator : public RelationalOperator {
    *        additional filter to pairs of tuples that match the hash-join (i.e.
    *        key equality) predicate. Effectively, this makes the join predicate
    *        the conjunction of the key-equality predicate and residual predicate.
+   *        Note that this field does not apply to outer joins.
    * @param selection_index The group index of Scalars in QueryContext,
    *        corresponding to the attributes of the relation referred by
    *        output_relation_id. Each Scalar is evaluated for the joined tuples,
    *        and the resulting value is inserted into the join result.
+   * @param is_selection_on_build Whether each selection Scalar is using attributes
+   *        from the build relation as input. Can be NULL for inner/semi/anti
+   *        joins since this information is not utilized by these joins.
    * @param join_type The type of join corresponding to this operator.
    **/
   HashJoinOperator(const CatalogRelation &build_relation,
@@ -117,6 +123,7 @@ class HashJoinOperator : public RelationalOperator {
                    const QueryContext::join_hash_table_id hash_table_index,
                    const QueryContext::predicate_id residual_predicate_index,
                    const QueryContext::scalar_group_id selection_index,
+                   const std::vector<bool> *is_selection_on_build = nullptr,
                    const JoinType join_type = JoinType::kInnerJoin)
       : build_relation_(build_relation),
         probe_relation_(probe_relation),
@@ -128,12 +135,19 @@ class HashJoinOperator : public RelationalOperator {
         hash_table_index_(hash_table_index),
         residual_predicate_index_(residual_predicate_index),
         selection_index_(selection_index),
+        is_selection_on_build_(is_selection_on_build == nullptr
+                                   ? std::vector<bool>()
+                                   : *is_selection_on_build),
         join_type_(join_type),
         probe_relation_block_ids_(probe_relation_is_stored
                                       ? probe_relation.getBlocksSnapshot()
                                       : std::vector<block_id>()),
         num_workorders_generated_(0),
-        started_(false) {}
+        started_(false) {
+    DCHECK(join_type != JoinType::kLeftOuterJoin ||
+               (is_selection_on_build != nullptr &&
+                residual_predicate_index == QueryContext::kInvalidPredicateId));
+  }
 
   ~HashJoinOperator() override {}
 
@@ -180,6 +194,10 @@ class HashJoinOperator : public RelationalOperator {
                                     QueryContext *query_context,
                                     StorageManager *storage_manager);
 
+  bool getAllOuterJoinWorkOrders(WorkOrdersContainer *container,
+                                 QueryContext *query_context,
+                                 StorageManager *storage_manager);
+
   const CatalogRelation &build_relation_;
   const CatalogRelation &probe_relation_;
   const bool probe_relation_is_stored_;
@@ -190,6 +208,7 @@ class HashJoinOperator : public RelationalOperator {
   const QueryContext::join_hash_table_id hash_table_index_;
   const QueryContext::predicate_id residual_predicate_index_;
   const QueryContext::scalar_group_id selection_index_;
+  const std::vector<bool> is_selection_on_build_;
   const JoinType join_type_;
 
   std::vector<block_id> probe_relation_block_ids_;
@@ -521,7 +540,7 @@ class HashAntiJoinWorkOrder : public WorkOrder {
                         StorageManager *storage_manager)
       : build_relation_(build_relation),
         probe_relation_(probe_relation),
-        join_key_attributes_(join_key_attributes),
+        join_key_attributes_(std::move(join_key_attributes)),
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
         block_id_(lookup_block_id),
         residual_predicate_(residual_predicate),
@@ -560,6 +579,115 @@ class HashAntiJoinWorkOrder : public WorkOrder {
   DISALLOW_COPY_AND_ASSIGN(HashAntiJoinWorkOrder);
 };
 
+/**
+ * @brief A left outer join WorkOrder produced by the HashJoinOperator.
+ **/
+class HashOuterJoinWorkOrder : public WorkOrder {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param build_relation The relation that the hash table was originally built
+   *        on (i.e. the inner relation in the join).
+   * @param probe_relation The relation to probe the hash table with (i.e. the
+   *        outer relation in the join).
+   * @param join_key_attributes The IDs of equijoin attributes in \c
+   *        probe_relation.
+   * @param any_join_key_attributes_nullable If any attribute is nullable.
+   * @param hash_table The JoinHashTable to use.
+   * @param selection A list of Scalars corresponding to the relation attributes
+   *        in \c output_destination. Each Scalar is evaluated for the joined
+   *        tuples, and the resulting value is inserted into the join result.
+   * @param is_selection_on_build Whether each Scalar in the \p selection vector
+   *        is using attributes from the build relation as input. Note that the
+   *        length of this vector should equal the length of \p selection.
+   * @param lookup_block_id The block id of the probe_relation.
+   * @param output_destination The InsertDestination to insert the join results.
+   * @param storage_manager The StorageManager to use.
+   **/
+  HashOuterJoinWorkOrder(const CatalogRelationSchema &build_relation,
+                         const CatalogRelationSchema &probe_relation,
+                         const std::vector<attribute_id> &join_key_attributes,
+                         const bool any_join_key_attributes_nullable,
+                         const block_id lookup_block_id,
+                         const std::vector<std::unique_ptr<const Scalar>> &selection,
+                         const std::vector<bool> &is_selection_on_build,
+                         const JoinHashTable &hash_table,
+                         InsertDestination *output_destination,
+                         StorageManager *storage_manager)
+      : build_relation_(build_relation),
+        probe_relation_(probe_relation),
+        join_key_attributes_(join_key_attributes),
+        any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
+        block_id_(lookup_block_id),
+        selection_(selection),
+        is_selection_on_build_(is_selection_on_build),
+        hash_table_(hash_table),
+        output_destination_(output_destination),
+        storage_manager_(storage_manager) {}
+
+  /**
+   * @brief Constructor for the distributed version.
+   *
+   * @param build_relation The relation that the hash table was originally built
+   *        on (i.e. the inner relation in the join).
+   * @param probe_relation The relation to probe the hash table with (i.e. the
+   *        outer relation in the join).
+   * @param join_key_attributes The IDs of equijoin attributes in \c
+   *        probe_relation.
+   * @param any_join_key_attributes_nullable If any attribute is nullable.
+   * @param hash_table The JoinHashTable to use.
+   * @param selection A list of Scalars corresponding to the relation attributes
+   *        in \c output_destination. Each Scalar is evaluated for the joined
+   *        tuples, and the resulting value is inserted into the join result.
+   * @param is_selection_on_build Whether each Scalar in the \p selection vector
+   *        is using attributes from the build relation as input. Note that the
+   *        length of this vector should equal the length of \p selection.
+   * @param lookup_block_id The block id of the probe_relation.
+   * @param output_destination The InsertDestination to insert the join results.
+   * @param storage_manager The StorageManager to use.
+   **/
+  HashOuterJoinWorkOrder(const CatalogRelationSchema &build_relation,
+                         const CatalogRelationSchema &probe_relation,
+                         std::vector<attribute_id> &&join_key_attributes,
+                         const bool any_join_key_attributes_nullable,
+                         const block_id lookup_block_id,
+                         const std::vector<std::unique_ptr<const Scalar>> &selection,
+                         std::vector<bool> &&is_selection_on_build,
+                         const JoinHashTable &hash_table,
+                         InsertDestination *output_destination,
+                         StorageManager *storage_manager)
+      : build_relation_(build_relation),
+        probe_relation_(probe_relation),
+        join_key_attributes_(std::move(join_key_attributes)),
+        any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
+        block_id_(lookup_block_id),
+        selection_(selection),
+        is_selection_on_build_(std::move(is_selection_on_build)),
+        hash_table_(hash_table),
+        output_destination_(output_destination),
+        storage_manager_(storage_manager) {}
+
+  ~HashOuterJoinWorkOrder() override {}
+
+  void execute() override;
+
+ private:
+  const CatalogRelationSchema &build_relation_;
+  const CatalogRelationSchema &probe_relation_;
+  const std::vector<attribute_id> join_key_attributes_;
+  const bool any_join_key_attributes_nullable_;
+  const block_id block_id_;
+  const std::vector<std::unique_ptr<const Scalar>> &selection_;
+  const std::vector<bool> is_selection_on_build_;
+  const JoinHashTable &hash_table_;
+
+  InsertDestination *output_destination_;
+  StorageManager *storage_manager_;
+
+  DISALLOW_COPY_AND_ASSIGN(HashOuterJoinWorkOrder);
+};
+
 /** @} */
 
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ef383ef2/relational_operators/WorkOrder.proto
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrder.proto b/relational_operators/WorkOrder.proto
index 1a0bcd1..8ed2080 100644
--- a/relational_operators/WorkOrder.proto
+++ b/relational_operators/WorkOrder.proto
@@ -29,20 +29,21 @@ enum WorkOrderType {
   DESTROY_HASH = 6;
   DROP_TABLE = 7;
   FINALIZE_AGGREGATION = 8;
-  HASH_INNER_JOIN = 9;
-  HASH_SEMI_JOIN = 10;
-  HASH_ANTI_JOIN = 11;
-  INSERT = 12;
-  NESTED_LOOP_JOIN = 13;
-  SAMPLE = 14;
-  SAVE_BLOCKS = 15;
-  SELECT = 16;
-  SORT_MERGE_RUN = 17;
-  SORT_RUN_GENERATION = 18;
-  TABLE_GENERATOR = 19;
-  TEXT_SCAN = 20;
-  TEXT_SPLIT = 21;
-  UPDATE = 22;
+  HASH_ANTI_JOIN = 9;
+  HASH_INNER_JOIN = 10;
+  HASH_OUTER_JOIN = 11;
+  HASH_SEMI_JOIN = 12;
+  INSERT = 13;
+  NESTED_LOOP_JOIN = 14;
+  SAMPLE = 15;
+  SAVE_BLOCKS = 16;
+  SELECT = 17;
+  SORT_MERGE_RUN = 18;
+  SORT_RUN_GENERATION = 19;
+  TABLE_GENERATOR = 20;
+  TEXT_SCAN = 21;
+  TEXT_SPLIT = 22;
+  UPDATE = 23;
 }
 
 message WorkOrder {
@@ -151,6 +152,21 @@ message HashSemiJoinWorkOrder {
   }
 }
 
+message HashOuterJoinWorkOrder {
+  extend WorkOrder {
+    // All required.
+    optional int32 build_relation_id = 370;
+    optional int32 probe_relation_id = 371;
+    repeated int32 join_key_attributes = 372;
+    optional bool any_join_key_attributes_nullable = 373;
+    optional int32 insert_destination_index = 374;
+    optional uint32 join_hash_table_index = 375;
+    optional int32 selection_index = 376;
+    repeated bool is_selection_on_build = 377;
+    optional fixed64 block_id = 378;
+  }
+}
+
 message InsertWorkOrder {
   extend WorkOrder {
     // All required.



[3/5] incubator-quickstep git commit: Set CatalogDatabaseCache proto in optimizer. (#159)

Posted by ji...@apache.org.
Set CatalogDatabaseCache proto in optimizer. (#159)

Link: https://github.com/pivotalsoftware/quickstep/pull/159

Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/390310fe
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/390310fe
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/390310fe

Branch: refs/heads/master
Commit: 390310feefade09e26a5b73feaa8ea9a38428479
Parents: ef383ef
Author: Zuyu ZHANG <zu...@users.noreply.github.com>
Authored: Wed Apr 20 21:20:48 2016 -0500
Committer: Harshad Deshmukh <d....@gmail.com>
Committed: Wed Apr 20 21:20:48 2016 -0500

----------------------------------------------------------------------
 CMakeLists.txt                            |  4 ++-
 query_optimizer/CMakeLists.txt            | 14 +++++++++
 query_optimizer/ExecutionGenerator.cpp    | 40 +++++++++++++++++++++++++-
 query_optimizer/ExecutionGenerator.hpp    | 33 +++++++++++++++++----
 query_optimizer/QueryHandle.hpp           | 22 ++++++++++++--
 query_optimizer/QueryOptimizerConfig.h.in | 17 +++++++++++
 6 files changed, 121 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/390310fe/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 642925f..c9578b0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 #   Copyright 2011-2015 Quickstep Technologies LLC.
-#   Copyright 2015 Pivotal Software, Inc.
+#   Copyright 2015-2016 Pivotal Software, Inc.
 #
 #   Licensed under the Apache License, Version 2.0 (the "License");
 #   you may not use this file except in compliance with the License.
@@ -137,6 +137,8 @@ if (ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT)
   )
 endif()
 
+option(ENABLE_DISTRIBUTED "Use the distributed version of Quickstep" OFF)
+
 # Turn on the QUICKSTEP_DEBUG flag in the source if this is a debug build.
 if (CMAKE_MAJOR_VERSION GREATER 2)
   cmake_policy(SET CMP0043 NEW)

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/390310fe/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 44c8750..2d09bee 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -15,6 +15,15 @@
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
 
+if (ENABLE_DISTRIBUTED)
+  set(QUICKSTEP_DISTRIBUTED TRUE)
+endif()
+
+configure_file (
+  "${CMAKE_CURRENT_SOURCE_DIR}/QueryOptimizerConfig.h.in"
+  "${CMAKE_CURRENT_BINARY_DIR}/QueryOptimizerConfig.h"
+)
+
 add_subdirectory(cost_model)
 add_subdirectory(expressions)
 add_subdirectory(logical)
@@ -126,6 +135,10 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
                       quickstep_types_containers_Tuple_proto
                       quickstep_utility_Macros
                       quickstep_utility_SqlError)
+if (ENABLE_DISTRIBUTED)
+  target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
+                        quickstep_catalog_Catalog_proto)
+endif()
 target_link_libraries(quickstep_queryoptimizer_LogicalGenerator
                       glog
                       quickstep_parser_ParseStatement
@@ -171,6 +184,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
                       quickstep_queryoptimizer_Validator
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_QueryHandle
+                      quickstep_catalog_Catalog_proto
                       quickstep_queryexecution_QueryContext_proto
                       quickstep_queryoptimizer_QueryPlan
                       quickstep_utility_Macros)

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/390310fe/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 7a8e996..abcdd6c 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -25,9 +25,18 @@
 #include <string>
 #include <type_traits>
 #include <unordered_map>
+
+#ifdef QUICKSTEP_DISTRIBUTED
+#include <unordered_set>
+#endif
+
 #include <utility>
 #include <vector>
 
+#ifdef QUICKSTEP_DISTRIBUTED
+#include "catalog/Catalog.pb.h"
+#endif
+
 #include "catalog/CatalogAttribute.hpp"
 #include "catalog/CatalogDatabase.hpp"
 #include "catalog/CatalogRelation.hpp"
@@ -185,6 +194,20 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) {
         drop_table_index,
         temporary_relation_info.producer_operator_index);
   }
+
+#ifdef QUICKSTEP_DISTRIBUTED
+  catalog_database_cache_proto_->set_name(optimizer_context_->catalog_database()->getName());
+
+  LOG(INFO) << "CatalogDatabaseCache proto has " << referenced_relation_ids_.size() << " relation(s)";
+  for (const relation_id rel_id : referenced_relation_ids_) {
+    const CatalogRelationSchema &relation =
+        optimizer_context_->catalog_database()->getRelationSchemaById(rel_id);
+    LOG(INFO) << "RelationSchema " << rel_id
+              << ", name: " << relation.getName()
+              << ", " << relation.size()  << " attribute(s)";
+    catalog_database_cache_proto_->add_relations()->MergeFrom(relation.getProto());
+  }
+#endif
 }
 
 void ExecutionGenerator::generatePlanInternal(
@@ -289,6 +312,10 @@ void ExecutionGenerator::createTemporaryCatalogRelation(
   const relation_id output_rel_id = optimizer_context_->catalog_database()->addRelation(
       catalog_relation.release());
 
+#ifdef QUICKSTEP_DISTRIBUTED
+  referenced_relation_ids_.insert(output_rel_id);
+#endif
+
   insert_destination_proto->set_insert_destination_type(S::InsertDestinationType::BLOCK_POOL);
   insert_destination_proto->set_relation_id(output_rel_id);
 }
@@ -335,6 +362,11 @@ void ExecutionGenerator::convertTableReference(
   // parent (e.g. the substitution map from an AttributeReference
   // to a CatalogAttribute).
   const CatalogRelation *catalog_relation = physical_table_reference->relation();
+
+#ifdef QUICKSTEP_DISTRIBUTED
+  referenced_relation_ids_.insert(catalog_relation->getID());
+#endif
+
   const std::vector<E::AttributeReferencePtr> &attribute_references =
       physical_table_reference->attribute_list();
   DCHECK_EQ(attribute_references.size(), catalog_relation->size());
@@ -994,8 +1026,14 @@ void ExecutionGenerator::convertDeleteTuples(
 void ExecutionGenerator::convertDropTable(
     const P::DropTablePtr &physical_plan) {
   // DropTable is converted to a DropTable operator.
+  const CatalogRelation &catalog_relation = *physical_plan->catalog_relation();
+
+#ifdef QUICKSTEP_DISTRIBUTED
+  referenced_relation_ids_.insert(catalog_relation.getID());
+#endif
+
   execution_plan_->addRelationalOperator(
-      new DropTableOperator(*physical_plan->catalog_relation(),
+      new DropTableOperator(catalog_relation,
                             optimizer_context_->catalog_database()));
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/390310fe/query_optimizer/ExecutionGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp
index 11de106..df47b31 100644
--- a/query_optimizer/ExecutionGenerator.hpp
+++ b/query_optimizer/ExecutionGenerator.hpp
@@ -1,6 +1,6 @@
 /**
  *   Copyright 2011-2015 Quickstep Technologies LLC.
- *   Copyright 2015 Pivotal Software, Inc.
+ *   Copyright 2015-2016 Pivotal Software, Inc.
  *
  *   Licensed under the Apache License, Version 2.0 (the "License");
  *   you may not use this file except in compliance with the License.
@@ -21,6 +21,11 @@
 #include <memory>
 #include <string>
 #include <unordered_map>
+
+#ifdef QUICKSTEP_DISTRIBUTED
+#include <unordered_set>
+#endif
+
 #include <vector>
 
 #include "catalog/CatalogTypedefs.hpp"
@@ -54,6 +59,8 @@
 #include "query_optimizer/physical/UpdateTable.hpp"
 #include "utility/Macros.hpp"
 
+#include "glog/logging.h"
+
 namespace quickstep {
 
 class CatalogAttribute;
@@ -61,6 +68,11 @@ class CatalogRelation;
 class Predicate;
 
 namespace serialization {
+
+#ifdef QUICKSTEP_DISTRIBUTED
+class CatalogDatabase;
+#endif
+
 class InsertDestination;
 }  // namespace serialization
 
@@ -85,10 +97,14 @@ class ExecutionGenerator {
    */
   ExecutionGenerator(OptimizerContext *optimizer_context,
                      QueryHandle *query_handle)
-      : optimizer_context_(optimizer_context),
-        query_handle_(query_handle),
-        execution_plan_(query_handle->getQueryPlanMutable()),
-        query_context_proto_(query_handle->getQueryContextProtoMutable()) {
+      : optimizer_context_(DCHECK_NOTNULL(optimizer_context)),
+        query_handle_(DCHECK_NOTNULL(query_handle)),
+        execution_plan_(DCHECK_NOTNULL(query_handle->getQueryPlanMutable())),
+        query_context_proto_(DCHECK_NOTNULL(query_handle->getQueryContextProtoMutable())) {
+#ifdef QUICKSTEP_DISTRIBUTED
+    catalog_database_cache_proto_ = DCHECK_NOTNULL(query_handle->getCatalogDatabaseCacheProtoMutable());
+#endif
+
     setupCostModel();
   }
 
@@ -368,6 +384,13 @@ class ExecutionGenerator {
   QueryPlan *execution_plan_;  // A part of QueryHandle.
   serialization::QueryContext *query_context_proto_;  // A part of QueryHandle.
 
+#ifdef QUICKSTEP_DISTRIBUTED
+  serialization::CatalogDatabase *catalog_database_cache_proto_;  // A part of QueryHandle.
+
+  // Used to bookkeep relation ids for 'catalog_database_cache_proto_'.
+  std::unordered_set<relation_id> referenced_relation_ids_;
+#endif
+
   /**
    * @brief Used to generate distinct relation names for temporary relations.
    */

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/390310fe/query_optimizer/QueryHandle.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/QueryHandle.hpp b/query_optimizer/QueryHandle.hpp
index 8819cc3..a17d3e8 100644
--- a/query_optimizer/QueryHandle.hpp
+++ b/query_optimizer/QueryHandle.hpp
@@ -1,5 +1,5 @@
 /**
- *   Copyright 2015 Pivotal Software, Inc.
+ *   Copyright 2015-2016 Pivotal Software, Inc.
  *
  *   Licensed under the Apache License, Version 2.0 (the "License");
  *   you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@
 #include <memory>
 #include <utility>
 
+#include "catalog/Catalog.pb.h"
 #include "query_execution/QueryContext.pb.h"
 #include "query_optimizer/QueryPlan.hpp"
 #include "utility/Macros.hpp"
@@ -41,7 +42,7 @@ class QueryHandle {
   /**
    * @brief Constructor.
    *
-   * @param The given query id.
+   * @param query_id The given query id.
    */
   explicit QueryHandle(const std::size_t query_id)
       : query_id_(query_id),
@@ -81,6 +82,20 @@ class QueryHandle {
   }
 
   /**
+    * @return The catalog database cache in the protobuf format.
+    */
+  const serialization::CatalogDatabase& getCatalogDatabaseCacheProto() const {
+    return catalog_database_cache_proto_;
+  }
+
+  /**
+   * @return The mutable catalog database cache in the protobuf format.
+   */
+  serialization::CatalogDatabase* getCatalogDatabaseCacheProtoMutable() {
+    return &catalog_database_cache_proto_;
+  }
+
+  /**
    * @brief Get the query result relation.
    */
   const CatalogRelation* getQueryResultRelation() const {
@@ -101,6 +116,9 @@ class QueryHandle {
 
   serialization::QueryContext query_context_proto_;
 
+  // TODO(quickstep-team): Use Catalog to support multiple databases.
+  serialization::CatalogDatabase catalog_database_cache_proto_;
+
   // NOTE(zuyu): The relation gets created by the optimizer,
   //             and deleted by the Cli shell.
   const CatalogRelation *query_result_relation_;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/390310fe/query_optimizer/QueryOptimizerConfig.h.in
----------------------------------------------------------------------
diff --git a/query_optimizer/QueryOptimizerConfig.h.in b/query_optimizer/QueryOptimizerConfig.h.in
new file mode 100644
index 0000000..da0fa18
--- /dev/null
+++ b/query_optimizer/QueryOptimizerConfig.h.in
@@ -0,0 +1,17 @@
+/**
+ *   Copyright 2016 Pivotal Software, Inc.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ **/
+
+#cmakedefine QUICKSTEP_DISTRIBUTED


[5/5] incubator-quickstep git commit: QUICKSTEP-1: Add incubation disclaimer to README.md.

Posted by ji...@apache.org.
QUICKSTEP-1: Add incubation disclaimer to README.md.

This closes #2


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/cd93fd74
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/cd93fd74
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/cd93fd74

Branch: refs/heads/master
Commit: cd93fd7434d9246738b00cd95e453e8d480a7a75
Parents: 94e1b8d
Author: Zuyu Zhang <zz...@pivotal.io>
Authored: Tue Apr 19 12:23:51 2016 -0500
Committer: Jignesh Patel <jm...@hotmail.com>
Committed: Thu Apr 21 10:00:34 2016 -0500

----------------------------------------------------------------------
 DISCLAIMER | 11 +++++++++++
 README.md  | 21 ++++++++++++++++++---
 2 files changed, 29 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd93fd74/DISCLAIMER
----------------------------------------------------------------------
diff --git a/DISCLAIMER b/DISCLAIMER
new file mode 100644
index 0000000..09d629d
--- /dev/null
+++ b/DISCLAIMER
@@ -0,0 +1,11 @@
+Apache Quickstep is an effort undergoing incubation at the Apache Software
+Foundation (ASF), sponsored by the Apache Incubator PMC.
+
+Incubation is required of all newly accepted projects until a further
+review indicates that the infrastructure, communications, and decision
+making process have stabilized in a manner consistent with other
+successful ASF projects.
+
+While incubation status is not necessarily a reflection of the
+completeness or stability of the code, it does indicate that the
+project has yet to be fully endorsed by the ASF.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd93fd74/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index c44e0b9..7cea80c 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,16 @@
-# Quickstep
+# Apache Quickstep (Incubating)
 
 [![Travis Widget]][Travis]
 
 [Travis]: https://travis-ci.org/pivotalsoftware/quickstep
 [Travis Widget]: https://travis-ci.org/pivotalsoftware/quickstep.svg?branch=master
 
-Quickstep is an experimental high-performance database engine designed with the
-aim of Data at Bare-Metal Speed. It began life in 2011 as a 
+Apache Quickstep is an experimental high-performance database engine designed with the
+aim of Data at Bare-Metal Speed. It began life in 2011 as a
 [research project at the University of Wisconsin](https://quickstep.cs.wisc.edu)
 and was acquired by [Pivotal](https://pivotal.io) in 2015.
+Quickstep entered incubation at the
+[Apache Software Foundation](https://www.apache.org) in April, 2016.
 
 ## Getting Started (Building)
 
@@ -91,3 +93,16 @@ concerns of a database system. The main modules are:
 ## Licensing
 
 Quickstep is licensed under the Apache License, Version 2.0. See [LICENSE](https://github.com/pivotalsoftware/quickstep/blob/master/LICENSE) for the full license text.
+
+## Disclaimer
+Apache Quickstep is an effort undergoing incubation at the Apache Software
+Foundation (ASF), sponsored by the Apache Incubator PMC.
+
+Incubation is required of all newly accepted projects until a further
+review indicates that the infrastructure, communications, and decision
+making process have stabilized in a manner consistent with other
+successful ASF projects.
+
+While incubation status is not necessarily a reflection of the
+completeness or stability of the code, it does indicate that the
+project has yet to be fully endorsed by the ASF.


[4/5] incubator-quickstep git commit: Deleted Blobs After Use in Parallel Loading. (#179)

Posted by ji...@apache.org.
Deleted Blobs After Use in Parallel Loading. (#179)

Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/94e1b8d3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/94e1b8d3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/94e1b8d3

Branch: refs/heads/master
Commit: 94e1b8d35d923048180be9c953f0fc5aba148863
Parents: 390310f
Author: Zuyu ZHANG <zu...@users.noreply.github.com>
Authored: Thu Apr 21 09:27:31 2016 -0500
Committer: Jignesh Patel <pa...@users.noreply.github.com>
Committed: Thu Apr 21 09:27:31 2016 -0500

----------------------------------------------------------------------
 relational_operators/TextScanOperator.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94e1b8d3/relational_operators/TextScanOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/TextScanOperator.cpp b/relational_operators/TextScanOperator.cpp
index 80cf953..e6d5200 100644
--- a/relational_operators/TextScanOperator.cpp
+++ b/relational_operators/TextScanOperator.cpp
@@ -288,7 +288,7 @@ void TextScanWorkOrder::execute() {
 
     std::fclose(file);
   } else {
-    BlobReference blob = storage_manager_->getBlob(text_blob_);
+    MutableBlobReference blob = storage_manager_->getBlobMutable(text_blob_);
     const char *blob_pos = static_cast<const char*>(blob->getMemory());
     const char *blob_end = blob_pos + text_size_;
     bool have_row = false;
@@ -300,6 +300,10 @@ void TextScanWorkOrder::execute() {
         output_destination_->insertTupleInBatch(tuple);
       }
     } while (have_row);
+
+    // Drop the consumed blob produced by TextSplitWorkOrder.
+    blob.release();
+    storage_manager_->deleteBlockOrBlobFile(text_blob_);
   }
 }