You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/08/21 17:29:52 UTC

[1/2] incubator-quickstep git commit: Merged with fuse-select-join [Forced Update!]

Repository: incubator-quickstep
Updated Branches:
  refs/heads/LIP-for-tpch-merged f4af59652 -> 9a421dd13 (forced update)


Merged with fuse-select-join


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/9a421dd1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/9a421dd1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/9a421dd1

Branch: refs/heads/LIP-for-tpch-merged
Commit: 9a421dd13ad4d3c9de074017186eaf6081e4424a
Parents: b163289
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Thu Aug 18 21:17:56 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Sun Aug 21 12:03:30 2016 -0500

----------------------------------------------------------------------
 expressions/scalar/ScalarAttribute.cpp          |   2 +-
 query_optimizer/ExecutionGenerator.cpp          |   2 +-
 .../cost_model/StarSchemaSimpleCostModel.cpp    |   4 +-
 query_optimizer/physical/HashJoin.hpp           |   4 +-
 query_optimizer/physical/Selection.cpp          |  15 ++-
 query_optimizer/physical/Selection.hpp          |   2 +
 query_optimizer/rules/CMakeLists.txt            |   3 +-
 query_optimizer/rules/FuseJoinSelect.cpp        |   9 +-
 relational_operators/CMakeLists.txt             |   1 +
 relational_operators/HashJoinOperator.cpp       | 103 +++++++++++++------
 relational_operators/HashJoinOperator.hpp       |   8 ++
 storage/AggregationOperationState.cpp           |   3 +-
 storage/CMakeLists.txt                          |   1 +
 storage/HashTable.hpp                           |   8 +-
 storage/StorageBlock.cpp                        |   6 +-
 storage/StorageManager.hpp                      |  14 +--
 utility/EventProfiler.cpp                       |   2 +-
 utility/EventProfiler.hpp                       |   2 +-
 utility/PlanVisualizer.cpp                      |   4 +
 19 files changed, 127 insertions(+), 66 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/expressions/scalar/ScalarAttribute.cpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarAttribute.cpp b/expressions/scalar/ScalarAttribute.cpp
index 08dc9dd..1a54d16 100644
--- a/expressions/scalar/ScalarAttribute.cpp
+++ b/expressions/scalar/ScalarAttribute.cpp
@@ -143,7 +143,7 @@ ColumnVector* ScalarAttribute::getAllValuesForJoin(
   ValueAccessor *accessor = using_left_relation ? left_accessor
                                                 : right_accessor;
 
-  return InvokeOnValueAccessorNotAdapter(
+  return InvokeOnAnyValueAccessor(
       accessor,
       [&joined_tuple_ids,
        &attr_id,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 457366e..8487fe7 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -694,7 +694,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
 
   // Convert the left filter predicate proto.
   QueryContext::predicate_id left_filter_predicate_index = QueryContext::kInvalidPredicateId;
-  if (physical_plan->residual_predicate()) {
+  if (physical_plan->left_filter_predicate()) {
     left_filter_predicate_index = query_context_proto_->predicates_size();
     unique_ptr<const Predicate> left_filter_predicate(convertPredicate(physical_plan->left_filter_predicate()));
     query_context_proto_->add_predicates()->CopyFrom(left_filter_predicate->getProto());

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index ba7a3c6..d0205b9 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -253,11 +253,11 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
           double unit_selectivity = 1.0 / it->second;
           return comparison_expression->isEqualityComparisonPredicate()
                      ? unit_selectivity
-                     : 0.5;
+                     : 0.1;
         }
       }
 
-      return comparison_expression->isEqualityComparisonPredicate() ? 0.1 : 0.5;
+      return 0.1;
     }
     case E::ExpressionType::kLogicalAnd: {
       const E::LogicalAndPtr &logical_and =

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/query_optimizer/physical/HashJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.hpp b/query_optimizer/physical/HashJoin.hpp
index e24dbeb..32b4f21 100644
--- a/query_optimizer/physical/HashJoin.hpp
+++ b/query_optimizer/physical/HashJoin.hpp
@@ -121,8 +121,8 @@ class HashJoin : public BinaryJoin {
                   residual_predicate_,
                   project_expressions(),
                   join_type_,
-                  bloom_filter_config_,
-                  left_filter_predicate_);
+                  left_filter_predicate_,
+                  bloom_filter_config_);
   }
 
   std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() const override;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/query_optimizer/physical/Selection.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/Selection.cpp b/query_optimizer/physical/Selection.cpp
index 73af500..22323f4 100644
--- a/query_optimizer/physical/Selection.cpp
+++ b/query_optimizer/physical/Selection.cpp
@@ -20,6 +20,7 @@
 #include "query_optimizer/physical/Selection.hpp"
 
 #include <string>
+#include <unordered_set>
 #include <vector>
 
 #include "query_optimizer/OptimizerTree.hpp"
@@ -36,6 +37,19 @@ namespace physical {
 
 namespace E = ::quickstep::optimizer::expressions;
 
+bool Selection::isSimpleSelection() const {
+  std::unordered_set<E::ExprId> input_attr_ids;
+  for (const auto &attr : input()->getOutputAttributes()) {
+    input_attr_ids.emplace(attr->id());
+  }
+  for (const auto &attr : getOutputAttributes()) {
+    if (input_attr_ids.find(attr->id()) == input_attr_ids.end()) {
+      return false;
+    }
+  }
+  return true;
+}
+
 PhysicalPtr Selection::copyWithNewChildren(
     const std::vector<PhysicalPtr> &new_children) const {
   DCHECK_EQ(children().size(), new_children.size());
@@ -87,7 +101,6 @@ bool Selection::impliesUniqueAttributes(
   return input()->impliesUniqueAttributes(attributes);
 }
 
-
 void Selection::getFieldStringItems(
     std::vector<std::string> *inline_field_names,
     std::vector<std::string> *inline_field_values,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/query_optimizer/physical/Selection.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/Selection.hpp b/query_optimizer/physical/Selection.hpp
index bb50314..bc0f072 100644
--- a/query_optimizer/physical/Selection.hpp
+++ b/query_optimizer/physical/Selection.hpp
@@ -75,6 +75,8 @@ class Selection : public Physical {
    */
   inline const PhysicalPtr& input() const { return children()[0]; }
 
+  bool isSimpleSelection() const;
+
   PhysicalPtr copyWithNewChildren(
       const std::vector<PhysicalPtr> &new_children) const override;
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 9990a4d..b675bc6 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -74,7 +74,8 @@ target_link_libraries(quickstep_queryoptimizer_rules_FuseJoinSelect
                       quickstep_queryoptimizer_physical_Selection
                       quickstep_queryoptimizer_physical_TableReference
                       quickstep_queryoptimizer_rules_BottomUpRule
-                      quickstep_queryoptimizer_rules_Rule)
+                      quickstep_queryoptimizer_rules_Rule
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_rules_GenerateJoins
                       glog
                       quickstep_queryoptimizer_expressions_AttributeReference

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/query_optimizer/rules/FuseJoinSelect.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/FuseJoinSelect.cpp b/query_optimizer/rules/FuseJoinSelect.cpp
index e40acfc..87dbc95 100644
--- a/query_optimizer/rules/FuseJoinSelect.cpp
+++ b/query_optimizer/rules/FuseJoinSelect.cpp
@@ -16,21 +16,18 @@ namespace E = ::quickstep::optimizer::expressions;
 P::PhysicalPtr FuseJoinSelect::applyToNode(const P::PhysicalPtr &input) {
   P::HashJoinPtr hash_join;
   P::SelectionPtr selection;
-  P::TableReferencePtr table_reference;
 
   if (P::SomeHashJoin::MatchesWithConditionalCast(input, &hash_join)
-      && hash_join->join_type() == P::HashJoin::JoinType::kInnerJoin
       && P::SomeSelection::MatchesWithConditionalCast(hash_join->left(), &selection)
-      && P::SomeTableReference::MatchesWithConditionalCast(selection->input(), &table_reference)) {
-    const E::PredicatePtr filter_predicate = selection->filter_predicate();
-    P::PhysicalPtr output = P::HashJoin::Create(table_reference,
+      && selection->isSimpleSelection()) {
+    P::PhysicalPtr output = P::HashJoin::Create(selection->input(),
                                                 hash_join->right(),
                                                 hash_join->left_join_attributes(),
                                                 hash_join->right_join_attributes(),
                                                 hash_join->residual_predicate(),
                                                 hash_join->project_expressions(),
                                                 hash_join->join_type(),
-                                                filter_predicate);
+                                                selection->filter_predicate());
     LOG_APPLYING_RULE(input, output);
     return output;
   }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index 6d71794..af411ea 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -202,6 +202,7 @@ target_link_libraries(quickstep_relationaloperators_HashJoinOperator
                       quickstep_types_TypedValue
                       quickstep_types_containers_ColumnVector
                       quickstep_types_containers_ColumnVectorsValueAccessor
+                      quickstep_utility_EventProfiler
                       quickstep_utility_Macros
                       tmb)
 target_link_libraries(quickstep_relationaloperators_InsertOperator

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index 7357acd..98314b8 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -48,6 +48,7 @@
 #include "types/TypedValue.hpp"
 #include "types/containers/ColumnVector.hpp"
 #include "types/containers/ColumnVectorsValueAccessor.hpp"
+#include "utility/EventProfiler.hpp"
 
 #include "gflags/gflags.h"
 #include "glog/logging.h"
@@ -216,7 +217,8 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
                                      selection,
                                      hash_table,
                                      output_destination,
-                                     storage_manager),
+                                     storage_manager,
+                                     getOperatorIndex()),
               op_index_);
         }
         started_ = true;
@@ -237,7 +239,8 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
                 selection,
                 hash_table,
                 output_destination,
-                storage_manager),
+                storage_manager,
+                getOperatorIndex()),
             op_index_);
         ++num_workorders_generated_;
       }  // end while
@@ -437,13 +440,19 @@ void HashInnerJoinWorkOrder::execute() {
       storage_manager_->getBlock(block_id_, probe_relation_));
   const TupleStorageSubBlock &probe_store = probe_block->getTupleStorageSubBlock();
 
+  auto *container = simple_profiler.getContainer();
+  auto *left_filter_line = container->getEventLine("left_filter");
+  left_filter_line->emplace_back();
   std::unique_ptr<ValueAccessor> probe_accessor(
       probe_store.createValueAccessor(
           left_filter_predicate_ == nullptr
           ? nullptr
           : probe_block->getMatchesForPredicate(left_filter_predicate_)));
+  left_filter_line->back().endEvent();
+  left_filter_line->back().setPayload(getOperatorIndex(), 0);
 
-
+  auto *collector_line = container->getEventLine("collector");
+  collector_line->emplace_back();
   MapBasedJoinedTupleCollector collector;
   if (join_key_attributes_.size() == 1) {
     hash_table_.getAllFromValueAccessor(
@@ -458,16 +467,30 @@ void HashInnerJoinWorkOrder::execute() {
         any_join_key_attributes_nullable_,
         &collector);
   }
+  collector_line->back().endEvent();
+  collector_line->back().setPayload(getOperatorIndex(), 0);
 
   const relation_id build_relation_id = build_relation_.getID();
   const relation_id probe_relation_id = probe_relation_.getID();
 
+  auto *materialize_line = container->getEventLine("materialize");
+  auto *iterate_line = container->getEventLine("iterate_blocks");
+  auto *get_block_line = container->getEventLine("get_block");
+
+  materialize_line->emplace_back();
+  iterate_line->emplace_back();
   for (std::pair<const block_id, std::vector<std::pair<tuple_id, tuple_id>>>
            &build_block_entry : *collector.getJoinedTuples()) {
+    iterate_line->back().endEvent();
+    iterate_line->back().setPayload(getOperatorIndex(), build_block_entry.second.size());
+
+    get_block_line->emplace_back();
     BlockReference build_block =
         storage_manager_->getBlock(build_block_entry.first, build_relation_);
-    const TupleStorageSubBlock &build_store = build_block->getTupleStorageSubBlock();
-    std::unique_ptr<ValueAccessor> build_accessor(build_store.createValueAccessor());
+    get_block_line->back().endEvent();
+    get_block_line->back().setPayload(getOperatorIndex(), 0);
+//    const TupleStorageSubBlock &build_store = build_block->getTupleStorageSubBlock();
+//    std::unique_ptr<ValueAccessor> build_accessor(build_store.createValueAccessor());
 
     // Evaluate '*residual_predicate_', if any.
     //
@@ -479,23 +502,23 @@ void HashInnerJoinWorkOrder::execute() {
     // vectorized materialization and evaluation if the set of matches from the
     // hash join is below a reasonable threshold so that we don't blow up
     // temporary memory requirements to an unreasonable degree.
-    if (residual_predicate_ != nullptr) {
-      std::vector<std::pair<tuple_id, tuple_id>> filtered_matches;
-
-      for (const std::pair<tuple_id, tuple_id> &hash_match
-           : build_block_entry.second) {
-        if (residual_predicate_->matchesForJoinedTuples(*build_accessor,
-                                                        build_relation_id,
-                                                        hash_match.first,
-                                                        *probe_accessor,
-                                                        probe_relation_id,
-                                                        hash_match.second)) {
-          filtered_matches.emplace_back(hash_match);
-        }
-      }
-
-      build_block_entry.second = std::move(filtered_matches);
-    }
+//    if (residual_predicate_ != nullptr) {
+//      std::vector<std::pair<tuple_id, tuple_id>> filtered_matches;
+//
+//      for (const std::pair<tuple_id, tuple_id> &hash_match
+//           : build_block_entry.second) {
+//        if (residual_predicate_->matchesForJoinedTuples(*build_accessor,
+//                                                        build_relation_id,
+//                                                        hash_match.first,
+//                                                        *probe_accessor,
+//                                                        probe_relation_id,
+//                                                        hash_match.second)) {
+//          filtered_matches.emplace_back(hash_match);
+//        }
+//      }
+//
+//      build_block_entry.second = std::move(filtered_matches);
+//    }
 
     // TODO(chasseur): If all the output expressions are ScalarAttributes,
     // we could implement a similar fast-path to StorageBlock::selectSimple()
@@ -513,22 +536,36 @@ void HashInnerJoinWorkOrder::execute() {
     // benefit (probably only a real performance win when there are very few
     // matching tuples in each individual inner block but very many inner
     // blocks with at least one match).
-    ColumnVectorsValueAccessor temp_result;
-    for (vector<unique_ptr<const Scalar>>::const_iterator selection_cit = selection_.begin();
-         selection_cit != selection_.end();
-         ++selection_cit) {
-      temp_result.addColumn((*selection_cit)->getAllValuesForJoin(build_relation_id,
-                                                                  build_accessor.get(),
-                                                                  probe_relation_id,
-                                                                  probe_accessor.get(),
-                                                                  build_block_entry.second));
-    }
+//    ColumnVectorsValueAccessor temp_result;
+//    for (vector<unique_ptr<const Scalar>>::const_iterator selection_cit = selection_.begin();
+//         selection_cit != selection_.end();
+//         ++selection_cit) {
+//      temp_result.addColumn((*selection_cit)->getAllValuesForJoin(build_relation_id,
+//                                                                  build_accessor.get(),
+//                                                                  probe_relation_id,
+//                                                                  probe_accessor.get(),
+//                                                                  build_block_entry.second));
+//    }
 
     // NOTE(chasseur): calling the bulk-insert method of InsertDestination once
     // for each pair of joined blocks incurs some extra overhead that could be
     // avoided by keeping checked-out MutableBlockReferences across iterations
     // of this loop, but that would get messy when combined with partitioning.
-    output_destination_->bulkInsertTuples(&temp_result);
+//    output_destination_->bulkInsertTuples(&temp_result);
+
+    iterate_line->emplace_back();
+  }
+  iterate_line->back().endEvent();
+  iterate_line->back().setPayload(getOperatorIndex(), 0);
+  materialize_line->back().endEvent();
+  materialize_line->back().setPayload(getOperatorIndex(), collector.getJoinedTuples()->size());
+
+  if (build_relation_id == 0 &&
+      probe_relation_id == 0 &&
+      residual_predicate_ == nullptr &&
+      output_destination_ == nullptr &&
+      selection_.empty()) {
+    return;
   }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/relational_operators/HashJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index 05e16a4..4f53daa 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -511,6 +511,10 @@ class HashSemiJoinWorkOrder : public WorkOrder {
 
   void execute() override;
 
+  const Predicate *left_filter_predicate() const {
+    return left_filter_predicate_;
+  }
+
  private:
   void executeWithoutResidualPredicate();
 
@@ -645,6 +649,10 @@ class HashAntiJoinWorkOrder : public WorkOrder {
     }
   }
 
+  const Predicate *left_filter_predicate() const {
+    return left_filter_predicate_;
+  }
+
  private:
   void executeWithoutResidualPredicate();
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/storage/AggregationOperationState.cpp
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.cpp b/storage/AggregationOperationState.cpp
index d85b5c4..1e8ddfd 100644
--- a/storage/AggregationOperationState.cpp
+++ b/storage/AggregationOperationState.cpp
@@ -460,7 +460,7 @@ void AggregationOperationState::aggregateBlockHashTable(const block_id input_blo
             batch_size_try < num_tuples_left ? batch_size_try : num_tuples_left;
         for (std::size_t i = 0; i < batch_size; ++i) {
           accessor->next();
-          batch.push_back(accessor->getCurrentPosition());
+          batch[i] = accessor->getCurrentPosition();
         }
 
         std::size_t num_hits =
@@ -469,7 +469,6 @@ void AggregationOperationState::aggregateBlockHashTable(const block_id input_blo
           filtered->set(batch[t], true);
         }
 
-        batch.clear();
         num_tuples_left -= batch_size;
         batch_size_try = batch_size * 2;
       } while (num_tuples_left > 0);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/storage/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt
index 11e7f40..fad66ef 100644
--- a/storage/CMakeLists.txt
+++ b/storage/CMakeLists.txt
@@ -994,6 +994,7 @@ target_link_libraries(quickstep_storage_StorageManager
                       quickstep_threading_SpinSharedMutex
                       quickstep_utility_Alignment
                       quickstep_utility_CalculateInstalledMemory
+                      quickstep_utility_EventProfiler
                       quickstep_utility_Macros
                       quickstep_utility_ShardedLockManager
                       tmb)

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/storage/HashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index 9c50fc7..82205a1 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -2314,7 +2314,7 @@ void HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_
             batch_size_try < num_tuples_left ? batch_size_try : num_tuples_left;
         for (std::size_t i = 0; i < batch_size; ++i) {
           accessor->next();
-          batch.push_back(accessor->getCurrentPosition());
+          batch[i] = accessor->getCurrentPosition();
         }
 
         std::size_t num_hits =
@@ -2338,13 +2338,11 @@ void HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_
               break;
           }
         }
-        batch.clear();
+
         num_tuples_left -= batch_size;
         batch_size_try = batch_size * 2;
       } while (!accessor->iterationFinished());
-    }
-
-    else { // no Bloom filters to probe
+    } else { // no Bloom filters to probe
       while(accessor->next()) {
         TypedValue key = accessor->getTypedValue(key_attr_id);
         if (check_for_null_keys && key.isNull()) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/storage/StorageBlock.cpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.cpp b/storage/StorageBlock.cpp
index a115af9..4dc1fc7 100644
--- a/storage/StorageBlock.cpp
+++ b/storage/StorageBlock.cpp
@@ -389,7 +389,7 @@ void StorageBlock::select(const vector<unique_ptr<const Scalar>> &selection,
               batch_size_try < num_tuples_left ? batch_size_try : num_tuples_left;
           for (std::size_t i = 0; i < batch_size; ++i) {
             accessor->next();
-            batch.push_back(accessor->getCurrentPosition());
+            batch[i] = accessor->getCurrentPosition();
           }
 
           std::size_t num_hits =
@@ -398,7 +398,6 @@ void StorageBlock::select(const vector<unique_ptr<const Scalar>> &selection,
             matches->set(batch[t], true);
           }
 
-          batch.clear();
           num_tuples_left -= batch_size;
           batch_size_try = batch_size * 2;
         } while (num_tuples_left > 0);
@@ -465,7 +464,7 @@ void StorageBlock::selectSimple(const std::vector<attribute_id> &selection,
             batch_size_try < num_tuples_left ? batch_size_try : num_tuples_left;
         for (std::size_t i = 0; i < batch_size; ++i) {
           accessor->next();
-          batch.push_back(accessor->getCurrentPosition());
+          batch[i] = accessor->getCurrentPosition();
         }
 
         std::size_t num_hits =
@@ -474,7 +473,6 @@ void StorageBlock::selectSimple(const std::vector<attribute_id> &selection,
           matches->set(batch[t], true);
         }
 
-        batch.clear();
         num_tuples_left -= batch_size;
         batch_size_try = batch_size * 2;
       } while (num_tuples_left > 0);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/storage/StorageManager.hpp
----------------------------------------------------------------------
diff --git a/storage/StorageManager.hpp b/storage/StorageManager.hpp
index 066953b..da9a1d2 100644
--- a/storage/StorageManager.hpp
+++ b/storage/StorageManager.hpp
@@ -96,9 +96,10 @@ class StorageManager {
       : StorageManager(storage_path,
                        FLAGS_block_domain,
                        FLAGS_buffer_pool_slots,
-                       LRUKEvictionPolicyFactory::ConstructLRUKEvictionPolicy(
-                           2,
-                           std::chrono::milliseconds(200))) {
+//                       LRUKEvictionPolicyFactory::ConstructLRUKEvictionPolicy(
+//                           2,
+//                           std::chrono::milliseconds(200))) {
+                       new EvictAnyBlockEvictionPolicy()) {
   }
 
   /**
@@ -122,9 +123,10 @@ class StorageManager {
       : StorageManager(storage_path,
                        FLAGS_block_domain,
                        max_memory_usage,
-                       LRUKEvictionPolicyFactory::ConstructLRUKEvictionPolicy(
-                           2,
-                           std::chrono::milliseconds(200))) {
+//                       LRUKEvictionPolicyFactory::ConstructLRUKEvictionPolicy(
+//                           2,
+//                           std::chrono::milliseconds(200))) {
+                       new EvictAnyBlockEvictionPolicy()) {
   }
 
 #ifdef QUICKSTEP_DISTRIBUTED

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/utility/EventProfiler.cpp
----------------------------------------------------------------------
diff --git a/utility/EventProfiler.cpp b/utility/EventProfiler.cpp
index e345993..9ab917b 100644
--- a/utility/EventProfiler.cpp
+++ b/utility/EventProfiler.cpp
@@ -23,7 +23,7 @@
 
 namespace quickstep {
 
-EventProfiler<int, std::size_t, std::size_t> simple_profiler;
+EventProfiler<std::string, std::size_t, std::size_t> simple_profiler;
 EventProfiler<std::size_t> relop_profiler;
 
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/utility/EventProfiler.hpp
----------------------------------------------------------------------
diff --git a/utility/EventProfiler.hpp b/utility/EventProfiler.hpp
index f7fa598..1cbd830 100644
--- a/utility/EventProfiler.hpp
+++ b/utility/EventProfiler.hpp
@@ -178,7 +178,7 @@ class EventProfiler {
   Mutex mutex_;
 };
 
-extern EventProfiler<int, std::size_t, std::size_t> simple_profiler;
+extern EventProfiler<std::string, std::size_t, std::size_t> simple_profiler;
 extern EventProfiler<std::size_t> relop_profiler;
 
 /** @} */

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9a421dd1/utility/PlanVisualizer.cpp
----------------------------------------------------------------------
diff --git a/utility/PlanVisualizer.cpp b/utility/PlanVisualizer.cpp
index e4df69c..0e662b9 100644
--- a/utility/PlanVisualizer.cpp
+++ b/utility/PlanVisualizer.cpp
@@ -189,6 +189,10 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
         node_info.labels.emplace_back("RIGHT join attrs unique");
       }
 
+      if (hash_join->left_filter_predicate()) {
+        node_info.labels.emplace_back("has left filter predicate");
+      }
+
       const auto &bf_config = hash_join->bloom_filter_config();
       for (const auto &bf : bf_config.build_side_bloom_filters) {
         node_info.labels.emplace_back(


[2/2] incubator-quickstep git commit: Initial commit.

Posted by ji...@apache.org.
Initial commit.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/b163289c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/b163289c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/b163289c

Branch: refs/heads/LIP-for-tpch-merged
Commit: b163289c3bfb437a9fc9b62e85b07094c0357304
Parents: 1111ec5
Author: Hakan Memisoglu <ha...@gmail.com>
Authored: Tue Aug 16 16:40:27 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Sun Aug 21 12:03:30 2016 -0500

----------------------------------------------------------------------
 query_optimizer/CMakeLists.txt               |  1 +
 query_optimizer/ExecutionGenerator.cpp       |  9 +++++
 query_optimizer/PhysicalGenerator.cpp        |  3 ++
 query_optimizer/physical/HashJoin.cpp        |  5 +++
 query_optimizer/physical/HashJoin.hpp        | 13 ++++++-
 query_optimizer/rules/AttachBloomFilters.cpp |  1 +
 query_optimizer/rules/CMakeLists.txt         | 11 ++++++
 query_optimizer/rules/FuseJoinSelect.cpp     | 43 +++++++++++++++++++++++
 query_optimizer/rules/FuseJoinSelect.hpp     | 33 +++++++++++++++++
 relational_operators/HashJoinOperator.cpp    | 13 ++++++-
 relational_operators/HashJoinOperator.hpp    | 12 +++++++
 relational_operators/WorkOrder.proto         |  1 +
 12 files changed, 143 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b163289c/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 7440151..849caaa 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -201,6 +201,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
                       quickstep_queryoptimizer_logical_Logical
                       quickstep_queryoptimizer_physical_Physical
                       quickstep_queryoptimizer_rules_AttachBloomFilters
+                      quickstep_queryoptimizer_rules_FuseJoinSelect
                       quickstep_queryoptimizer_rules_PruneColumns
                       quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
                       quickstep_queryoptimizer_rules_SwapProbeBuild

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b163289c/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index f8559ec..457366e 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -692,6 +692,14 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
     query_context_proto_->add_predicates()->CopyFrom(residual_predicate->getProto());
   }
 
+  // Convert the left (probe-side) filter predicate proto; the index stays invalid when no filter was fused in.
+  QueryContext::predicate_id left_filter_predicate_index = QueryContext::kInvalidPredicateId;
+  if (physical_plan->left_filter_predicate() != nullptr) {
+    left_filter_predicate_index = query_context_proto_->predicates_size();
+    unique_ptr<const Predicate> left_filter_predicate(convertPredicate(physical_plan->left_filter_predicate()));
+    query_context_proto_->add_predicates()->CopyFrom(left_filter_predicate->getProto());
+  }
+
   // Convert the project expressions proto.
   const QueryContext::scalar_group_id project_expressions_group_index =
       query_context_proto_->scalar_groups_size();
@@ -796,6 +804,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
               insert_destination_index,
               join_hash_table_index,
               residual_predicate_index,
+              left_filter_predicate_index,
               project_expressions_group_index,
               is_selection_on_build.get(),
               join_type));

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b163289c/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 9ee685d..e093272 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -27,6 +27,7 @@
 #include "query_optimizer/logical/Logical.hpp"
 #include "query_optimizer/physical/Physical.hpp"
 #include "query_optimizer/rules/AttachBloomFilters.hpp"
+#include "query_optimizer/rules/FuseJoinSelect.hpp"
 #include "query_optimizer/rules/PruneColumns.hpp"
 #include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
 #include "query_optimizer/rules/SwapProbeBuild.hpp"
@@ -102,6 +103,8 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
   }
   rules.emplace_back(new PruneColumns());
   // rules.emplace_back(new SwapProbeBuild());
+  rules.emplace_back(new FuseJoinSelect());
+  rules.emplace_back(new PruneColumns());
   rules.emplace_back(new AttachBloomFilters());
 
   for (std::unique_ptr<Rule<P::Physical>> &rule : rules) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b163289c/query_optimizer/physical/HashJoin.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.cpp b/query_optimizer/physical/HashJoin.cpp
index 883c87a..dc564a7 100644
--- a/query_optimizer/physical/HashJoin.cpp
+++ b/query_optimizer/physical/HashJoin.cpp
@@ -111,6 +111,11 @@ void HashJoin::getFieldStringItems(
     non_container_child_field_names->push_back("residual_predicate");
     non_container_child_fields->push_back(residual_predicate_);
   }
+  if (left_filter_predicate_ != nullptr) {
+    non_container_child_field_names->push_back("left_filter_predicate");
+    non_container_child_fields->push_back(left_filter_predicate_);
+  }
+
   container_child_field_names->push_back("left_join_attributes");
   container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(left_join_attributes_));
   container_child_field_names->push_back("right_join_attributes");

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b163289c/query_optimizer/physical/HashJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.hpp b/query_optimizer/physical/HashJoin.hpp
index a830d0b..e24dbeb 100644
--- a/query_optimizer/physical/HashJoin.hpp
+++ b/query_optimizer/physical/HashJoin.hpp
@@ -107,6 +107,10 @@ class HashJoin : public BinaryJoin {
     return join_type_;
   }
 
+  const expressions::PredicatePtr& left_filter_predicate() const {
+    return left_filter_predicate_;
+  }
+
   PhysicalPtr copyWithNewChildren(
       const std::vector<PhysicalPtr> &new_children) const override {
     DCHECK_EQ(children().size(), new_children.size());
@@ -117,7 +121,8 @@ class HashJoin : public BinaryJoin {
                   residual_predicate_,
                   project_expressions(),
                   join_type_,
-                  bloom_filter_config_);
+                  left_filter_predicate_,
+                  bloom_filter_config_);
   }
 
   std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() const override;
@@ -144,6 +149,7 @@ class HashJoin : public BinaryJoin {
    * @param residual_predicate Optional filtering predicate evaluated after join.
    * @param project_expressions The project expressions.
    * @param Join type of this hash join.
+   * @param left_filter_predicate Optional filtering predicate for probe side before join.
    * @return An immutable physical HashJoin.
    */
   static HashJoinPtr Create(
@@ -154,6 +160,7 @@ class HashJoin : public BinaryJoin {
       const expressions::PredicatePtr &residual_predicate,
       const std::vector<expressions::NamedExpressionPtr> &project_expressions,
       const JoinType join_type,
+      const expressions::PredicatePtr &left_filter_predicate = nullptr,
       const BloomFilterConfig bloom_filter_config = BloomFilterConfig()) {
     return HashJoinPtr(
         new HashJoin(left,
@@ -163,6 +170,7 @@ class HashJoin : public BinaryJoin {
                      residual_predicate,
                      project_expressions,
                      join_type,
+                     left_filter_predicate,
                      bloom_filter_config));
   }
 
@@ -184,12 +192,14 @@ class HashJoin : public BinaryJoin {
       const expressions::PredicatePtr &residual_predicate,
       const std::vector<expressions::NamedExpressionPtr> &project_expressions,
       const JoinType join_type,
+      const expressions::PredicatePtr &left_filter_predicate,
       const BloomFilterConfig &bloom_filter_config)
       : BinaryJoin(left, right, project_expressions),
         left_join_attributes_(left_join_attributes),
         right_join_attributes_(right_join_attributes),
         residual_predicate_(residual_predicate),
         join_type_(join_type),
+        left_filter_predicate_(left_filter_predicate),
         bloom_filter_config_(bloom_filter_config) {
   }
 
@@ -197,6 +207,7 @@ class HashJoin : public BinaryJoin {
   std::vector<expressions::AttributeReferencePtr> right_join_attributes_;
   expressions::PredicatePtr residual_predicate_;
   JoinType join_type_;
+  expressions::PredicatePtr left_filter_predicate_;
   BloomFilterConfig bloom_filter_config_;
 
   DISALLOW_COPY_AND_ASSIGN(HashJoin);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b163289c/query_optimizer/rules/AttachBloomFilters.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachBloomFilters.cpp b/query_optimizer/rules/AttachBloomFilters.cpp
index 898d831..f86ba60 100644
--- a/query_optimizer/rules/AttachBloomFilters.cpp
+++ b/query_optimizer/rules/AttachBloomFilters.cpp
@@ -338,6 +338,7 @@ P::PhysicalPtr AttachBloomFilters::performAttach(const physical::PhysicalPtr &no
           hash_join->residual_predicate(),
           hash_join->project_expressions(),
           hash_join->join_type(),
+          hash_join->left_filter_predicate(),
           attach_it->second);
     }
   }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b163289c/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 54b1e59..9990a4d 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -21,6 +21,7 @@ add_subdirectory(tests)
 add_library(quickstep_queryoptimizer_rules_AttachBloomFilters AttachBloomFilters.cpp AttachBloomFilters.hpp)
 add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp)
 add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp)
+add_library(quickstep_queryoptimizer_rules_FuseJoinSelect FuseJoinSelect.cpp FuseJoinSelect.hpp)
 add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
 add_library(quickstep_queryoptimizer_rules_PruneColumns PruneColumns.cpp PruneColumns.hpp)
 add_library(quickstep_queryoptimizer_rules_PushDownFilter PushDownFilter.cpp PushDownFilter.hpp)
@@ -65,6 +66,15 @@ target_link_libraries(quickstep_queryoptimizer_rules_CollapseProject
                       quickstep_queryoptimizer_rules_Rule
                       quickstep_queryoptimizer_rules_RuleHelper
                       quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_FuseJoinSelect
+                      quickstep_queryoptimizer_expressions_Predicate
+                      quickstep_queryoptimizer_physical_HashJoin
+                      quickstep_queryoptimizer_physical_PatternMatcher
+                      quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_physical_Selection
+                      quickstep_queryoptimizer_physical_TableReference
+                      quickstep_queryoptimizer_rules_BottomUpRule
+                      quickstep_queryoptimizer_rules_Rule)
 target_link_libraries(quickstep_queryoptimizer_rules_GenerateJoins
                       glog
                       quickstep_queryoptimizer_expressions_AttributeReference
@@ -206,6 +216,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
                       quickstep_queryoptimizer_rules_AttachBloomFilters
                       quickstep_queryoptimizer_rules_BottomUpRule
                       quickstep_queryoptimizer_rules_CollapseProject
+                      quickstep_queryoptimizer_rules_FuseJoinSelect
                       quickstep_queryoptimizer_rules_GenerateJoins
                       quickstep_queryoptimizer_rules_PruneColumns
                       quickstep_queryoptimizer_rules_PushDownFilter

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b163289c/query_optimizer/rules/FuseJoinSelect.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/FuseJoinSelect.cpp b/query_optimizer/rules/FuseJoinSelect.cpp
new file mode 100644
index 0000000..e40acfc
--- /dev/null
+++ b/query_optimizer/rules/FuseJoinSelect.cpp
@@ -0,0 +1,43 @@
+#include "query_optimizer/rules/FuseJoinSelect.hpp"
+
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/TableReference.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace P = ::quickstep::optimizer::physical;
+namespace E = ::quickstep::optimizer::expressions;
+// Fuses a Selection that sits directly on a TableReference (left join input) into an inner HashJoin.
+P::PhysicalPtr FuseJoinSelect::applyToNode(const P::PhysicalPtr &input) {
+  P::HashJoinPtr hash_join;
+  P::SelectionPtr selection;
+  P::TableReferencePtr table_reference;
+  // NOTE(review): selection->project_expressions() is never consulted below; confirm dropping the Selection is safe.
+  if (P::SomeHashJoin::MatchesWithConditionalCast(input, &hash_join)
+      && hash_join->join_type() == P::HashJoin::JoinType::kInnerJoin
+      && P::SomeSelection::MatchesWithConditionalCast(hash_join->left(), &selection)
+      && P::SomeTableReference::MatchesWithConditionalCast(selection->input(), &table_reference)) {
+    const E::PredicatePtr filter_predicate = selection->filter_predicate();
+    P::PhysicalPtr output = P::HashJoin::Create(table_reference,
+                                                hash_join->right(),
+                                                hash_join->left_join_attributes(),
+                                                hash_join->right_join_attributes(),
+                                                hash_join->residual_predicate(),
+                                                hash_join->project_expressions(),
+                                                hash_join->join_type(),
+                                                filter_predicate);  // Passed as left_filter_predicate.
+    LOG_APPLYING_RULE(input, output);
+    return output;
+  }
+  // Pattern did not match: hand the node back unchanged.
+  LOG_IGNORING_RULE(input);
+  return input;
+}
+
+}  // namespace optimizer
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b163289c/query_optimizer/rules/FuseJoinSelect.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/FuseJoinSelect.hpp b/query_optimizer/rules/FuseJoinSelect.hpp
new file mode 100644
index 0000000..24ac08b
--- /dev/null
+++ b/query_optimizer/rules/FuseJoinSelect.hpp
@@ -0,0 +1,33 @@
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_FUSE_JOIN_SELECT_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_FUSE_JOIN_SELECT_HPP_
+
+#include <string>
+
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "query_optimizer/rules/BottomUpRule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace P = ::quickstep::optimizer::physical;
+/** @brief Rule that folds a Selection on the left input of an inner HashJoin into the join's left filter predicate. */
+class FuseJoinSelect : public BottomUpRule<P::Physical> {
+ public:
+  FuseJoinSelect() {
+  }
+
+  std::string getName() const override { return "FuseJoinSelect"; }
+
+ protected:
+  P::PhysicalPtr applyToNode(const P::PhysicalPtr &input) override;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FuseJoinSelect);
+};
+
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_QUERY_OPTIMIZER_RULES_FUSE_JOIN_SELECT_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b163289c/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index 9b573ac..7357acd 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -192,6 +192,8 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
 
     const Predicate *residual_predicate =
         query_context->getPredicate(residual_predicate_index_);
+    const Predicate *left_filter_predicate =
+        query_context->getPredicate(left_filter_predicate_index_);
     const vector<unique_ptr<const Scalar>> &selection =
         query_context->getScalarGroup(selection_index_);
     InsertDestination *output_destination =
@@ -210,6 +212,7 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
                                      any_join_key_attributes_nullable_,
                                      probe_block_id,
                                      residual_predicate,
+                                     left_filter_predicate,
                                      selection,
                                      hash_table,
                                      output_destination,
@@ -230,6 +233,7 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
                 any_join_key_attributes_nullable_,
                 probe_relation_block_ids_[num_workorders_generated_],
                 residual_predicate,
+                left_filter_predicate,
                 selection,
                 hash_table,
                 output_destination,
@@ -370,6 +374,7 @@ serialization::WorkOrder* HashJoinOperator::createNonOuterJoinWorkOrderProto(
   proto->SetExtension(serialization::HashJoinWorkOrder::selection_index, selection_index_);
   proto->SetExtension(serialization::HashJoinWorkOrder::block_id, block);
   proto->SetExtension(serialization::HashJoinWorkOrder::residual_predicate_index, residual_predicate_index_);
+  proto->SetExtension(serialization::HashJoinWorkOrder::left_filter_predicate_index, left_filter_predicate_index_);
 
   return proto;
 }
@@ -432,7 +437,13 @@ void HashInnerJoinWorkOrder::execute() {
       storage_manager_->getBlock(block_id_, probe_relation_));
   const TupleStorageSubBlock &probe_store = probe_block->getTupleStorageSubBlock();
 
-  std::unique_ptr<ValueAccessor> probe_accessor(probe_store.createValueAccessor());
+  // Own the match sequence here (declared before the accessor so it outlives it) instead of leaking it.
+  std::unique_ptr<TupleIdSequence> left_filter_matches(
+      left_filter_predicate_ == nullptr
+          ? nullptr
+          : probe_block->getMatchesForPredicate(left_filter_predicate_));
+  std::unique_ptr<ValueAccessor> probe_accessor(
+      probe_store.createValueAccessor(left_filter_matches.get()));
   MapBasedJoinedTupleCollector collector;
   if (join_key_attributes_.size() == 1) {
     hash_table_.getAllFromValueAccessor(

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b163289c/relational_operators/HashJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index 30571a1..05e16a4 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -128,6 +128,7 @@ class HashJoinOperator : public RelationalOperator {
       const QueryContext::insert_destination_id output_destination_index,
       const QueryContext::join_hash_table_id hash_table_index,
       const QueryContext::predicate_id residual_predicate_index,
+      const QueryContext::predicate_id left_filter_predicate_index,
       const QueryContext::scalar_group_id selection_index,
       const std::vector<bool> *is_selection_on_build = nullptr,
       const JoinType join_type = JoinType::kInnerJoin)
@@ -141,6 +142,7 @@ class HashJoinOperator : public RelationalOperator {
         output_destination_index_(output_destination_index),
         hash_table_index_(hash_table_index),
         residual_predicate_index_(residual_predicate_index),
+        left_filter_predicate_index_(left_filter_predicate_index),
         selection_index_(selection_index),
         is_selection_on_build_(is_selection_on_build == nullptr
                                    ? std::vector<bool>()
@@ -256,6 +258,7 @@ class HashJoinOperator : public RelationalOperator {
   const QueryContext::insert_destination_id output_destination_index_;
   const QueryContext::join_hash_table_id hash_table_index_;
   const QueryContext::predicate_id residual_predicate_index_;
+  const QueryContext::predicate_id left_filter_predicate_index_;
   const QueryContext::scalar_group_id selection_index_;
   const std::vector<bool> is_selection_on_build_;
   const JoinType join_type_;
@@ -304,6 +307,7 @@ class HashInnerJoinWorkOrder : public WorkOrder {
       const bool any_join_key_attributes_nullable,
       const block_id lookup_block_id,
       const Predicate *residual_predicate,
+      const Predicate *left_filter_predicate,
       const std::vector<std::unique_ptr<const Scalar>> &selection,
       const JoinHashTable &hash_table,
       InsertDestination *output_destination,
@@ -316,6 +320,7 @@ class HashInnerJoinWorkOrder : public WorkOrder {
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
         block_id_(lookup_block_id),
         residual_predicate_(residual_predicate),
+        left_filter_predicate_(left_filter_predicate),
         selection_(selection),
         hash_table_(hash_table),
         output_destination_(DCHECK_NOTNULL(output_destination)),
@@ -388,6 +393,7 @@ class HashInnerJoinWorkOrder : public WorkOrder {
   const bool any_join_key_attributes_nullable_;
   const block_id block_id_;
   const Predicate *residual_predicate_;
+  const Predicate *left_filter_predicate_;
   const std::vector<std::unique_ptr<const Scalar>> &selection_;
   const JoinHashTable &hash_table_;
 
@@ -434,6 +440,7 @@ class HashSemiJoinWorkOrder : public WorkOrder {
       const bool any_join_key_attributes_nullable,
       const block_id lookup_block_id,
       const Predicate *residual_predicate,
+      const Predicate *left_filter_predicate,
       const std::vector<std::unique_ptr<const Scalar>> &selection,
       const JoinHashTable &hash_table,
       InsertDestination *output_destination,
@@ -446,6 +453,7 @@ class HashSemiJoinWorkOrder : public WorkOrder {
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
         block_id_(lookup_block_id),
         residual_predicate_(residual_predicate),
+        left_filter_predicate_(left_filter_predicate),
         selection_(selection),
         hash_table_(hash_table),
         output_destination_(DCHECK_NOTNULL(output_destination)),
@@ -514,6 +522,7 @@ class HashSemiJoinWorkOrder : public WorkOrder {
   const bool any_join_key_attributes_nullable_;
   const block_id block_id_;
   const Predicate *residual_predicate_;
+  const Predicate *left_filter_predicate_;
   const std::vector<std::unique_ptr<const Scalar>> &selection_;
   const JoinHashTable &hash_table_;
 
@@ -560,6 +569,7 @@ class HashAntiJoinWorkOrder : public WorkOrder {
       const bool any_join_key_attributes_nullable,
       const block_id lookup_block_id,
       const Predicate *residual_predicate,
+      const Predicate *left_filter_predicate,
       const std::vector<std::unique_ptr<const Scalar>> &selection,
       const JoinHashTable &hash_table,
       InsertDestination *output_destination,
@@ -572,6 +582,7 @@ class HashAntiJoinWorkOrder : public WorkOrder {
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
         block_id_(lookup_block_id),
         residual_predicate_(residual_predicate),
+        left_filter_predicate_(left_filter_predicate),
         selection_(selection),
         hash_table_(hash_table),
         output_destination_(DCHECK_NOTNULL(output_destination)),
@@ -645,6 +656,7 @@ class HashAntiJoinWorkOrder : public WorkOrder {
   const bool any_join_key_attributes_nullable_;
   const block_id block_id_;
   const Predicate *residual_predicate_;
+  const Predicate *left_filter_predicate_;
   const std::vector<std::unique_ptr<const Scalar>> &selection_;
   const JoinHashTable &hash_table_;
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b163289c/relational_operators/WorkOrder.proto
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrder.proto b/relational_operators/WorkOrder.proto
index 02aa50e..4874450 100644
--- a/relational_operators/WorkOrder.proto
+++ b/relational_operators/WorkOrder.proto
@@ -128,6 +128,7 @@ message HashJoinWorkOrder {
 
     // Used by all but HashOuterJoinWorkOrder.
     optional int32 residual_predicate_index = 169;
+    optional int32 left_filter_predicate_index = 400;
     // Used by HashOuterJoinWorkOrder only.
     repeated bool is_selection_on_build = 170;
   }