You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/10/18 17:21:09 UTC

[01/12] incubator-quickstep git commit: IWYU fixes for QueryExecutionUtil. [Forced Update!]

Repository: incubator-quickstep
Updated Branches:
  refs/heads/lip-refactor 969b02f57 -> ca9c1790a (forced update)


IWYU fixes for QueryExecutionUtil.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/b5dcb6d2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/b5dcb6d2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/b5dcb6d2

Branch: refs/heads/lip-refactor
Commit: b5dcb6d29805dce944c11d5ad0720a3267ad57cf
Parents: 2e02333
Author: Zuyu Zhang <zu...@apache.org>
Authored: Sun Oct 9 15:48:03 2016 -0700
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Sun Oct 9 15:48:03 2016 -0700

----------------------------------------------------------------------
 query_execution/CMakeLists.txt         |  1 -
 query_execution/QueryExecutionUtil.hpp | 34 ++++++++++++++++-------------
 2 files changed, 19 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b5dcb6d2/query_execution/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_execution/CMakeLists.txt b/query_execution/CMakeLists.txt
index 1b27194..6a84be1 100644
--- a/query_execution/CMakeLists.txt
+++ b/query_execution/CMakeLists.txt
@@ -217,7 +217,6 @@ target_link_libraries(quickstep_queryexecution_QueryExecutionTypedefs
 target_link_libraries(quickstep_queryexecution_QueryExecutionUtil
                       quickstep_queryexecution_AdmitRequestMessage
                       quickstep_queryexecution_QueryExecutionTypedefs
-                      quickstep_queryexecution_WorkerMessage
                       quickstep_utility_Macros
                       tmb)
 target_link_libraries(quickstep_queryexecution_QueryManagerBase

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b5dcb6d2/query_execution/QueryExecutionUtil.hpp
----------------------------------------------------------------------
diff --git a/query_execution/QueryExecutionUtil.hpp b/query_execution/QueryExecutionUtil.hpp
index feb4cc0..7a3a3b3 100644
--- a/query_execution/QueryExecutionUtil.hpp
+++ b/query_execution/QueryExecutionUtil.hpp
@@ -20,26 +20,30 @@
 #ifndef QUICKSTEP_QUERY_EXECUTION_QUERY_EXECUTION_UTIL_HPP_
 #define QUICKSTEP_QUERY_EXECUTION_QUERY_EXECUTION_UTIL_HPP_
 
+#include <cstddef>
 #include <memory>
 #include <utility>
 
 #include "query_execution/AdmitRequestMessage.hpp"
 #include "query_execution/QueryExecutionTypedefs.hpp"
-#include "query_execution/WorkerMessage.hpp"
 #include "utility/Macros.hpp"
 
 #include "glog/logging.h"
 
 #include "tmb/address.h"
 #include "tmb/id_typedefs.h"
-#include "tmb/message_style.h"
 #include "tmb/message_bus.h"
+#include "tmb/message_style.h"
 #include "tmb/tagged_message.h"
 
 namespace quickstep {
 
 class QueryHandle;
 
+/** \addtogroup QueryExecution
+ *  @{
+ */
+
 /**
  * @brief A static class for reusable methods in query_execution module.
  **/
@@ -57,14 +61,14 @@ class QueryExecutionUtil {
    *         The caller should ensure that the status is SendStatus::kOK.
    **/
   static tmb::MessageBus::SendStatus SendTMBMessage(
-      MessageBus *bus,
-      client_id sender_id,
-      client_id receiver_id,
-      TaggedMessage &&tagged_message) {  // NOLINT(whitespace/operators)
-    Address receiver_address;
+      tmb::MessageBus *bus,
+      tmb::client_id sender_id,
+      tmb::client_id receiver_id,
+      tmb::TaggedMessage &&tagged_message) {  // NOLINT(whitespace/operators)
+    tmb::Address receiver_address;
     receiver_address.AddRecipient(receiver_id);
 
-    MessageStyle single_receiver_style;
+    tmb::MessageStyle single_receiver_style;
     return bus->Send(sender_id,
                      receiver_address,
                      single_receiver_style,
@@ -88,11 +92,11 @@ class QueryExecutionUtil {
       const tmb::client_id sender_id,
       const tmb::client_id receiver_id,
       QueryHandle *query_handle,
-      MessageBus *bus) {
+      tmb::MessageBus *bus) {
     std::unique_ptr<AdmitRequestMessage> request_message(
         new AdmitRequestMessage(query_handle));
     const std::size_t size_of_request_msg = sizeof(*request_message);
-    TaggedMessage admit_tagged_message(
+    tmb::TaggedMessage admit_tagged_message(
         request_message.release(), size_of_request_msg, kAdmitRequestMessage);
 
     return QueryExecutionUtil::SendTMBMessage(
@@ -111,9 +115,9 @@ class QueryExecutionUtil {
    **/
   static void ReceiveQueryCompletionMessage(const tmb::client_id receiver_id,
                                             tmb::MessageBus *bus) {
-    const AnnotatedMessage annotated_msg =
+    const tmb::AnnotatedMessage annotated_msg =
         bus->Receive(receiver_id, 0, true);
-    const TaggedMessage &tagged_message = annotated_msg.tagged_message;
+    const tmb::TaggedMessage &tagged_message = annotated_msg.tagged_message;
     DCHECK_EQ(kWorkloadCompletionMessage, tagged_message.message_type());
   }
 
@@ -122,11 +126,11 @@ class QueryExecutionUtil {
     // The sender thread broadcasts poison message to the workers and foreman.
     // Each worker dies after receiving poison message. The order of workers'
     // death is irrelavant.
-    MessageStyle style;
+    tmb::MessageStyle style;
     style.Broadcast(true);
-    Address address;
+    tmb::Address address;
     address.All(true);
-    TaggedMessage poison_tagged_message(kPoisonMessage);
+    tmb::TaggedMessage poison_tagged_message(kPoisonMessage);
 
     DLOG(INFO) << "TMB client ID " << sender_id
                << " broadcast PoisonMessage (typed '" << kPoisonMessage << "') to all";


[04/12] incubator-quickstep git commit: Add unit test for PackedRowStore ColumnAccessor

Posted by ji...@apache.org.
Add unit test for PackedRowStore ColumnAccessor


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/262ad5a6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/262ad5a6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/262ad5a6

Branch: refs/heads/lip-refactor
Commit: 262ad5a6bf7bdff976a87d6858807bba8a1c0d8d
Parents: 743f6b0
Author: Saket Saurabh <ss...@cs.wisc.edu>
Authored: Thu Sep 22 16:44:19 2016 -0500
Committer: Saket Saurabh <ss...@cs.wisc.edu>
Committed: Tue Oct 11 11:36:57 2016 -0500

----------------------------------------------------------------------
 ...kedRowStoreTupleStorageSubBlock_unittest.cpp | 40 ++++++++++++++++++++
 1 file changed, 40 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/262ad5a6/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp
----------------------------------------------------------------------
diff --git a/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp b/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp
index 304fa07..a6f6606 100644
--- a/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp
+++ b/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp
@@ -37,6 +37,8 @@
 #include "storage/StorageErrors.hpp"
 #include "storage/TupleIdSequence.hpp"
 #include "storage/TupleStorageSubBlock.hpp"
+#include "storage/ValueAccessor.hpp"
+#include "storage/ValueAccessorUtil.hpp"
 #include "types/CharType.hpp"
 #include "types/DoubleType.hpp"
 #include "types/IntType.hpp"
@@ -248,6 +250,36 @@ class PackedRowStoreTupleStorageSubBlockTest : public ::testing::TestWithParam<b
                                                     tuple_store_->getAttributeValueTyped(tid, 2)));
     }
   }
+  
+  template<bool check_null>
+  void checkColumnAccessor() {
+    initializeNewBlock(kSubBlockSize);
+    fillBlockWithSampleData();
+    ASSERT_TRUE(tuple_store_->isPacked());
+    std::unique_ptr<ValueAccessor> accessor(tuple_store_->createValueAccessor());
+    attribute_id  value_accessor_id = 0;
+    tuple_id tid = 0;
+    InvokeOnAnyValueAccessor(accessor.get(),
+                             [&](auto *accessor) -> void {  // NOLINT(build/c++11)
+      accessor->beginIteration();
+      ASSERT_TRUE(accessor->isColumnAccessorSupported());
+      std::unique_ptr<const ColumnAccessor<check_null>>
+      column_accessor(accessor->template getColumnAccessor<check_null>(value_accessor_id));
+      ASSERT_TRUE(column_accessor != nullptr);
+      while (accessor->next()) {
+        const void *va_value = column_accessor->getUntypedValue();
+        std::unique_ptr<Tuple> expected_tuple(createSampleTuple(tid));
+         
+        if (expected_tuple->getAttributeValue(value_accessor_id).isNull()) {
+          ASSERT_TRUE(va_value == nullptr);
+        } else {
+          ASSERT_TRUE(eq_comp_int_->compareDataPtrs(expected_tuple->getAttributeValue(value_accessor_id).getDataPtr(),
+                                                    va_value));
+        }
+        ++tid;
+      }
+    });
+  }
 
   std::unique_ptr<CatalogRelation> relation_;
   ScopedBuffer tuple_store_memory_;
@@ -374,6 +406,14 @@ TEST_P(PackedRowStoreTupleStorageSubBlockTest, InsertInBatchTest) {
   EXPECT_EQ(row_capacity - 1, tuple_store_->getMaxTupleID());
   EXPECT_EQ(row_capacity, tuple_store_->numTuples());
 }
+  
+TEST_P(PackedRowStoreTupleStorageSubBlockTest, ColumnAccessorTest) {
+  if (GetParam()) { // when true, the attributes can be nullable.
+    checkColumnAccessor<true>();
+  } else { // when false, the attributes are non-null.
+    checkColumnAccessor<false>();
+  }
+}
 
 TEST_P(PackedRowStoreTupleStorageSubBlockTest, GetAttributeValueTest) {
   initializeNewBlock(kSubBlockSize);


[07/12] incubator-quickstep git commit: Fix testcase failure by explicit typecasting and avoiding macro usage

Posted by ji...@apache.org.
Fix testcase failure by explicit typecasting and avoiding macro usage


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/e4de2417
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/e4de2417
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/e4de2417

Branch: refs/heads/lip-refactor
Commit: e4de24178d085464af648d5914891ec548f09853
Parents: e845246
Author: Saket Saurabh <ss...@cs.wisc.edu>
Authored: Mon Oct 3 01:21:30 2016 -0500
Committer: Saket Saurabh <ss...@cs.wisc.edu>
Committed: Tue Oct 11 11:36:57 2016 -0500

----------------------------------------------------------------------
 ...kedRowStoreTupleStorageSubBlock_unittest.cpp | 38 ++++++++++----------
 1 file changed, 18 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/e4de2417/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp
----------------------------------------------------------------------
diff --git a/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp b/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp
index 924f9b1..bf3c605 100644
--- a/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp
+++ b/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp
@@ -256,29 +256,27 @@ class PackedRowStoreTupleStorageSubBlockTest : public ::testing::TestWithParam<b
     initializeNewBlock(kSubBlockSize);
     fillBlockWithSampleData();
     ASSERT_TRUE(tuple_store_->isPacked());
-    std::unique_ptr<ValueAccessor> accessor(tuple_store_->createValueAccessor());
+    std::unique_ptr<PackedRowStoreValueAccessor> accessor(
+      static_cast<PackedRowStoreValueAccessor*>(tuple_store_->createValueAccessor()));
     attribute_id  value_accessor_id = 0;
     tuple_id tid = 0;
-    InvokeOnAnyValueAccessor(accessor.get(),
-                             [&](auto *accessor) -> void {  // NOLINT(build/c++11)
-      accessor->beginIteration();
-      ASSERT_TRUE(accessor->isColumnAccessorSupported());
-      std::unique_ptr<const ColumnAccessor<check_null>>
-      column_accessor(accessor->template getColumnAccessor<check_null>(value_accessor_id));
-      ASSERT_TRUE(column_accessor != nullptr);
-      while (accessor->next()) {
-        const void *va_value = column_accessor->getUntypedValue();
-        std::unique_ptr<Tuple> expected_tuple(createSampleTuple(tid));
-
-        if (expected_tuple->getAttributeValue(value_accessor_id).isNull()) {
-          ASSERT_TRUE(va_value == nullptr);
-        } else {
-          ASSERT_TRUE(eq_comp_int_->compareDataPtrs(expected_tuple->getAttributeValue(value_accessor_id).getDataPtr(),
-                                                    va_value));
-        }
-        ++tid;
+    accessor->beginIteration();
+    ASSERT_TRUE(accessor->isColumnAccessorSupported());
+    std::unique_ptr<const ColumnAccessor<check_null>>
+    column_accessor(accessor->template getColumnAccessor<check_null>(value_accessor_id));
+    ASSERT_TRUE(column_accessor != nullptr);
+    while (accessor->next()) {
+      const void *va_value = column_accessor->getUntypedValue();
+      std::unique_ptr<Tuple> expected_tuple(createSampleTuple(tid));
+
+      if (expected_tuple->getAttributeValue(value_accessor_id).isNull()) {
+        ASSERT_TRUE(va_value == nullptr);
+      } else {
+        ASSERT_TRUE(eq_comp_int_->compareDataPtrs(expected_tuple->getAttributeValue(value_accessor_id).getDataPtr(),
+                                                  va_value));
       }
-    });
+      ++tid;
+    }
   }
 
   std::unique_ptr<CatalogRelation> relation_;


[05/12] incubator-quickstep git commit: Optimize PackedRowStoreValueAccessor & BasicColumnStoreValueAccessor by removing redundant computations and clearly exposing a strided memory access pattern

Posted by ji...@apache.org.
Optimize PackedRowStoreValueAccessor & BasicColumnStoreValueAccessor by removing redundant computations and clearly exposing a strided memory access pattern


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/743f6b0a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/743f6b0a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/743f6b0a

Branch: refs/heads/lip-refactor
Commit: 743f6b0acc769a0f2eedb0daf8ed6f535dc1c4b0
Parents: 80af233
Author: Saket Saurabh <ss...@cs.wisc.edu>
Authored: Wed Sep 21 03:17:19 2016 -0500
Committer: Saket Saurabh <ss...@cs.wisc.edu>
Committed: Tue Oct 11 11:36:57 2016 -0500

----------------------------------------------------------------------
 .gitignore                                      |   1 +
 storage/BasicColumnStoreValueAccessor.hpp       |  55 +++++++
 storage/CompressedColumnStoreValueAccessor.hpp  |  33 +++++
 .../CompressedPackedRowStoreValueAccessor.hpp   |  33 +++++
 storage/PackedRowStoreValueAccessor.hpp         |  54 +++++++
 storage/SplitRowStoreValueAccessor.hpp          |  18 +++
 storage/ValueAccessor.hpp                       | 143 +++++++++++++++++++
 types/containers/ColumnVectorsValueAccessor.hpp |  18 +++
 .../comparisons/AsciiStringComparators-inl.hpp  | 101 ++++++++++---
 types/operations/comparisons/Comparison-inl.hpp |  27 +++-
 .../comparisons/LiteralComparators-inl.hpp      | 129 +++++++++++++----
 .../PatternMatchingComparators-inl.hpp          |  24 +++-
 12 files changed, 574 insertions(+), 62 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/743f6b0a/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 37a361f..5dea02f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,5 @@
 Makefile.in
 autom4te.cache
 .DS_Store
+.idea
 *~

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/743f6b0a/storage/BasicColumnStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/BasicColumnStoreValueAccessor.hpp b/storage/BasicColumnStoreValueAccessor.hpp
index 0560d99..22d3c0b 100644
--- a/storage/BasicColumnStoreValueAccessor.hpp
+++ b/storage/BasicColumnStoreValueAccessor.hpp
@@ -52,6 +52,61 @@ class BasicColumnStoreValueAccessorHelper {
     return num_tuples_;
   }
 
+  /**
+   * @brief Returns whether this accessor has a fast strided ColumnAccessor available
+   *        that can be used to optimize memory access in a tight loop iteration
+   *        over the underlying storage block.
+   *
+   * @return true if fast ColumnAccessor is supported, otherwise false.
+   */
+  inline bool isColumnAccessorSupported() const {
+    return true;
+  }
+
+  /**
+   * @brief Get a pointer to a ColumnAccessor object that provides a fast strided memory
+   *        access on the underlying storage block.
+   * @note The ownership of the returned object lies with the caller.
+   * @warning This method should only be called if isColumnAccessorSupported() method
+   *          returned true. If ColumnAccessor is not supported this method will return a nullptr.
+   *
+   * @param current_tuple_position A constant reference to the tuple position in the containing
+   *        ValueAccessor. This reference value is shared between the containing ValueAccessor &
+   *        a ColumnAccessor. However, a ColumnAccessor *CANNOT* modify this tuple position.
+   * @param attr_id The attribute id on which this ColumnAccessor will be created.
+   *
+   * @return A pointer to a ColumnAccessor object with specific properties set that can be used
+   *         in tight-loop iterations over the underlying storage block.
+   **/
+  template <bool check_null = true>
+  inline const ColumnAccessor<check_null>* getColumnAccessor(const tuple_id &current_tuple_position,
+                                                             const attribute_id attr_id) const {
+    DEBUG_ASSERT(relation_.hasAttributeWithId(attr_id));
+    const void* base_location = static_cast<const char*>(column_stripes_[attr_id]);
+    const std::size_t stride = relation_.getAttributeById(attr_id)->getType().maximumByteLength();
+    std::unique_ptr<ColumnAccessor<check_null>> column_accessor;
+    if (check_null) {
+      // The nullable_base is set to -1 when column_null_bitmaps_ holds no bitmap (a null element)
+      // for the given attribute. Setting the nullable_base to -1 will mean that the
+      // column accessor will always evaluate the null check to false.
+      const int nullable_base = (!column_null_bitmaps_.elementIsNull(attr_id)) ? 0 : -1;
+      const unsigned nullable_stride = 1;
+      column_accessor.reset(new ColumnAccessor<check_null>(current_tuple_position,
+                                                           num_tuples_,
+                                                           base_location,
+                                                           stride,
+                                                           &(column_null_bitmaps_[attr_id]),
+                                                           nullable_base,
+                                                           nullable_stride));
+    } else {
+      column_accessor.reset(new ColumnAccessor<check_null>(current_tuple_position,
+                                                           num_tuples_,
+                                                           base_location,
+                                                           stride));
+    }
+    return column_accessor.release();
+  }
+
   template <bool check_null>
   inline const void* getAttributeValue(const tuple_id tuple,
                                        const attribute_id attr) const {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/743f6b0a/storage/CompressedColumnStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/CompressedColumnStoreValueAccessor.hpp b/storage/CompressedColumnStoreValueAccessor.hpp
index 25e5eed..366d4c6 100644
--- a/storage/CompressedColumnStoreValueAccessor.hpp
+++ b/storage/CompressedColumnStoreValueAccessor.hpp
@@ -67,6 +67,39 @@ class CompressedColumnStoreValueAccessorHelper {
     return num_tuples_;
   }
 
+  /**
+   * @brief Returns whether this accessor has a fast strided ColumnAccessor available
+   *        that can be used to optimize memory access in a tight loop iteration
+   *        over the underlying storage block.
+   *
+   * @return true if fast ColumnAccessor is supported, otherwise false.
+   */
+  inline bool isColumnAccessorSupported() const {
+    return false;
+  }
+
+  /**
+   * @brief Get a pointer to a ColumnAccessor object that provides a fast strided memory
+   *        access on the underlying storage block.
+   * @note The ownership of the returned object lies with the caller.
+   * @warning This method should only be called if isColumnAccessorSupported() method
+   *          returned true. If ColumnAccessor is not supported this method will return a nullptr.
+   *
+   * @param current_tuple_position A constant reference to the tuple position in the containing
+   *        ValueAccessor. This reference value is shared between the containing ValueAccessor &
+   *        a ColumnAccessor. However, a ColumnAccessor *CANNOT* modify this tuple position.
+   * @param attr_id The attribute id on which this ColumnAccessor will be created.
+   *
+   * @return A pointer to a ColumnAccessor object with specific properties set that can be used
+   *         in tight-loop iterations over the underlying storage block.
+   **/
+  template <bool check_null = true>
+  inline const ColumnAccessor<check_null>* getColumnAccessor(const tuple_id &current_tuple_position,
+                                                             const attribute_id attr_id) const {
+    // Return nullptr because this value accessor does not support column accessor yet.
+    return nullptr;
+  }
+
   template <bool check_null>
   inline const void* getAttributeValue(const tuple_id tuple,
                                        const attribute_id attr) const {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/743f6b0a/storage/CompressedPackedRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/CompressedPackedRowStoreValueAccessor.hpp b/storage/CompressedPackedRowStoreValueAccessor.hpp
index 8858175..aeff2e1 100644
--- a/storage/CompressedPackedRowStoreValueAccessor.hpp
+++ b/storage/CompressedPackedRowStoreValueAccessor.hpp
@@ -75,6 +75,39 @@ class CompressedPackedRowStoreValueAccessorHelper {
     return num_tuples_;
   }
 
+  /**
+   * @brief Returns whether this accessor has a fast strided ColumnAccessor available
+   *        that can be used to optimize memory access in a tight loop iteration
+   *        over the underlying storage block.
+   *
+   * @return true if fast ColumnAccessor is supported, otherwise false.
+   */
+  inline bool isColumnAccessorSupported() const {
+    return false;
+  }
+
+  /**
+   * @brief Get a pointer to a ColumnAccessor object that provides a fast strided memory
+   *        access on the underlying storage block.
+   * @note The ownership of the returned object lies with the caller.
+   * @warning This method should only be called if isColumnAccessorSupported() method
+   *          returned true. If ColumnAccessor is not supported this method will return a nullptr.
+   *
+   * @param current_tuple_position A constant reference to the tuple position in the containing
+   *        ValueAccessor. This reference value is shared between the containing ValueAccessor &
+   *        a ColumnAccessor. However, a ColumnAccessor *CANNOT* modify this tuple position.
+   * @param attr_id The attribute id on which this ColumnAccessor will be created.
+   *
+   * @return A pointer to a ColumnAccessor object with specific properties set that can be used
+   *         in tight-loop iterations over the underlying storage block.
+   **/
+  template <bool check_null = true>
+  inline const ColumnAccessor<check_null>* getColumnAccessor(const tuple_id &current_tuple_position,
+                                                             const attribute_id attr_id) const {
+    // Return nullptr because this value accessor does not support column accessor yet.
+    return nullptr;
+  }
+
   template <bool check_null>
   inline const void* getAttributeValue(const tuple_id tuple,
                                        const attribute_id attr) const {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/743f6b0a/storage/PackedRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/PackedRowStoreValueAccessor.hpp b/storage/PackedRowStoreValueAccessor.hpp
index 80edecd..7eb2d41 100644
--- a/storage/PackedRowStoreValueAccessor.hpp
+++ b/storage/PackedRowStoreValueAccessor.hpp
@@ -49,6 +49,60 @@ class PackedRowStoreValueAccessorHelper {
     return num_tuples_;
   }
 
+  /**
+   * @brief Returns whether this accessor has a fast strided ColumnAccessor available
+   *        that can be used to optimize memory access in a tight loop iteration
+   *        over the underlying storage block.
+   *
+   * @return true if fast ColumnAccessor is supported, otherwise false.
+   */
+  inline bool isColumnAccessorSupported() const {
+    return true;
+  }
+
+  /**
+   * @brief Get a pointer to a ColumnAccessor object that provides a fast strided memory
+   *        access on the underlying storage block.
+   * @note The ownership of the returned object lies with the caller.
+   * @warning This method should only be called if isColumnAccessorSupported() method
+   *          returned true. If ColumnAccessor is not supported this method will return a nullptr.
+   *
+   * @param current_tuple_position A constant reference to the tuple position in the containing
+   *        ValueAccessor. This reference value is shared between the containing ValueAccessor &
+   *        a ColumnAccessor. However, a ColumnAccessor *CANNOT* modify this tuple position.
+   * @param attr_id The attribute id on which this ColumnAccessor will be created.
+   *
+   * @return A pointer to a ColumnAccessor object with specific properties set that can be used
+   *         in tight-loop iterations over the underlying storage block.
+   **/
+  template <bool check_null = true>
+  inline const ColumnAccessor<check_null>* getColumnAccessor(const tuple_id &current_tuple_position,
+                                                             const attribute_id attr_id) const {
+    DEBUG_ASSERT(relation_.hasAttributeWithId(attr_id));
+    const void* base_location = static_cast<const char*>(tuple_storage_)
+        + relation_.getFixedLengthAttributeOffset(attr_id);
+    const std::size_t stride = relation_.getFixedByteLength();
+
+    std::unique_ptr<ColumnAccessor<check_null>> column_accessor;
+    if (check_null) {
+      const int nullable_base = relation_.getNullableAttributeIndex(attr_id);
+      const unsigned nullable_stride = relation_.numNullableAttributes();
+      column_accessor.reset(new ColumnAccessor<check_null>(current_tuple_position,
+                                                           num_tuples_,
+                                                           base_location,
+                                                           stride,
+                                                           null_bitmap_,
+                                                           nullable_base,
+                                                           nullable_stride));
+    } else {
+      column_accessor.reset(new ColumnAccessor<check_null>(current_tuple_position,
+                                                           num_tuples_,
+                                                           base_location,
+                                                           stride));
+    }
+    return column_accessor.release();
+  }
+
   template <bool check_null>
   inline const void* getAttributeValue(const tuple_id tuple,
                                        const attribute_id attr) const {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/743f6b0a/storage/SplitRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/SplitRowStoreValueAccessor.hpp b/storage/SplitRowStoreValueAccessor.hpp
index 61bb7bf..951a20a 100644
--- a/storage/SplitRowStoreValueAccessor.hpp
+++ b/storage/SplitRowStoreValueAccessor.hpp
@@ -97,6 +97,24 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
     return num_tuples_;
   }
 
+  /**
+   * @brief Get a pointer to a ColumnAccessor object that provides a fast strided memory
+   *        access on the underlying storage block.
+   * @note The ownership of the returned object lies with the caller.
+   * @warning This method should only be called if isColumnAccessorSupported() method
+   *          returned true. If ColumnAccessor is not supported this method will return a nullptr.
+   *
+   * @param attr_id The attribute id on which this ColumnAccessor will be created.
+   *
+   * @return A pointer to a ColumnAccessor object with specific properties set that can be used
+   *         in tight-loop iterations over the underlying storage block.
+   **/
+  template <bool check_null = true>
+  inline const ColumnAccessor<check_null>* getColumnAccessor(const attribute_id attr_id) const {
+    // Column Accessors are currently unsupported for this value accessor, hence nullptr.
+    return nullptr;
+  }
+
   template <bool check_null = true>
   inline const void* getUntypedValue(const attribute_id attr_id) const {
     return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_position_);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/743f6b0a/storage/ValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/ValueAccessor.hpp b/storage/ValueAccessor.hpp
index 70d4405..3b58a7c 100644
--- a/storage/ValueAccessor.hpp
+++ b/storage/ValueAccessor.hpp
@@ -41,6 +41,9 @@ namespace quickstep {
 
 class TupleStorageSubBlock;
 
+template <bool check_null = true>
+class ColumnAccessor;
+
 // TODO(chasseur): Iteration on ValueAccessors is row-at-a-time, but in some
 // cases column-wise data movement may be more efficient.
 
@@ -182,6 +185,18 @@ class ValueAccessor {
   virtual tuple_id getNumTuplesVirtual() const = 0;
 
   /**
+   * @brief Returns whether this accessor has a fast strided ColumnAccessor available
+   *        that can be used to optimize memory access in a tight loop iteration
+   *        over the underlying storage block. Specific derived classes should override
+   *        this method if they support ColumnAccessor.
+   *
+   * @return true if fast ColumnAccessor is supported, otherwise false.
+   */
+  virtual inline bool isColumnAccessorSupported() const {
+    return false;
+  }
+
+  /**
    * @brief Get a pointer to an untyped value for the current tuple in stateful
    *        iteration.
    * @note The inline version of this method provided by subclasses,
@@ -372,6 +387,24 @@ class TupleIdSequenceAdapterValueAccessor : public ValueAccessor {
     return id_sequence_.numTuples();
   }
 
+  /**
+   * @brief Get a pointer to a ColumnAccessor object that provides a fast strided memory
+   *        access on the underlying storage block.
+   * @note The ownership of the returned object lies with the caller.
+   * @warning This method should only be called if isColumnAccessorSupported() method
+   *          returned true. If ColumnAccessor is not supported this method will return a nullptr.
+   *
+   * @param attr_id The attribute id on which this ColumnAccessor will be created.
+   *
+   * @return A pointer to a ColumnAccessor object with specific properties set that can be used
+   *         in a tight loop iterations over the underlying storage block.
+   **/
+  template <bool check_null = true>
+  inline const ColumnAccessor<check_null>* getColumnAccessor(const attribute_id attr_id) const {
+    // Column Accessors are currently unsupported for this accessor, hence nullptr.
+    return nullptr;
+  }
+
   template <bool check_null = true>
   inline const void* getUntypedValue(const attribute_id attr_id) const {
     return accessor_->template getUntypedValueAtAbsolutePosition<check_null>(attr_id, *current_position_);
@@ -556,6 +589,24 @@ class OrderedTupleIdSequenceAdapterValueAccessor : public ValueAccessor {
     return id_sequence_.size();
   }
 
+  /**
+   * @brief Get a pointer to a ColumnAccessor object that provides a fast strided memory
+   *        access on the underlying storage block.
+   * @note The ownership of the returned object lies with the caller.
+   * @warning This method should only be called if isColumnAccessorSupported() method
+   *          returned true. If ColumnAccessor is not supported this method will return a nullptr.
+   *
+   * @param attr_id The attribute id on which this ColumnAccessor will be created.
+   *
+   * @return A pointer to a ColumnAccessor object with specific properties set that can be used
+   *         in a tight loop iterations over the underlying storage block.
+   **/
+  template <bool check_null = true>
+  inline const ColumnAccessor<check_null>* getColumnAccessor(const attribute_id attr_id) const {
+    // Column Accessors are currently unsupported for this accessor, hence nullptr.
+    return nullptr;
+  }
+
   template <bool check_null = true>
   inline const void* getUntypedValue(const attribute_id attr_id) const {
     return accessor_->template getUntypedValueAtAbsolutePosition<check_null>(attr_id,
@@ -743,6 +794,27 @@ class PackedTupleStorageSubBlockValueAccessor : public ValueAccessor {
     return getTypedValueAtAbsolutePosition(attr_id, current_tuple_);
   }
 
+  inline bool isColumnAccessorSupported() const override {
+    return helper_.isColumnAccessorSupported();
+  }
+
+  /**
+   * @brief Get a pointer to a ColumnAccessor object that provides a fast strided memory
+   *        access on the underlying storage block.
+   * @note The ownership of the returned object lies with the caller.
+   * @warning This method should only be called if isColumnAccessorSupported() method
+   *          returned true. If ColumnAccessor is not supported this method will return a nullptr.
+   *
+   * @param attr_id The attribute id on which this ColumnAccessor will be created.
+   *
+   * @return A pointer to a ColumnAccessor object with specific properties set that can be used
+   *         in a tight loop iterations over the underlying storage block.
+   **/
+  template <bool check_null = true>
+  inline const ColumnAccessor<check_null>* getColumnAccessor(const attribute_id attr_id) const {
+    return helper_.template getColumnAccessor<check_null>(current_tuple_, attr_id);
+  }
+
   template <bool check_null = true>
   inline const void* getUntypedValueAtAbsolutePosition(const attribute_id attr_id,
                                                        const tuple_id tid) const {
@@ -896,6 +968,77 @@ class PackedTupleStorageSubBlockValueAccessor : public ValueAccessor {
   DISALLOW_COPY_AND_ASSIGN(PackedTupleStorageSubBlockValueAccessor);
 };
 
+
+/**
+ * @brief ColumnAccessor is a helper template class that is used to optimize memory
+ *        access patterns for a ValueAccessor when it is used in a tight loop
+ *        to extract values for a given attribute from a given storage block.
+ **/
+template <bool check_null>
+class ColumnAccessor {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param current_tuple_position A constant reference to the tuple position in the containing
+   *        ValueAccessor. This reference value is shared between the containing ValueAccessor &
+   *        a ColumnAccessor. However, a ColumnAccessor *CANNOT* modify this tuple position.
+   * @param num_tuples Number of tuples for this block.
+   * @param base_address The starting address in memory for the first column value.
+   * @param stride The memory offset at which other column values will be found.
+   * @param null_bitmap The bitmap that will be referred in case of nullable attributes.
+   * @param nullable_base The starting index for the first nullable attribute in the bitmap.
+   *        Note that setting this value to -1 will essentially cause null checks to always
+   *        return false.
+   * @param nullable_stride The offset at which null bits will be found for
+   *        different attribute values.
+   **/
+  ColumnAccessor(const tuple_id &current_tuple_position,
+                 const std::size_t num_tuples,
+                 const void *base_address,
+                 const std::size_t stride,
+                 const BitVector<false> *null_bitmap = nullptr,
+                 const int nullable_base = -1,
+                 const unsigned nullable_stride = 0)
+      : current_tuple_position_(current_tuple_position),
+        num_tuples_(num_tuples),
+        base_address_(base_address),
+        stride_(stride),
+        null_bitmap_(null_bitmap),
+        nullable_base_(nullable_base),
+        nullable_stride_(nullable_stride) {
+  }
+
+  /**
+   * @brief Get a pointer to an untyped value for the current tuple in stateful
+   *        iteration over the given column.
+   *
+   * @return An untyped pointer to the attribute value for the current tuple.
+   **/
+  inline const void* getUntypedValue() const {
+    DEBUG_ASSERT(current_tuple_position_ < num_tuples_);
+    if (check_null) {
+      DEBUG_ASSERT(null_bitmap_ != nullptr);
+      if ((nullable_base_ != -1)
+          && null_bitmap_->getBit(current_tuple_position_ * nullable_stride_ + nullable_base_)) {
+        return nullptr;
+      }
+    }
+    return static_cast<const char*>(base_address_) + current_tuple_position_ * stride_;
+  }
+
+ private:
+  const tuple_id &current_tuple_position_;
+  const tuple_id num_tuples_;
+  const void *base_address_;
+  const std::size_t stride_;
+  const BitVector<false> *null_bitmap_;
+  const int nullable_base_;
+  const unsigned nullable_stride_;
+
+  DISALLOW_COPY_AND_ASSIGN(ColumnAccessor);
+};
+
 /** @} */
 
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/743f6b0a/types/containers/ColumnVectorsValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVectorsValueAccessor.hpp b/types/containers/ColumnVectorsValueAccessor.hpp
index 2300f3b..fe413a0 100644
--- a/types/containers/ColumnVectorsValueAccessor.hpp
+++ b/types/containers/ColumnVectorsValueAccessor.hpp
@@ -121,6 +121,24 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
     return column_length_;
   }
 
+  /**
+   * @brief Get a pointer to a ColumnAccessor object that provides a fast strided memory
+   *        access on the underlying storage block.
+   * @note The ownership of the returned object lies with the caller.
+   * @warning This method should only be called if isColumnAccessorSupported() method
+   *          returned true. If ColumnAccessor is not supported this method will return a nullptr.
+   *
+   * @param attr_id The attribute id on which this ColumnAccessor will be created.
+   *
+   * @return A pointer to a ColumnAccessor object with specific properties set that can be used
+   *         in a tight loop iterations over the underlying storage block.
+   **/
+  template <bool check_null = true>
+  inline const ColumnAccessor<check_null>* getColumnAccessor(const attribute_id attr_id) const {
+    // Column Accessors are currently unsupported for this value accessor, hence nullptr.
+    return nullptr;
+  }
+
   template <bool check_null = true>
   inline const void* getUntypedValue(const attribute_id attr_id) const {
     return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_position_);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/743f6b0a/types/operations/comparisons/AsciiStringComparators-inl.hpp
----------------------------------------------------------------------
diff --git a/types/operations/comparisons/AsciiStringComparators-inl.hpp b/types/operations/comparisons/AsciiStringComparators-inl.hpp
index cde03c3..b048c60 100644
--- a/types/operations/comparisons/AsciiStringComparators-inl.hpp
+++ b/types/operations/comparisons/AsciiStringComparators-inl.hpp
@@ -273,13 +273,32 @@ TupleIdSequence* AsciiStringUncheckedComparator<ComparisonFunctor,
       }
     } else {
       accessor->beginIteration();
-      while (accessor->next()) {
-        const void *left_value = accessor->template getUntypedValue<left_nullable>(left_id);
-        const void *right_value = accessor->template getUntypedValue<right_nullable>(right_id);
-        result->set(accessor->getCurrentPosition(),
-                    (!(left_nullable && (left_value == nullptr))
-                        || (right_nullable && (right_value == nullptr)))
-                            && this->compareDataPtrsHelper<true>(left_value, right_value));
+      if (accessor->isColumnAccessorSupported()) {
+        // If ColumnAccessor is supported on the underlying accessor, we have a fast strided
+        // column accessor available for the iteration on the underlying block.
+        std::unique_ptr<const ColumnAccessor<left_nullable>>
+            left_column_accessor(accessor->template getColumnAccessor<left_nullable>(left_id));
+        std::unique_ptr<const ColumnAccessor<right_nullable>>
+            right_column_accessor(accessor->template getColumnAccessor<right_nullable>(right_id));
+        DEBUG_ASSERT(left_column_accessor != nullptr);
+        DEBUG_ASSERT(right_column_accessor != nullptr);
+        while (accessor->next()) {
+          const void *left_value = left_column_accessor->getUntypedValue();
+          const void *right_value = right_column_accessor->getUntypedValue();
+          result->set(accessor->getCurrentPosition(),
+                      (!(left_nullable && (left_value == nullptr))
+                          || (right_nullable && (right_value == nullptr)))
+                          && this->compareDataPtrsHelper<true>(left_value, right_value));
+        }
+      } else {
+        while (accessor->next()) {
+          const void *left_value = accessor->template getUntypedValue<left_nullable>(left_id);
+          const void *right_value = accessor->template getUntypedValue<right_nullable>(right_id);
+          result->set(accessor->getCurrentPosition(),
+                      (!(left_nullable && (left_value == nullptr))
+                          || (right_nullable && (right_value == nullptr)))
+                          && this->compareDataPtrsHelper<true>(left_value, right_value));
+        }
       }
       if (!short_circuit && (filter != nullptr)) {
         result->intersectWith(*filter);
@@ -333,12 +352,28 @@ TupleIdSequence* AsciiStringUncheckedComparator<ComparisonFunctor,
       }
     } else {
       accessor->beginIteration();
-      while (accessor->next()) {
-        const void *va_value
-            = accessor->template getUntypedValue<va_nullable>(value_accessor_attr_id);
-        result->set(accessor->getCurrentPosition(),
-                    !(va_nullable && (va_value == nullptr))
-                        && this->compareDataPtrsHelper<value_accessor_on_left>(va_value, static_string));
+      if (accessor->isColumnAccessorSupported()) {
+        // If ColumnAccessor is supported on the underlying accessor, we have a fast strided
+        // column accessor available for the iteration on the underlying block.
+        std::unique_ptr<const ColumnAccessor<va_nullable>>
+            column_accessor(accessor->template getColumnAccessor<va_nullable>(value_accessor_attr_id));
+        DEBUG_ASSERT(column_accessor != nullptr);
+        while (accessor->next()) {
+          const void *va_value = column_accessor->getUntypedValue();
+          result->set(accessor->getCurrentPosition(),
+                      !(va_nullable && (va_value == nullptr))
+                          && this->compareDataPtrsHelper<value_accessor_on_left>(va_value,
+                                                                                 static_string));
+        }
+      } else {
+        while (accessor->next()) {
+          const void *va_value
+              = accessor->template getUntypedValue<va_nullable>(value_accessor_attr_id);
+          result->set(accessor->getCurrentPosition(),
+                      !(va_nullable && (va_value == nullptr))
+                          && this->compareDataPtrsHelper<value_accessor_on_left>(va_value,
+                                                                                 static_string));
+        }
       }
       if (!short_circuit && (filter != nullptr)) {
         result->intersectWith(*filter);
@@ -448,16 +483,36 @@ TupleIdSequence* AsciiStringUncheckedComparator<ComparisonFunctor,
         } else {
           accessor->beginIteration();
           std::size_t cv_pos = 0;
-          while (accessor->next()) {
-            const void *cv_value
-                = column_vector.template getUntypedValue<cv_nullable>(cv_pos);
-            const void *va_value
-                = accessor->template getUntypedValue<va_nullable>(value_accessor_attr_id);
-            result->set(cv_pos,
-                        (!((cv_nullable && (cv_value == nullptr))
-                            || (va_nullable && (va_value == nullptr))))
-                                && this->compareDataPtrsHelper<column_vector_on_left>(cv_value, va_value));
-            ++cv_pos;
+          if (accessor->isColumnAccessorSupported()) {
+            // If ColumnAccessor is supported on the underlying accessor, we have a fast strided
+            // column accessor available for the iteration on the underlying block.
+            std::unique_ptr<const ColumnAccessor<va_nullable>>
+                column_accessor(accessor->template getColumnAccessor<va_nullable>(value_accessor_attr_id));
+            DEBUG_ASSERT(column_accessor != nullptr);
+            while (accessor->next()) {
+              const void *cv_value
+                  = column_vector.template getUntypedValue<cv_nullable>(cv_pos);
+              const void *va_value = column_accessor->getUntypedValue();
+              result->set(cv_pos,
+                          (!((cv_nullable && (cv_value == nullptr))
+                              || (va_nullable && (va_value == nullptr))))
+                              && this->compareDataPtrsHelper<column_vector_on_left>(cv_value,
+                                                                                    va_value));
+              ++cv_pos;
+            }
+          } else {
+            while (accessor->next()) {
+              const void *cv_value
+                  = column_vector.template getUntypedValue<cv_nullable>(cv_pos);
+              const void *va_value
+                  = accessor->template getUntypedValue<va_nullable>(value_accessor_attr_id);
+              result->set(cv_pos,
+                          (!((cv_nullable && (cv_value == nullptr))
+                              || (va_nullable && (va_value == nullptr))))
+                              && this->compareDataPtrsHelper<column_vector_on_left>(cv_value,
+                                                                                    va_value));
+              ++cv_pos;
+            }
           }
         }
         if (!short_circuit && (filter != nullptr)) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/743f6b0a/types/operations/comparisons/Comparison-inl.hpp
----------------------------------------------------------------------
diff --git a/types/operations/comparisons/Comparison-inl.hpp b/types/operations/comparisons/Comparison-inl.hpp
index 25f5f15..96771bf 100644
--- a/types/operations/comparisons/Comparison-inl.hpp
+++ b/types/operations/comparisons/Comparison-inl.hpp
@@ -309,11 +309,28 @@ TupleIdSequence* UncheckedComparator::compareSingleValueAccessorDefaultImpl(
       }
     } else {
       accessor->beginIteration();
-      while (accessor->next()) {
-        result->set(accessor->getCurrentPosition(),
-                    this->compareDataPtrs(
-                        accessor->template getUntypedValue<left_nullable>(left_id),
-                        accessor->template getUntypedValue<right_nullable>(right_id)));
+      if (accessor->isColumnAccessorSupported()) {
+        // If ColumnAccessor is supported on the underlying accessor, we have a fast strided
+        // column accessor available for the iteration on the underlying block.
+        std::unique_ptr<const ColumnAccessor<left_nullable>>
+            left_column_accessor(accessor->template getColumnAccessor<left_nullable>(left_id));
+        std::unique_ptr<const ColumnAccessor<right_nullable>>
+            right_column_accessor(accessor->template getColumnAccessor<right_nullable>(right_id));
+        DEBUG_ASSERT(left_column_accessor != nullptr);
+        DEBUG_ASSERT(right_column_accessor != nullptr);
+        while (accessor->next()) {
+          result->set(accessor->getCurrentPosition(),
+                      this->compareDataPtrs(
+                          left_column_accessor->getUntypedValue(),
+                          right_column_accessor->getUntypedValue()));
+        }
+      } else {
+        while (accessor->next()) {
+          result->set(accessor->getCurrentPosition(),
+                      this->compareDataPtrs(
+                          accessor->template getUntypedValue<left_nullable>(left_id),
+                          accessor->template getUntypedValue<right_nullable>(right_id)));
+        }
       }
       if (!short_circuit && (filter != nullptr)) {
         result->intersectWith(*filter);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/743f6b0a/types/operations/comparisons/LiteralComparators-inl.hpp
----------------------------------------------------------------------
diff --git a/types/operations/comparisons/LiteralComparators-inl.hpp b/types/operations/comparisons/LiteralComparators-inl.hpp
index 14844ac..31eec13 100644
--- a/types/operations/comparisons/LiteralComparators-inl.hpp
+++ b/types/operations/comparisons/LiteralComparators-inl.hpp
@@ -273,13 +273,32 @@ TupleIdSequence* LiteralUncheckedComparator<ComparisonFunctor,
       }
     } else {
       accessor->beginIteration();
-      while (accessor->next()) {
-        const void *left_value = accessor->template getUntypedValue<left_nullable>(left_id);
-        const void *right_value = accessor->template getUntypedValue<right_nullable>(right_id);
-        result->set(accessor->getCurrentPosition(),
-                    (!((left_nullable && (left_value == nullptr))
-                        || (right_nullable && (right_value == nullptr))))
-                            && this->compareDataPtrsHelper<true>(left_value, right_value));
+      if (accessor->isColumnAccessorSupported()) {
+        // If ColumnAccessor is supported on the underlying accessor, we have a fast strided
+        // column accessor available for the iteration on the underlying block.
+        std::unique_ptr<const ColumnAccessor<left_nullable>>
+            left_column_accessor(accessor->template getColumnAccessor<left_nullable>(left_id));
+        std::unique_ptr<const ColumnAccessor<right_nullable>>
+            right_column_accessor(accessor->template getColumnAccessor<right_nullable>(right_id));
+        DEBUG_ASSERT(left_column_accessor != nullptr);
+        DEBUG_ASSERT(right_column_accessor != nullptr);
+        while (accessor->next()) {
+          const void *left_value = left_column_accessor->getUntypedValue();
+          const void *right_value = right_column_accessor->getUntypedValue();
+          result->set(accessor->getCurrentPosition(),
+                      (!((left_nullable && (left_value == nullptr))
+                          || (right_nullable && (right_value == nullptr))))
+                          && this->compareDataPtrsHelper<true>(left_value, right_value));
+        }
+      } else {
+        while (accessor->next()) {
+          const void *left_value = accessor->template getUntypedValue<left_nullable>(left_id);
+          const void *right_value = accessor->template getUntypedValue<right_nullable>(right_id);
+          result->set(accessor->getCurrentPosition(),
+                      (!((left_nullable && (left_value == nullptr))
+                          || (right_nullable && (right_value == nullptr))))
+                          && this->compareDataPtrsHelper<true>(left_value, right_value));
+        }
       }
       if (!short_circuit && (filter != nullptr)) {
         result->intersectWith(*filter);
@@ -339,12 +358,27 @@ TupleIdSequence* LiteralUncheckedComparator<ComparisonFunctor,
       }
     } else {
       accessor->beginIteration();
-      while (accessor->next()) {
-        const void *va_value
-            = accessor->template getUntypedValue<va_nullable>(value_accessor_attr_id);
-        result->set(accessor->getCurrentPosition(),
-                    !(va_nullable && (va_value == nullptr))
-                        && this->compareDataPtrsHelper<value_accessor_on_left>(va_value, &literal));
+      if (accessor->isColumnAccessorSupported()) {
+        // If ColumnAccessor is supported on the underlying accessor, we have a fast strided
+        // column accessor available for the iteration on the underlying block.
+        std::unique_ptr<const ColumnAccessor<va_nullable>>
+            column_accessor(accessor->template getColumnAccessor<va_nullable>(value_accessor_attr_id));
+        DEBUG_ASSERT(column_accessor != nullptr);
+        while (accessor->next()) {
+          const void *va_value = column_accessor->getUntypedValue();
+          result->set(accessor->getCurrentPosition(),
+                      !(va_nullable && (va_value == nullptr))
+                          && this->compareDataPtrsHelper<value_accessor_on_left>(va_value, &literal));
+        }
+      } else {
+        while (accessor->next()) {
+          const void *va_value
+              = accessor->template getUntypedValue<va_nullable>(value_accessor_attr_id);
+          result->set(accessor->getCurrentPosition(),
+                      !(va_nullable && (va_value == nullptr))
+                          && this->compareDataPtrsHelper<value_accessor_on_left>(va_value,
+                                                                                 &literal));
+        }
       }
       if (!short_circuit && (filter != nullptr)) {
         result->intersectWith(*filter);
@@ -458,16 +492,36 @@ TupleIdSequence* LiteralUncheckedComparator<ComparisonFunctor,
       } else {
         accessor->beginIteration();
         std::size_t cv_pos = 0;
-        while (accessor->next()) {
-          const void *cv_value
-              = native_column_vector.getUntypedValue<cv_nullable>(cv_pos);
-          const void *va_value
-              = accessor->template getUntypedValue<va_nullable>(value_accessor_attr_id);
-          result->set(cv_pos,
-                      (!((cv_nullable && (cv_value == nullptr))
-                          || (va_nullable && (va_value == nullptr))))
-                              && this->compareDataPtrsHelper<column_vector_on_left>(cv_value, va_value));
-          ++cv_pos;
+        if (accessor->isColumnAccessorSupported()) {
+          // If ColumnAccessor is supported on the underlying accessor, we have a fast strided
+          // column accessor available for the iteration on the underlying block.
+          std::unique_ptr<const ColumnAccessor<va_nullable>>
+              column_accessor(accessor->template getColumnAccessor<va_nullable>(value_accessor_attr_id));
+          DEBUG_ASSERT(column_accessor != nullptr);
+          while (accessor->next()) {
+            const void *cv_value
+                = native_column_vector.getUntypedValue<cv_nullable>(cv_pos);
+            const void *va_value = column_accessor->getUntypedValue();
+            result->set(cv_pos,
+                        (!((cv_nullable && (cv_value == nullptr))
+                            || (va_nullable && (va_value == nullptr))))
+                            && this->compareDataPtrsHelper<column_vector_on_left>(cv_value,
+                                                                                  va_value));
+            ++cv_pos;
+          }
+        } else {
+          while (accessor->next()) {
+            const void *cv_value
+                = native_column_vector.getUntypedValue<cv_nullable>(cv_pos);
+            const void *va_value
+                = accessor->template getUntypedValue<va_nullable>(value_accessor_attr_id);
+            result->set(cv_pos,
+                        (!((cv_nullable && (cv_value == nullptr))
+                            || (va_nullable && (va_value == nullptr))))
+                            && this->compareDataPtrsHelper<column_vector_on_left>(cv_value,
+                                                                                  va_value));
+            ++cv_pos;
+          }
         }
       }
       if (!short_circuit && (filter != nullptr)) {
@@ -495,13 +549,30 @@ TypedValue LiteralUncheckedComparator<ComparisonFunctor,
       accessor,
       [&](auto *accessor) -> void {  // NOLINT(build/c++11)
     accessor->beginIteration();
-    while (accessor->next()) {
-      const void *va_value = accessor->template getUntypedValue<left_nullable>(value_accessor_id);
-      if (left_nullable && !va_value) {
-        continue;
+    if (accessor->isColumnAccessorSupported()) {
+      // If ColumnAccessor is supported on the underlying accessor, we have a fast strided
+      // column accessor available for the iteration on the underlying block.
+      std::unique_ptr<const ColumnAccessor<left_nullable>>
+          column_accessor(accessor->template getColumnAccessor<left_nullable>(value_accessor_id));
+      DEBUG_ASSERT(column_accessor != nullptr);
+      while (accessor->next()) {
+        const void *va_value = column_accessor->getUntypedValue();
+        if (left_nullable && !va_value) {
+          continue;
+        }
+        if (!current_literal || this->compareDataPtrsHelper<true>(va_value, current_literal)) {
+          current_literal = va_value;
+        }
       }
-      if (!current_literal || this->compareDataPtrsHelper<true>(va_value, current_literal)) {
-        current_literal = va_value;
+    } else {
+      while (accessor->next()) {
+        const void *va_value = accessor->template getUntypedValue<left_nullable>(value_accessor_id);
+        if (left_nullable && !va_value) {
+          continue;
+        }
+        if (!current_literal || this->compareDataPtrsHelper<true>(va_value, current_literal)) {
+          current_literal = va_value;
+        }
       }
     }
   });

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/743f6b0a/types/operations/comparisons/PatternMatchingComparators-inl.hpp
----------------------------------------------------------------------
diff --git a/types/operations/comparisons/PatternMatchingComparators-inl.hpp b/types/operations/comparisons/PatternMatchingComparators-inl.hpp
index 617eadf..ca0f45e 100644
--- a/types/operations/comparisons/PatternMatchingComparators-inl.hpp
+++ b/types/operations/comparisons/PatternMatchingComparators-inl.hpp
@@ -241,11 +241,25 @@ TupleIdSequence* PatternMatchingUncheckedComparator<is_like_pattern, is_negation
       }
     } else {
       accessor->beginIteration();
-      while (accessor->next()) {
-        const void *va_value
-            = accessor->template getUntypedValue<left_nullable>(value_accessor_attr_id);
-        result->set(accessor->getCurrentPosition(),
-                    this->matchDataPtrWithPattern(va_value, re2_pattern));
+      if (accessor->isColumnAccessorSupported()) {
+        // If ColumnAccessor is supported on the underlying accessor, we have a fast strided
+        // column accessor available for the iteration on the underlying block.
+        std::unique_ptr<const ColumnAccessor<left_nullable>>
+            column_accessor
+            (accessor->template getColumnAccessor<left_nullable>(value_accessor_attr_id));
+        DEBUG_ASSERT(column_accessor != nullptr);
+        while (accessor->next()) {
+          const void *va_value = column_accessor->getUntypedValue();
+          result->set(accessor->getCurrentPosition(),
+                      this->matchDataPtrWithPattern(va_value, re2_pattern));
+        }
+      } else {
+        while (accessor->next()) {
+          const void *va_value
+              = accessor->template getUntypedValue<left_nullable>(value_accessor_attr_id);
+          result->set(accessor->getCurrentPosition(),
+                      this->matchDataPtrWithPattern(va_value, re2_pattern));
+        }
       }
       if (!short_circuit && (filter != nullptr)) {
         result->intersectWith(*filter);



[08/12] incubator-quickstep git commit: Update travis to run only 1 thread while building release version

Posted by ji...@apache.org.
Update travis to run only 1 thread while building release version


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/160276c7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/160276c7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/160276c7

Branch: refs/heads/lip-refactor
Commit: 160276c7d790883ad667105cf1ec3e43a487855c
Parents: 17ffbb0
Author: Saket Saurabh <ss...@cs.wisc.edu>
Authored: Mon Oct 17 23:18:30 2016 -0500
Committer: Saket Saurabh <ss...@cs.wisc.edu>
Committed: Tue Oct 18 09:52:17 2016 -0500

----------------------------------------------------------------------
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/160276c7/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 6895c0d..784a46f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -21,7 +21,7 @@ env:
   - BUILD_TYPE=Release VECTOR_COPY_ELISION_LEVEL=selection
 
 install:
-  - if [ "$CC" = "gcc" ] || [[ "$BUILD_TYPE" = "Release" &&  "$VECTOR_COPY_ELISION_LEVEL" = "selection" ]]; then
+  - if [ "$CC" = "gcc" ] || [[ "$BUILD_TYPE" = "Release" ]]; then
       export MAKE_JOBS=1;
     else
       export MAKE_JOBS=2;


[02/12] incubator-quickstep git commit: Minor bug fix in AggregationOperationState

Posted by ji...@apache.org.
Minor bug fix in AggregationOperationState

- Replace getHashTable() call with getHashTableFast() call.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/80af2332
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/80af2332
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/80af2332

Branch: refs/heads/lip-refactor
Commit: 80af23327f1f19d2f55457f949917a3cb2e467a5
Parents: b5dcb6d
Author: Harshad Deshmukh <hb...@apache.org>
Authored: Mon Oct 10 14:13:09 2016 -0500
Committer: Harshad Deshmukh <hb...@apache.org>
Committed: Mon Oct 10 14:13:09 2016 -0500

----------------------------------------------------------------------
 storage/AggregationOperationState.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/80af2332/storage/AggregationOperationState.cpp
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.cpp b/storage/AggregationOperationState.cpp
index 073b813..7908db1 100644
--- a/storage/AggregationOperationState.cpp
+++ b/storage/AggregationOperationState.cpp
@@ -537,7 +537,7 @@ void AggregationOperationState::finalizeHashTable(
       // However for aggregateOnDistinctifyHashTableForGroupBy to work
       // correctly, we should create an empty group by hash table.
       AggregationStateHashTableBase *new_hash_table =
-          group_by_hashtable_pool_->getHashTable();
+          group_by_hashtable_pool_->getHashTableFast();
       group_by_hashtable_pool_->returnHashTable(new_hash_table);
       hash_tables = group_by_hashtable_pool_->getAllHashTables();
     }


[12/12] incubator-quickstep git commit: Add LIPFilter feature.

Posted by ji...@apache.org.
Add LIPFilter feature.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/ca9c1790
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/ca9c1790
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/ca9c1790

Branch: refs/heads/lip-refactor
Commit: ca9c1790aaeed64b3975b90ea59c70ff483ae6d5
Parents: 7a46443
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Wed Sep 7 13:20:43 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Tue Oct 18 12:21:01 2016 -0500

----------------------------------------------------------------------
 expressions/scalar/ScalarAttribute.cpp          |   2 +-
 query_execution/CMakeLists.txt                  |   9 +-
 query_execution/QueryContext.cpp                |  42 ++--
 query_execution/QueryContext.hpp                | 149 +++++++++------
 query_execution/QueryContext.proto              |  25 +--
 query_optimizer/CMakeLists.txt                  |  20 ++
 query_optimizer/ExecutionGenerator.cpp          |  36 +++-
 query_optimizer/ExecutionGenerator.hpp          |   5 +
 query_optimizer/LIPFilterGenerator.cpp          | 190 +++++++++++++++++++
 query_optimizer/LIPFilterGenerator.hpp          | 129 +++++++++++++
 relational_operators/AggregationOperator.cpp    |  21 +-
 relational_operators/AggregationOperator.hpp    |   9 +-
 relational_operators/BuildHashOperator.cpp      |  23 ++-
 relational_operators/BuildHashOperator.hpp      |  18 +-
 relational_operators/HashJoinOperator.cpp       |  86 +++++++--
 relational_operators/HashJoinOperator.hpp       |  43 +++--
 relational_operators/RelationalOperator.hpp     |  12 +-
 relational_operators/SelectOperator.cpp         |  67 ++++++-
 relational_operators/SelectOperator.hpp         |  18 +-
 storage/AggregationOperationState.cpp           |  60 ++++--
 storage/AggregationOperationState.hpp           |  10 +-
 storage/CMakeLists.txt                          |   2 -
 storage/FastHashTable.hpp                       | 152 ++-------------
 storage/FastHashTableFactory.hpp                |  35 +---
 storage/HashTable.hpp                           | 103 ----------
 storage/HashTable.proto                         |   6 -
 storage/HashTableFactory.hpp                    |  34 +---
 storage/StorageBlock.cpp                        | 131 ++++++-------
 storage/StorageBlock.hpp                        |  21 +-
 utility/DAG.hpp                                 |   6 +-
 utility/lip_filter/CMakeLists.txt               |  41 +++-
 utility/lip_filter/LIPFilter.cpp                |  24 +++
 utility/lip_filter/LIPFilter.hpp                |  41 ++++
 utility/lip_filter/LIPFilter.proto              |  58 ++++++
 utility/lip_filter/LIPFilterAdaptiveProber.hpp  | 188 ++++++++++++++++++
 utility/lip_filter/LIPFilterBuilder.hpp         |  84 ++++++++
 utility/lip_filter/LIPFilterDeployment.cpp      |  69 +++++++
 utility/lip_filter/LIPFilterDeployment.hpp      |  72 +++++++
 utility/lip_filter/LIPFilterFactory.cpp         |  57 ++++++
 utility/lip_filter/LIPFilterFactory.hpp         |  51 +++++
 utility/lip_filter/SingleIdentityHashFilter.hpp | 168 ++++++++++++++++
 41 files changed, 1763 insertions(+), 554 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/expressions/scalar/ScalarAttribute.cpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarAttribute.cpp b/expressions/scalar/ScalarAttribute.cpp
index b29286b..cc42084 100644
--- a/expressions/scalar/ScalarAttribute.cpp
+++ b/expressions/scalar/ScalarAttribute.cpp
@@ -168,7 +168,7 @@ ColumnVector* ScalarAttribute::getAllValuesForJoin(
   ValueAccessor *accessor = using_left_relation ? left_accessor
                                                 : right_accessor;
 
-  return InvokeOnValueAccessorNotAdapter(
+  return InvokeOnAnyValueAccessor(
       accessor,
       [&joined_tuple_ids,
        &attr_id,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/query_execution/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_execution/CMakeLists.txt b/query_execution/CMakeLists.txt
index 6a84be1..b6c77b1 100644
--- a/query_execution/CMakeLists.txt
+++ b/query_execution/CMakeLists.txt
@@ -189,11 +189,13 @@ target_link_libraries(quickstep_queryexecution_QueryContext
                       quickstep_storage_WindowAggregationOperationState
                       quickstep_types_TypedValue
                       quickstep_types_containers_Tuple
-                      quickstep_utility_BloomFilter
                       quickstep_utility_Macros
-                      quickstep_utility_SortConfiguration)
+                      quickstep_utility_SortConfiguration
+                      quickstep_utility_lipfilter_LIPFilter
+                      quickstep_utility_lipfilter_LIPFilterDeployment
+                      quickstep_utility_lipfilter_LIPFilterFactory
+                      quickstep_utility_lipfilter_LIPFilter_proto)
 target_link_libraries(quickstep_queryexecution_QueryContext_proto
-                      quickstep_utility_BloomFilter_proto
                       quickstep_expressions_Expressions_proto
                       quickstep_expressions_tablegenerator_GeneratorFunction_proto
                       quickstep_storage_AggregationOperationState_proto
@@ -202,6 +204,7 @@ target_link_libraries(quickstep_queryexecution_QueryContext_proto
                       quickstep_storage_WindowAggregationOperationState_proto
                       quickstep_types_containers_Tuple_proto
                       quickstep_utility_SortConfiguration_proto
+                      quickstep_utility_lipfilter_LIPFilter_proto
                       ${PROTOBUF_LIBRARY})
 target_link_libraries(quickstep_queryexecution_QueryExecutionMessages_proto
                       quickstep_catalog_Catalog_proto

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/query_execution/QueryContext.cpp
----------------------------------------------------------------------
diff --git a/query_execution/QueryContext.cpp b/query_execution/QueryContext.cpp
index 2572e18..57d200a 100644
--- a/query_execution/QueryContext.cpp
+++ b/query_execution/QueryContext.cpp
@@ -39,7 +39,10 @@
 #include "storage/InsertDestination.pb.h"
 #include "types/TypedValue.hpp"
 #include "types/containers/Tuple.hpp"
-#include "utility/BloomFilter.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+#include "utility/lip_filter/LIPFilter.pb.h"
+#include "utility/lip_filter/LIPFilterDeployment.hpp"
+#include "utility/lip_filter/LIPFilterFactory.hpp"
 #include "utility/SortConfiguration.hpp"
 
 #include "glog/logging.h"
@@ -68,10 +71,6 @@ QueryContext::QueryContext(const serialization::QueryContext &proto,
                                                         storage_manager));
   }
 
-  for (int i = 0; i < proto.bloom_filters_size(); ++i) {
-    bloom_filters_.emplace_back(new BloomFilter(proto.bloom_filters(i)));
-  }
-
   for (int i = 0; i < proto.generator_functions_size(); ++i) {
     const GeneratorFunctionHandle *func_handle =
         GeneratorFunctionFactory::Instance().reconstructFromProto(proto.generator_functions(i));
@@ -83,8 +82,7 @@ QueryContext::QueryContext(const serialization::QueryContext &proto,
   for (int i = 0; i < proto.join_hash_tables_size(); ++i) {
     join_hash_tables_.emplace_back(
         JoinHashTableFactory::CreateResizableFromProto(proto.join_hash_tables(i),
-                                                       storage_manager,
-                                                       bloom_filters_));
+                                                       storage_manager));
   }
 
   for (int i = 0; i < proto.insert_destinations_size(); ++i) {
@@ -98,6 +96,18 @@ QueryContext::QueryContext(const serialization::QueryContext &proto,
         bus));
   }
 
+  for (int i = 0; i < proto.lip_filters_size(); ++i) {
+    lip_filters_.emplace_back(
+        std::unique_ptr<LIPFilter>(
+            LIPFilterFactory::ReconstructFromProto(proto.lip_filters(i))));
+  }
+
+  for (int i = 0; i < proto.lip_filter_deployments_size(); ++i) {
+    lip_deployments_.emplace_back(
+        std::make_unique<LIPFilterDeployment>(
+            proto.lip_filter_deployments(i), lip_filters_));
+  }
+
   for (int i = 0; i < proto.predicates_size(); ++i) {
     predicates_.emplace_back(
         PredicateFactory::ReconstructFromProto(proto.predicates(i), database));
@@ -157,12 +167,6 @@ bool QueryContext::ProtoIsValid(const serialization::QueryContext &proto,
     }
   }
 
-  for (int i = 0; i < proto.bloom_filters_size(); ++i) {
-    if (!BloomFilter::ProtoIsValid(proto.bloom_filters(i))) {
-      return false;
-    }
-  }
-
   // Each GeneratorFunctionHandle object is serialized as a function name with
   // a list of arguments. Here checks that the arguments are valid TypedValue's.
   for (int i = 0; i < proto.generator_functions_size(); ++i) {
@@ -191,6 +195,18 @@ bool QueryContext::ProtoIsValid(const serialization::QueryContext &proto,
     }
   }
 
+  for (int i = 0; i < proto.lip_filters_size(); ++i) {
+    if (!LIPFilterFactory::ProtoIsValid(proto.lip_filters(i))) {
+      return false;
+    }
+  }
+
+  for (int i = 0; i < proto.lip_filter_deployments_size(); ++i) {
+    if (!LIPFilterDeployment::ProtoIsValid(proto.lip_filter_deployments(i))) {
+      return false;
+    }
+  }
+
   for (int i = 0; i < proto.predicates_size(); ++i) {
     if (!PredicateFactory::ProtoIsValid(proto.predicates(i), database)) {
       return false;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/query_execution/QueryContext.hpp
----------------------------------------------------------------------
diff --git a/query_execution/QueryContext.hpp b/query_execution/QueryContext.hpp
index 393b55e..66476f4 100644
--- a/query_execution/QueryContext.hpp
+++ b/query_execution/QueryContext.hpp
@@ -35,7 +35,8 @@
 #include "storage/InsertDestination.hpp"
 #include "storage/WindowAggregationOperationState.hpp"
 #include "types/containers/Tuple.hpp"
-#include "utility/BloomFilter.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+#include "utility/lip_filter/LIPFilterDeployment.hpp"
 #include "utility/Macros.hpp"
 #include "utility/SortConfiguration.hpp"
 
@@ -67,11 +68,6 @@ class QueryContext {
   typedef std::uint32_t aggregation_state_id;
 
   /**
-   * @brief A unique identifier for a BloomFilter per query.
-   **/
-  typedef std::uint32_t bloom_filter_id;
-
-  /**
    * @brief A unique identifier for a GeneratorFunctionHandle per query.
    **/
   typedef std::uint32_t generator_function_id;
@@ -90,6 +86,17 @@ class QueryContext {
   typedef std::uint32_t join_hash_table_id;
 
   /**
+   * @brief A unique identifier for a LIPFilter per query.
+   **/
+  typedef std::uint32_t lip_filter_id;
+
+  /**
+   * @brief A unique identifier for a LIPFilterDeployment per query.
+   **/
+  typedef std::uint32_t lip_deployment_id;
+  static constexpr lip_deployment_id kInvalidILIPDeploymentId = static_cast<lip_deployment_id>(-1);
+
+  /**
    * @brief A unique identifier for a Predicate per query.
    *
    * @note A negative value indicates a null Predicate.
@@ -193,52 +200,6 @@ class QueryContext {
   }
 
   /**
-   * @brief Whether the given BloomFilter id is valid.
-   *
-   * @param id The BloomFilter id.
-   *
-   * @return True if valid, otherwise false.
-   **/
-  bool isValidBloomFilterId(const bloom_filter_id id) const {
-    return id < bloom_filters_.size();
-  }
-
-  /**
-   * @brief Get a mutable reference to the BloomFilter.
-   *
-   * @param id The BloomFilter id.
-   *
-   * @return The BloomFilter, already created in the constructor.
-   **/
-  inline BloomFilter* getBloomFilterMutable(const bloom_filter_id id) {
-    DCHECK_LT(id, bloom_filters_.size());
-    return bloom_filters_[id].get();
-  }
-
-  /**
-   * @brief Get a constant pointer to the BloomFilter.
-   *
-   * @param id The BloomFilter id.
-   *
-   * @return The constant pointer to BloomFilter that is
-   *         already created in the constructor.
-   **/
-  inline const BloomFilter* getBloomFilter(const bloom_filter_id id) const {
-    DCHECK_LT(id, bloom_filters_.size());
-    return bloom_filters_[id].get();
-  }
-
-  /**
-   * @brief Destory the given BloomFilter.
-   *
-   * @param id The id of the BloomFilter to destroy.
-   **/
-  inline void destroyBloomFilter(const bloom_filter_id id) {
-    DCHECK_LT(id, bloom_filters_.size());
-    bloom_filters_[id].reset();
-  }
-
-  /**
    * @brief Whether the given GeneratorFunctionHandle id is valid.
    *
    * @param id The GeneratorFunctionHandle id.
@@ -333,6 +294,87 @@ class QueryContext {
   }
 
   /**
+   * @brief Whether the given LIPFilter id is valid.
+   *
+   * @param id The LIPFilter id.
+   *
+   * @return True if valid, otherwise false.
+   **/
+  bool isValidLIPFilterId(const lip_filter_id id) const {
+    return id < lip_filters_.size();
+  }
+
+  /**
+   * @brief Get a mutable pointer to the LIPFilter.
+   *
+   * @param id The LIPFilter id.
+   *
+   * @return The LIPFilter, already created in the constructor.
+   **/
+  inline LIPFilter* getLIPFilterMutable(const lip_filter_id id) {
+    DCHECK_LT(id, lip_filters_.size());
+    return lip_filters_[id].get();
+  }
+
+  /**
+   * @brief Get a constant pointer to the LIPFilter.
+   *
+   * @param id The LIPFilter id.
+   *
+   * @return The constant pointer to LIPFilter that is
+   *         already created in the constructor.
+   **/
+  inline const LIPFilter* getLIPFilter(const lip_filter_id id) const {
+    DCHECK_LT(id, lip_filters_.size());
+    return lip_filters_[id].get();
+  }
+
+  /**
+   * @brief Destroy the given LIPFilter.
+   *
+   * @param id The id of the LIPFilter to destroy.
+   **/
+  inline void destroyLIPFilter(const lip_filter_id id) {
+    DCHECK_LT(id, lip_filters_.size());
+    lip_filters_[id].reset();
+  }
+
+  /**
+   * @brief Whether the given LIPFilterDeployment id is valid.
+   *
+   * @param id The LIPFilterDeployment id.
+   *
+   * @return True if valid, otherwise false.
+   **/
+  bool isValidLIPDeploymentId(const lip_deployment_id id) const {
+    return id < lip_deployments_.size();
+  }
+
+  /**
+   * @brief Get a constant pointer to the LIPFilterDeployment.
+   *
+   * @param id The LIPFilterDeployment id.
+   *
+   * @return The constant pointer to LIPFilterDeployment that is
+   *         already created in the constructor.
+   **/
+  inline const LIPFilterDeployment* getLIPDeployment(
+      const lip_deployment_id id) const {
+    DCHECK_LT(id, lip_deployments_.size());
+    return lip_deployments_[id].get();
+  }
+
+  /**
+   * @brief Destroy the given LIPFilterDeployment.
+   *
+   * @param id The id of the LIPFilterDeployment to destroy.
+   **/
+  inline void destroyLIPDeployment(const lip_deployment_id id) {
+    DCHECK_LT(id, lip_deployments_.size());
+    lip_deployments_[id].reset();
+  }
+
+  /**
    * @brief Whether the given Predicate id is valid or no predicate.
    *
    * @param id The Predicate id.
@@ -507,10 +549,11 @@ class QueryContext {
 
  private:
   std::vector<std::unique_ptr<AggregationOperationState>> aggregation_states_;
-  std::vector<std::unique_ptr<BloomFilter>> bloom_filters_;
   std::vector<std::unique_ptr<const GeneratorFunctionHandle>> generator_functions_;
   std::vector<std::unique_ptr<InsertDestination>> insert_destinations_;
   std::vector<std::unique_ptr<JoinHashTable>> join_hash_tables_;
+  std::vector<std::unique_ptr<LIPFilter>> lip_filters_;
+  std::vector<std::unique_ptr<LIPFilterDeployment>> lip_deployments_;
   std::vector<std::unique_ptr<const Predicate>> predicates_;
   std::vector<std::vector<std::unique_ptr<const Scalar>>> scalar_groups_;
   std::vector<std::unique_ptr<const SortConfiguration>> sort_configs_;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/query_execution/QueryContext.proto
----------------------------------------------------------------------
diff --git a/query_execution/QueryContext.proto b/query_execution/QueryContext.proto
index 1a586a4..ab0f520 100644
--- a/query_execution/QueryContext.proto
+++ b/query_execution/QueryContext.proto
@@ -26,8 +26,8 @@ import "storage/HashTable.proto";
 import "storage/InsertDestination.proto";
 import "storage/WindowAggregationOperationState.proto";
 import "types/containers/Tuple.proto";
-import "utility/BloomFilter.proto";
 import "utility/SortConfiguration.proto";
+import "utility/lip_filter/LIPFilter.proto";
 
 message QueryContext {
   message ScalarGroup {
@@ -46,19 +46,20 @@ message QueryContext {
   }
 
   repeated AggregationOperationState aggregation_states = 1;
-  repeated BloomFilter bloom_filters = 2;
-  repeated GeneratorFunctionHandle generator_functions = 3;
-  repeated HashTable join_hash_tables = 4;
-  repeated InsertDestination insert_destinations = 5;
-  repeated Predicate predicates = 6;
-  repeated ScalarGroup scalar_groups = 7;
-  repeated SortConfiguration sort_configs = 8;
-  repeated Tuple tuples = 9;
+  repeated GeneratorFunctionHandle generator_functions = 2;
+  repeated HashTable join_hash_tables = 3;
+  repeated InsertDestination insert_destinations = 4;
+  repeated LIPFilter lip_filters = 5;
+  repeated LIPFilterDeployment lip_filter_deployments = 6;
+  repeated Predicate predicates = 7;
+  repeated ScalarGroup scalar_groups = 8;
+  repeated SortConfiguration sort_configs = 9;
+  repeated Tuple tuples = 10;
 
   // NOTE(zuyu): For UpdateWorkOrder only.
-  repeated UpdateGroup update_groups = 10;
+  repeated UpdateGroup update_groups = 11;
 
-  repeated WindowAggregationOperationState window_aggregation_states = 11;
+  repeated WindowAggregationOperationState window_aggregation_states = 12;
 
-  required uint64 query_id = 12;
+  required uint64 query_id = 13;
 }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index fa9141c..e1f36d1 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -41,6 +41,7 @@ add_subdirectory(tests)
 
 # Declare micro-libs:
 add_library(quickstep_queryoptimizer_ExecutionGenerator ExecutionGenerator.cpp ExecutionGenerator.hpp)
+add_library(quickstep_queryoptimizer_LIPFilterGenerator LIPFilterGenerator.cpp LIPFilterGenerator.hpp)
 add_library(quickstep_queryoptimizer_LogicalGenerator LogicalGenerator.cpp LogicalGenerator.hpp)
 add_library(quickstep_queryoptimizer_LogicalToPhysicalMapper
             ../empty_src.cpp
@@ -72,6 +73,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
                       quickstep_expressions_windowaggregation_WindowAggregateFunction_proto
                       quickstep_queryexecution_QueryContext
                       quickstep_queryexecution_QueryContext_proto
+                      quickstep_queryoptimizer_LIPFilterGenerator
                       quickstep_queryoptimizer_OptimizerContext
                       quickstep_queryoptimizer_QueryHandle
                       quickstep_queryoptimizer_QueryPlan
@@ -151,6 +153,23 @@ if (ENABLE_DISTRIBUTED)
   target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
                         quickstep_catalog_Catalog_proto)
 endif()
+target_link_libraries(quickstep_queryoptimizer_LIPFilterGenerator
+                      glog
+                      quickstep_catalog_CatalogAttribute
+                      quickstep_catalog_CatalogTypedefs
+                      quickstep_queryexecution_QueryContext
+                      quickstep_queryexecution_QueryContext_proto
+                      quickstep_queryoptimizer_QueryPlan
+                      quickstep_queryoptimizer_physical_Aggregate
+                      quickstep_queryoptimizer_physical_HashJoin
+                      quickstep_queryoptimizer_physical_LIPFilterConfiguration
+                      quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_physical_Selection
+                      quickstep_relationaloperators_RelationalOperator
+                      quickstep_types_Type
+                      quickstep_utility_lipfilter_LIPFilter
+                      quickstep_utility_lipfilter_LIPFilterDeployment
+                      quickstep_utility_lipfilter_LIPFilter_proto)
 target_link_libraries(quickstep_queryoptimizer_LogicalGenerator
                       glog
                       quickstep_parser_ParseStatement
@@ -224,6 +243,7 @@ target_link_libraries(quickstep_queryoptimizer_Validator
 add_library(quickstep_queryoptimizer ../empty_src.cpp QueryOptimizerModule.hpp)
 target_link_libraries(quickstep_queryoptimizer
                       quickstep_queryoptimizer_ExecutionGenerator
+                      quickstep_queryoptimizer_LIPFilterGenerator
                       quickstep_queryoptimizer_LogicalGenerator
                       quickstep_queryoptimizer_LogicalToPhysicalMapper
                       quickstep_queryoptimizer_Optimizer

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 09ef9e0..3c6c0df 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -54,9 +54,11 @@
 #include "expressions/window_aggregation/WindowAggregateFunction.pb.h"
 #include "query_execution/QueryContext.hpp"
 #include "query_execution/QueryContext.pb.h"
+#include "query_optimizer/LIPFilterGenerator.hpp"
 #include "query_optimizer/OptimizerContext.hpp"
 #include "query_optimizer/QueryHandle.hpp"
 #include "query_optimizer/QueryPlan.hpp"
+#include "query_optimizer/cost_model/SimpleCostModel.hpp"
 #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
 #include "query_optimizer/expressions/AggregateFunction.hpp"
 #include "query_optimizer/expressions/Alias.hpp"
@@ -75,6 +77,7 @@
 #include "query_optimizer/physical/HashJoin.hpp"
 #include "query_optimizer/physical/InsertSelection.hpp"
 #include "query_optimizer/physical/InsertTuple.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
 #include "query_optimizer/physical/NestedLoopsJoin.hpp"
 #include "query_optimizer/physical/PatternMatcher.hpp"
 #include "query_optimizer/physical/Physical.hpp"
@@ -152,9 +155,6 @@ static const volatile bool aggregate_hashtable_type_dummy
 
 DEFINE_bool(parallelize_load, true, "Parallelize loading data files.");
 
-DEFINE_bool(optimize_joins, false,
-            "Enable post execution plan generation optimizations for joins.");
-
 namespace E = ::quickstep::optimizer::expressions;
 namespace P = ::quickstep::optimizer::physical;
 namespace S = ::quickstep::serialization;
@@ -167,6 +167,13 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) {
 
   cost_model_.reset(
       new cost::StarSchemaSimpleCostModel(top_level_physical_plan_->shared_subplans()));
+  simple_cost_model_.reset(
+      new cost::SimpleCostModel(top_level_physical_plan_->shared_subplans()));
+  const P::LIPFilterConfigurationPtr &lip_filter_configuration =
+      top_level_physical_plan_->lip_filter_configuration();
+  if (lip_filter_configuration != nullptr) {
+    lip_filter_generator_.reset(new LIPFilterGenerator(lip_filter_configuration));
+  }
 
   const CatalogRelation *result_relation = nullptr;
 
@@ -176,6 +183,10 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) {
     }
     generatePlanInternal(top_level_physical_plan_->plan());
 
+    if (lip_filter_generator_ != nullptr) {
+      lip_filter_generator_->deployLIPFilters(execution_plan_, query_context_proto_);
+    }
+
     // Set the query result relation if the input plan exists in physical_to_execution_map_,
     // which indicates the plan is the result of a SELECT query.
     const std::unordered_map<P::PhysicalPtr, CatalogRelationInfo>::const_iterator it =
@@ -232,6 +243,10 @@ void ExecutionGenerator::generatePlanInternal(
     generatePlanInternal(child);
   }
 
+  if (lip_filter_generator_ != nullptr) {
+    lip_filter_generator_->registerAttributeMap(physical_plan, attribute_substitution_map_);
+  }
+
   switch (physical_plan->getPhysicalType()) {
     case P::PhysicalType::kAggregate:
       return convertAggregate(
@@ -563,6 +578,10 @@ void ExecutionGenerator::convertSelection(
       std::forward_as_tuple(select_index,
                             output_relation));
   temporary_relation_info_vec_.emplace_back(select_index, output_relation);
+
+  if (lip_filter_generator_ != nullptr) {
+    lip_filter_generator_->addSelectionInfo(physical_selection, select_index);
+  }
 }
 
 void ExecutionGenerator::convertSharedSubplanReference(const physical::SharedSubplanReferencePtr &physical_plan) {
@@ -790,6 +809,12 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
       std::forward_as_tuple(join_operator_index,
                             output_relation));
   temporary_relation_info_vec_.emplace_back(join_operator_index, output_relation);
+
+  if (lip_filter_generator_ != nullptr) {
+    lip_filter_generator_->addHashJoinInfo(physical_plan,
+                                           build_operator_index,
+                                           join_operator_index);
+  }
 }
 
 void ExecutionGenerator::convertNestedLoopsJoin(
@@ -1417,6 +1442,11 @@ void ExecutionGenerator::convertAggregate(
   execution_plan_->addDirectDependency(destroy_aggregation_state_operator_index,
                                        finalize_aggregation_operator_index,
                                        true);
+
+  if (lip_filter_generator_ != nullptr) {
+    lip_filter_generator_->addAggregateInfo(physical_plan,
+                                            aggregation_operator_index);
+  }
 }
 
 void ExecutionGenerator::convertSort(const P::SortPtr &physical_sort) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/query_optimizer/ExecutionGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp
index 495955e..8890296 100644
--- a/query_optimizer/ExecutionGenerator.hpp
+++ b/query_optimizer/ExecutionGenerator.hpp
@@ -33,6 +33,7 @@
 #include "catalog/CatalogTypedefs.hpp"
 #include "query_execution/QueryContext.hpp"
 #include "query_execution/QueryContext.pb.h"
+#include "query_optimizer/LIPFilterGenerator.hpp"
 #include "query_optimizer/QueryHandle.hpp"
 #include "query_optimizer/QueryPlan.hpp"
 #include "query_optimizer/cost_model/CostModel.hpp"
@@ -419,9 +420,13 @@ class ExecutionGenerator {
    * @brief The cost model to use for creating the execution plan.
    */
   std::unique_ptr<cost::CostModel> cost_model_;
+  std::unique_ptr<cost::CostModel> simple_cost_model_;
 
   physical::TopLevelPlanPtr top_level_physical_plan_;
 
+  // Sub-generator for deploying LIP (lookahead information passing) filters.
+  std::unique_ptr<LIPFilterGenerator> lip_filter_generator_;
+
   DISALLOW_COPY_AND_ASSIGN(ExecutionGenerator);
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/query_optimizer/LIPFilterGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/LIPFilterGenerator.cpp b/query_optimizer/LIPFilterGenerator.cpp
new file mode 100644
index 0000000..ef10400
--- /dev/null
+++ b/query_optimizer/LIPFilterGenerator.cpp
@@ -0,0 +1,190 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/LIPFilterGenerator.hpp"
+
+#include <map>
+#include <utility>
+
+#include "catalog/CatalogAttribute.hpp"
+#include "catalog/CatalogTypedefs.hpp"
+#include "query_execution/QueryContext.pb.h"
+#include "relational_operators/RelationalOperator.hpp"
+#include "types/Type.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+#include "utility/lip_filter/LIPFilter.pb.h"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+void LIPFilterGenerator::registerAttributeMap(
+    const P::PhysicalPtr &node,
+    const std::unordered_map<E::ExprId, const CatalogAttribute *> &attribute_substitution_map) {
+  const auto &build_configs = lip_filter_configuration_->getBuildInfoMap();
+  const auto build_pos = build_configs.find(node);
+  if (build_pos != build_configs.end()) {  // 'node' has build-side LIP filter info.
+    auto &resolved_attributes = attribute_map_[node];
+    for (const auto &build_info : build_pos->second) {
+      const E::ExprId build_attr_id = build_info.build_attribute->id();
+      resolved_attributes.emplace(build_attr_id, attribute_substitution_map.at(build_attr_id));
+    }
+  }
+  const auto &probe_configs = lip_filter_configuration_->getProbeInfoMap();
+  const auto probe_pos = probe_configs.find(node);
+  if (probe_pos != probe_configs.end()) {  // 'node' has probe-side LIP filter info.
+    auto &resolved_attributes = attribute_map_[node];
+    for (const auto &probe_info : probe_pos->second) {
+      const E::ExprId probe_attr_id = probe_info.probe_attribute->id();
+      resolved_attributes.emplace(probe_attr_id, attribute_substitution_map.at(probe_attr_id));
+    }
+  }
+}
+
+void LIPFilterGenerator::deployLIPFilters(QueryPlan *execution_plan,
+                                          serialization::QueryContext *query_context_proto) const {
+  // Filled in by the builder pass below and consulted by the prober pass,
+  // so every builder is deployed before any prober.
+  LIPFilterBuilderMap builders_by_attribute;
+
+  // Pass 1: deploy a builder on every registered operator whose physical node
+  // has build-side information in the LIP filter configuration.
+  const auto &build_configs = lip_filter_configuration_->getBuildInfoMap();
+  for (const auto &builder : builder_infos_) {
+    const auto config_pos = build_configs.find(builder.builder_node);
+    if (config_pos == build_configs.end()) {
+      continue;
+    }
+    deployBuilderInternal(execution_plan, query_context_proto,
+                          builder.builder_node, builder.builder_operator_index,
+                          config_pos->second, &builders_by_attribute);
+  }
+
+  // Pass 2: deploy a prober on every registered operator whose physical node
+  // has probe-side information in the LIP filter configuration.
+  const auto &probe_configs = lip_filter_configuration_->getProbeInfoMap();
+  for (const auto &prober : prober_infos_) {
+    const auto config_pos = probe_configs.find(prober.prober_node);
+    if (config_pos == probe_configs.end()) {
+      continue;
+    }
+    deployProberInteral(execution_plan, query_context_proto,
+                        prober.prober_node, prober.prober_operator_index,
+                        config_pos->second, builders_by_attribute);
+  }
+}
+
+// Adds one BUILD deployment to the query context proto for 'builder_node'.
+void LIPFilterGenerator::deployBuilderInternal(
+    QueryPlan *execution_plan,
+    serialization::QueryContext *query_context_proto,
+    const physical::PhysicalPtr &builder_node,
+    const QueryPlan::DAGNodeIndex builder_operator_index,
+    const std::vector<physical::LIPFilterBuildInfo> &build_info_vec,
+    LIPFilterBuilderMap *lip_filter_builder_map) const {
+  const auto lip_deployment_index = query_context_proto->lip_filter_deployments_size();
+  auto *lip_filter_deployment_info_proto = query_context_proto->add_lip_filter_deployments();
+  lip_filter_deployment_info_proto->set_action_type(serialization::LIPFilterActionType::BUILD);
+
+  const auto &builder_attribute_map = attribute_map_.at(builder_node);
+  for (const auto &info : build_info_vec) {
+    const QueryContext::lip_filter_id lip_filter_id = query_context_proto->lip_filters_size();
+    serialization::LIPFilter *lip_filter_proto = query_context_proto->add_lip_filters();
+    const CatalogAttribute *target_attr = builder_attribute_map.at(info.build_attribute->id());
+    const Type &attr_type = target_attr->getType();
+
+    switch (info.filter_type) {
+      case LIPFilterType::kSingleIdentityHashFilter: {
+        DCHECK(!attr_type.isVariableLength());
+        lip_filter_proto->set_lip_filter_type(
+            serialization::LIPFilterType::SINGLE_IDENTITY_HASH_FILTER);
+        lip_filter_proto->SetExtension(
+            serialization::SingleIdentityHashFilter::filter_cardinality, info.filter_cardinality);
+        lip_filter_proto->SetExtension(
+            serialization::SingleIdentityHashFilter::attribute_size, attr_type.minimumByteLength());
+        break;
+      }
+      default:
+        LOG(FATAL) << "Unsupported LIPFilter type";
+        break;
+    }
+    // Record (build attribute, builder node) -> (filter id, build operator) for later lookup.
+    lip_filter_builder_map->emplace(
+        std::make_pair(info.build_attribute->id(), builder_node),
+        std::make_pair(lip_filter_id, builder_operator_index));
+    // Attach the new filter to this deployment, keyed by the resolved catalog attribute.
+    auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_entries();
+    lip_filter_entry_proto->set_lip_filter_id(lip_filter_id);
+    lip_filter_entry_proto->set_attribute_id(target_attr->getID());
+    lip_filter_entry_proto->mutable_attribute_type()->CopyFrom(attr_type.getProto());
+
+    VLOG(2) << "Build " << info.build_attribute->toString()  // Verbose-only diagnostics.
+            << " @" << builder_node
+            << " size = " << info.filter_cardinality;
+  }
+
+  RelationalOperator *relop =
+      execution_plan->getQueryPlanDAGMutable()->getNodePayloadMutable(builder_operator_index);
+  relop->deployLIPFilter(lip_deployment_index);  // Hand the deployment index to the operator.
+}
+
+// Adds one PROBE deployment to the query context proto for 'prober_node'.
+void LIPFilterGenerator::deployProberInteral(
+    QueryPlan *execution_plan,
+    serialization::QueryContext *query_context_proto,
+    const physical::PhysicalPtr &prober_node,
+    const QueryPlan::DAGNodeIndex prober_operator_index,
+    const std::vector<physical::LIPFilterProbeInfo> &probe_info_vec,
+    const LIPFilterBuilderMap &lip_filter_builder_map) const {
+  const auto lip_deployment_index = query_context_proto->lip_filter_deployments_size();
+  auto *lip_filter_deployment_info_proto = query_context_proto->add_lip_filter_deployments();
+  lip_filter_deployment_info_proto->set_action_type(serialization::LIPFilterActionType::PROBE);
+
+  const auto &prober_attribute_map = attribute_map_.at(prober_node);
+  for (const auto &info : probe_info_vec) {
+    const auto &builder_info =
+        lip_filter_builder_map.at(
+            std::make_pair(info.build_attribute->id(), info.builder));
+    const CatalogAttribute *target_attr = prober_attribute_map.at(info.probe_attribute->id());
+    // builder_info.first is the filter id; builder_info.second is the build operator index.
+    auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_entries();
+    lip_filter_entry_proto->set_lip_filter_id(builder_info.first);
+    lip_filter_entry_proto->set_attribute_id(target_attr->getID());
+    lip_filter_entry_proto->mutable_attribute_type()->CopyFrom(
+        target_attr->getType().getProto());
+    // Add a pipeline-breaking edge between the probing operator and the filter's builder.
+    execution_plan->addDirectDependency(prober_operator_index,
+                                        builder_info.second,
+                                        true /* is_pipeline_breaker */);
+
+    VLOG(2) << "Probe " << info.probe_attribute->toString()  // Verbose-only diagnostics.
+            << " @" << prober_node;
+  }
+
+  RelationalOperator *relop =
+      execution_plan->getQueryPlanDAGMutable()->getNodePayloadMutable(prober_operator_index);
+  relop->deployLIPFilter(lip_deployment_index);  // Hand the deployment index to the operator.
+}
+
+}  // namespace optimizer
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/query_optimizer/LIPFilterGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/LIPFilterGenerator.hpp b/query_optimizer/LIPFilterGenerator.hpp
new file mode 100644
index 0000000..4b597cf
--- /dev/null
+++ b/query_optimizer/LIPFilterGenerator.hpp
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_LIP_FILTER_GENERATOR_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_LIP_FILTER_GENERATOR_HPP_
+
+#include <map>
+#include <unordered_map>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "query_execution/QueryContext.hpp"
+#include "query_execution/QueryContext.pb.h"
+#include "query_optimizer/QueryPlan.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+class CatalogAttribute;
+
+namespace optimizer {
+
+/** \addtogroup QueryOptimizer
+ *  @{
+ */
+
+/** @brief Records which operators build or probe LIP filters while the execution
+ *         plan is generated, then writes the deployments into the query context. */
+class LIPFilterGenerator {
+ public:
+  explicit LIPFilterGenerator(const physical::LIPFilterConfigurationPtr &lip_filter_configuration)
+      : lip_filter_configuration_(lip_filter_configuration) {
+    DCHECK(lip_filter_configuration_ != nullptr);
+  }
+  /** @brief Stores the catalog attributes of node's LIP build / probe attributes. */
+  void registerAttributeMap(
+      const physical::PhysicalPtr &node,
+      const std::unordered_map<expressions::ExprId, const CatalogAttribute *> &attribute_substitution_map);
+  /** @brief Registers an aggregation operator as a potential LIP filter prober. */
+  void addAggregateInfo(const physical::AggregatePtr &aggregate,
+                        const QueryPlan::DAGNodeIndex aggregate_operator_index) {
+    prober_infos_.emplace_back(aggregate, aggregate_operator_index);
+  }
+  /** @brief Registers a hash join: build operator as builder, join operator as prober. */
+  void addHashJoinInfo(const physical::HashJoinPtr &hash_join,
+                       const QueryPlan::DAGNodeIndex build_operator_index,
+                       const QueryPlan::DAGNodeIndex join_operator_index) {
+    builder_infos_.emplace_back(hash_join, build_operator_index);
+    prober_infos_.emplace_back(hash_join, join_operator_index);
+  }
+  /** @brief Registers a selection operator as a potential LIP filter prober. */
+  void addSelectionInfo(const physical::SelectionPtr &selection,
+                        const QueryPlan::DAGNodeIndex select_operator_index) {
+    prober_infos_.emplace_back(selection, select_operator_index);
+  }
+  /** @brief Deploys all registered builders, then all registered probers. */
+  void deployLIPFilters(QueryPlan *execution_plan,
+                        serialization::QueryContext *query_context_proto) const;
+
+ private:
+  struct BuilderInfo {
+    BuilderInfo(const physical::PhysicalPtr &builder_node_in,
+                const QueryPlan::DAGNodeIndex builder_operator_index_in)
+        : builder_node(builder_node_in),
+          builder_operator_index(builder_operator_index_in) {}
+    const physical::PhysicalPtr builder_node;
+    const QueryPlan::DAGNodeIndex builder_operator_index;
+  };
+
+  struct ProberInfo {
+    ProberInfo(const physical::PhysicalPtr &prober_node_in,
+               const QueryPlan::DAGNodeIndex prober_operator_index_in)
+        : prober_node(prober_node_in),
+          prober_operator_index(prober_operator_index_in) {}
+    const physical::PhysicalPtr prober_node;
+    const QueryPlan::DAGNodeIndex prober_operator_index;
+  };
+
+  typedef std::map<std::pair<expressions::ExprId, physical::PhysicalPtr>,
+                   std::pair<QueryContext::lip_filter_id, QueryPlan::DAGNodeIndex>> LIPFilterBuilderMap;
+
+  void deployBuilderInternal(QueryPlan *execution_plan,
+                             serialization::QueryContext *query_context_proto,
+                             const physical::PhysicalPtr &builder_node,
+                             const QueryPlan::DAGNodeIndex builder_operator_index,
+                             const std::vector<physical::LIPFilterBuildInfo> &build_info_vec,
+                             LIPFilterBuilderMap *lip_filter_builder_map) const;
+
+  void deployProberInteral(QueryPlan *execution_plan,
+                           serialization::QueryContext *query_context_proto,
+                           const physical::PhysicalPtr &prober_node,
+                           const QueryPlan::DAGNodeIndex prober_operator_index,
+                           const std::vector<physical::LIPFilterProbeInfo> &probe_info_vec,
+                           const LIPFilterBuilderMap &lip_filter_builder_map) const;
+
+  const physical::LIPFilterConfigurationPtr lip_filter_configuration_;
+  std::map<physical::PhysicalPtr, std::map<expressions::ExprId, const CatalogAttribute *>> attribute_map_;
+  std::vector<BuilderInfo> builder_infos_;
+  std::vector<ProberInfo> prober_infos_;
+};
+
+
+/** @} */
+
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif /* QUICKSTEP_QUERY_OPTIMIZER_LIP_FILTER_GENERATOR_HPP_ */

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/relational_operators/AggregationOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/AggregationOperator.cpp b/relational_operators/AggregationOperator.cpp
index 056e76d..71baa53 100644
--- a/relational_operators/AggregationOperator.cpp
+++ b/relational_operators/AggregationOperator.cpp
@@ -38,14 +38,24 @@ bool AggregationOperator::getAllWorkOrders(
     StorageManager *storage_manager,
     const tmb::client_id scheduler_client_id,
     tmb::MessageBus *bus) {
+  const LIPFilterDeployment *lip_filter_deployment = nullptr;
+  if (lip_deployment_index_ != QueryContext::kInvalidILIPDeploymentId) {
+    lip_filter_deployment = query_context->getLIPDeployment(lip_deployment_index_);
+  }
+
   if (input_relation_is_stored_) {
     if (!started_) {
       for (const block_id input_block_id : input_relation_block_ids_) {
+        LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr;
+        if (lip_filter_deployment != nullptr) {
+          lip_filter_adaptive_prober = lip_filter_deployment->createLIPFilterAdaptiveProber();
+        }
         container->addNormalWorkOrder(
             new AggregationWorkOrder(
                 query_id_,
                 input_block_id,
-                query_context->getAggregationState(aggr_state_index_)),
+                query_context->getAggregationState(aggr_state_index_),
+                lip_filter_adaptive_prober),
             op_index_);
       }
       started_ = true;
@@ -53,11 +63,16 @@ bool AggregationOperator::getAllWorkOrders(
     return started_;
   } else {
     while (num_workorders_generated_ < input_relation_block_ids_.size()) {
+      LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr;
+      if (lip_filter_deployment != nullptr) {
+        lip_filter_adaptive_prober = lip_filter_deployment->createLIPFilterAdaptiveProber();
+      }
       container->addNormalWorkOrder(
           new AggregationWorkOrder(
               query_id_,
               input_relation_block_ids_[num_workorders_generated_],
-              query_context->getAggregationState(aggr_state_index_)),
+              query_context->getAggregationState(aggr_state_index_),
+              lip_filter_adaptive_prober),
           op_index_);
       ++num_workorders_generated_;
     }
@@ -98,7 +113,7 @@ serialization::WorkOrder* AggregationOperator::createWorkOrderProto(const block_
 
 
 void AggregationWorkOrder::execute() {
-  state_->aggregateBlock(input_block_id_);
+  state_->aggregateBlock(input_block_id_, lip_filter_adaptive_prober_.get());
 }
 
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/relational_operators/AggregationOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/AggregationOperator.hpp b/relational_operators/AggregationOperator.hpp
index 31c1da4..da36d57 100644
--- a/relational_operators/AggregationOperator.hpp
+++ b/relational_operators/AggregationOperator.hpp
@@ -30,6 +30,7 @@
 #include "relational_operators/WorkOrder.hpp"
 #include "storage/StorageBlockInfo.hpp"
 #include "utility/Macros.hpp"
+#include "utility/lip_filter/LIPFilterAdaptiveProber.hpp"
 
 #include "glog/logging.h"
 
@@ -140,10 +141,12 @@ class AggregationWorkOrder : public WorkOrder {
    **/
   AggregationWorkOrder(const std::size_t query_id,
                        const block_id input_block_id,
-                       AggregationOperationState *state)
+                       AggregationOperationState *state,
+                       LIPFilterAdaptiveProber *lip_filter_adaptive_prober)
       : WorkOrder(query_id),
         input_block_id_(input_block_id),
-        state_(DCHECK_NOTNULL(state)) {}
+        state_(DCHECK_NOTNULL(state)),
+        lip_filter_adaptive_prober_(lip_filter_adaptive_prober) {}
 
   ~AggregationWorkOrder() override {}
 
@@ -153,6 +156,8 @@ class AggregationWorkOrder : public WorkOrder {
   const block_id input_block_id_;
   AggregationOperationState *state_;
 
+  std::unique_ptr<LIPFilterAdaptiveProber> lip_filter_adaptive_prober_;
+
   DISALLOW_COPY_AND_ASSIGN(AggregationWorkOrder);
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/relational_operators/BuildHashOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/BuildHashOperator.cpp b/relational_operators/BuildHashOperator.cpp
index 465621c..35bb5cf 100644
--- a/relational_operators/BuildHashOperator.cpp
+++ b/relational_operators/BuildHashOperator.cpp
@@ -34,6 +34,7 @@
 #include "storage/TupleReference.hpp"
 #include "storage/TupleStorageSubBlock.hpp"
 #include "storage/ValueAccessor.hpp"
+#include "utility/lip_filter/LIPFilterBuilder.hpp"
 
 #include "glog/logging.h"
 
@@ -68,6 +69,14 @@ bool BuildHashOperator::getAllWorkOrders(
     tmb::MessageBus *bus) {
   DCHECK(query_context != nullptr);
 
+  LIPFilterBuilderPtr lip_filter_builder = nullptr;
+  if (lip_deployment_index_ != QueryContext::kInvalidILIPDeploymentId) {
+    const LIPFilterDeployment *lip_filter_deployment =
+        query_context->getLIPDeployment(lip_deployment_index_);
+    lip_filter_builder = std::shared_ptr<LIPFilterBuilder>(
+        lip_filter_deployment->createLIPFilterBuilder());
+  }
+
   JoinHashTable *hash_table = query_context->getJoinHashTable(hash_table_index_);
   if (input_relation_is_stored_) {
     if (!started_) {
@@ -79,7 +88,8 @@ bool BuildHashOperator::getAllWorkOrders(
                                    any_join_key_attributes_nullable_,
                                    input_block_id,
                                    hash_table,
-                                   storage_manager),
+                                   storage_manager,
+                                   lip_filter_builder),
             op_index_);
       }
       started_ = true;
@@ -95,7 +105,8 @@ bool BuildHashOperator::getAllWorkOrders(
               any_join_key_attributes_nullable_,
               input_relation_block_ids_[num_workorders_generated_],
               hash_table,
-              storage_manager),
+              storage_manager,
+              lip_filter_builder),
           op_index_);
       ++num_workorders_generated_;
     }
@@ -136,17 +147,23 @@ serialization::WorkOrder* BuildHashOperator::createWorkOrderProto(const block_id
                       any_join_key_attributes_nullable_);
   proto->SetExtension(serialization::BuildHashWorkOrder::join_hash_table_index, hash_table_index_);
   proto->SetExtension(serialization::BuildHashWorkOrder::block_id, block);
+  // TODO(jianqiao): update lip_filter related stuff
 
   return proto;
 }
 
-
 void BuildHashWorkOrder::execute() {
   BlockReference block(
       storage_manager_->getBlock(build_block_id_, input_relation_));
 
   TupleReferenceGenerator generator(build_block_id_);
   std::unique_ptr<ValueAccessor> accessor(block->getTupleStorageSubBlock().createValueAccessor());
+
+  if (lip_filter_builder_ != nullptr) {
+    lip_filter_builder_->insertValueAccessor(accessor.get());
+    accessor->beginIterationVirtual();
+  }
+
   HashTablePutResult result;
   if (join_key_attributes_.size() == 1) {
     result = hash_table_->putValueAccessor(accessor.get(),

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/relational_operators/BuildHashOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/BuildHashOperator.hpp b/relational_operators/BuildHashOperator.hpp
index 4a80a8a..940298c 100644
--- a/relational_operators/BuildHashOperator.hpp
+++ b/relational_operators/BuildHashOperator.hpp
@@ -20,6 +20,7 @@
 #ifndef QUICKSTEP_RELATIONAL_OPERATORS_BUILD_HASH_OPERATOR_HPP_
 #define QUICKSTEP_RELATIONAL_OPERATORS_BUILD_HASH_OPERATOR_HPP_
 
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@@ -31,6 +32,7 @@
 #include "relational_operators/WorkOrder.hpp"
 #include "storage/StorageBlockInfo.hpp"
 #include "utility/Macros.hpp"
+#include "utility/lip_filter/LIPFilterBuilder.hpp"
 
 #include "glog/logging.h"
 
@@ -162,6 +164,7 @@ class BuildHashWorkOrder : public WorkOrder {
    * @param build_block_id The block id.
    * @param hash_table The JoinHashTable to use.
    * @param storage_manager The StorageManager to use.
+   * @param lip_filter_builder The attached builder for building LIP filters.
    **/
   BuildHashWorkOrder(const std::size_t query_id,
                      const CatalogRelationSchema &input_relation,
@@ -169,14 +172,16 @@ class BuildHashWorkOrder : public WorkOrder {
                      const bool any_join_key_attributes_nullable,
                      const block_id build_block_id,
                      JoinHashTable *hash_table,
-                     StorageManager *storage_manager)
+                     StorageManager *storage_manager,
+                     LIPFilterBuilderPtr lip_filter_builder = nullptr)
       : WorkOrder(query_id),
         input_relation_(input_relation),
         join_key_attributes_(join_key_attributes),
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
         build_block_id_(build_block_id),
         hash_table_(DCHECK_NOTNULL(hash_table)),
-        storage_manager_(DCHECK_NOTNULL(storage_manager)) {}
+        storage_manager_(DCHECK_NOTNULL(storage_manager)),
+        lip_filter_builder_(lip_filter_builder) {}
 
   /**
    * @brief Constructor for the distributed version.
@@ -189,6 +194,7 @@ class BuildHashWorkOrder : public WorkOrder {
    * @param build_block_id The block id.
    * @param hash_table The JoinHashTable to use.
    * @param storage_manager The StorageManager to use.
+   * @param lip_filter_builder The attached builder for building LIP filters.
    **/
   BuildHashWorkOrder(const std::size_t query_id,
                      const CatalogRelationSchema &input_relation,
@@ -196,14 +202,16 @@ class BuildHashWorkOrder : public WorkOrder {
                      const bool any_join_key_attributes_nullable,
                      const block_id build_block_id,
                      JoinHashTable *hash_table,
-                     StorageManager *storage_manager)
+                     StorageManager *storage_manager,
+                     LIPFilterBuilderPtr lip_filter_builder = nullptr)
       : WorkOrder(query_id),
         input_relation_(input_relation),
         join_key_attributes_(std::move(join_key_attributes)),
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
         build_block_id_(build_block_id),
         hash_table_(DCHECK_NOTNULL(hash_table)),
-        storage_manager_(DCHECK_NOTNULL(storage_manager)) {}
+        storage_manager_(DCHECK_NOTNULL(storage_manager)),
+        lip_filter_builder_(lip_filter_builder) {}
 
   ~BuildHashWorkOrder() override {}
 
@@ -222,6 +230,8 @@ class BuildHashWorkOrder : public WorkOrder {
   JoinHashTable *hash_table_;
   StorageManager *storage_manager_;
 
+  LIPFilterBuilderPtr lip_filter_builder_;
+
   DISALLOW_COPY_AND_ASSIGN(BuildHashWorkOrder);
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index 779c0fe..ddc2a40 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -48,6 +48,7 @@
 #include "types/TypedValue.hpp"
 #include "types/containers/ColumnVector.hpp"
 #include "types/containers/ColumnVectorsValueAccessor.hpp"
+#include "utility/lip_filter/LIPFilterAdaptiveProber.hpp"
 
 #include "gflags/gflags.h"
 #include "glog/logging.h"
@@ -95,8 +96,8 @@ class MapBasedJoinedTupleCollector {
 
 class SemiAntiJoinTupleCollector {
  public:
-  explicit SemiAntiJoinTupleCollector(const TupleStorageSubBlock &tuple_store) {
-    filter_.reset(tuple_store.getExistenceMap());
+  explicit SemiAntiJoinTupleCollector(TupleIdSequence *existence_map) {
+    filter_ = existence_map;
   }
 
   template <typename ValueAccessorT>
@@ -104,12 +105,8 @@ class SemiAntiJoinTupleCollector {
     filter_->set(accessor.getCurrentPosition(), false);
   }
 
-  const TupleIdSequence* filter() const {
-    return filter_.get();
-  }
-
  private:
-  std::unique_ptr<TupleIdSequence> filter_;
+  TupleIdSequence *filter_;
 };
 
 class OuterJoinTupleCollector {
@@ -180,6 +177,11 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
   if (blocking_dependencies_met_) {
     DCHECK(query_context != nullptr);
 
+    const LIPFilterDeployment *lip_filter_deployment = nullptr;
+    if (lip_deployment_index_ != QueryContext::kInvalidILIPDeploymentId) {
+      lip_filter_deployment = query_context->getLIPDeployment(lip_deployment_index_);
+    }
+
     const Predicate *residual_predicate =
         query_context->getPredicate(residual_predicate_index_);
     const vector<unique_ptr<const Scalar>> &selection =
@@ -192,6 +194,10 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
     if (probe_relation_is_stored_) {
       if (!started_) {
         for (const block_id probe_block_id : probe_relation_block_ids_) {
+          LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr;
+          if (lip_filter_deployment != nullptr) {
+            lip_filter_adaptive_prober = lip_filter_deployment->createLIPFilterAdaptiveProber();
+          }
           container->addNormalWorkOrder(
               new JoinWorkOrderClass(query_id_,
                                      build_relation_,
@@ -203,7 +209,8 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
                                      selection,
                                      hash_table,
                                      output_destination,
-                                     storage_manager),
+                                     storage_manager,
+                                     lip_filter_adaptive_prober),
               op_index_);
         }
         started_ = true;
@@ -211,6 +218,10 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
       return started_;
     } else {
       while (num_workorders_generated_ < probe_relation_block_ids_.size()) {
+        LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr;
+        if (lip_filter_deployment != nullptr) {
+          lip_filter_adaptive_prober = lip_filter_deployment->createLIPFilterAdaptiveProber();
+        }
         container->addNormalWorkOrder(
             new JoinWorkOrderClass(
                 query_id_,
@@ -223,7 +234,8 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
                 selection,
                 hash_table,
                 output_destination,
-                storage_manager),
+                storage_manager,
+                lip_filter_adaptive_prober),
             op_index_);
         ++num_workorders_generated_;
       }  // end while
@@ -423,6 +435,17 @@ void HashInnerJoinWorkOrder::execute() {
   const TupleStorageSubBlock &probe_store = probe_block->getTupleStorageSubBlock();
 
   std::unique_ptr<ValueAccessor> probe_accessor(probe_store.createValueAccessor());
+
+  std::unique_ptr<TupleIdSequence> lip_filter_existence_map;
+  std::unique_ptr<ValueAccessor> base_accessor;
+  if (lip_filter_adaptive_prober_ != nullptr) {
+    base_accessor.reset(probe_accessor.release());
+    lip_filter_existence_map.reset(
+        lip_filter_adaptive_prober_->filterValueAccessor(base_accessor.get()));
+    probe_accessor.reset(
+        base_accessor->createSharedTupleIdSequenceAdapterVirtual(*lip_filter_existence_map));
+  }
+
   MapBasedJoinedTupleCollector collector;
   if (join_key_attributes_.size() == 1) {
     hash_table_.getAllFromValueAccessor(
@@ -529,6 +552,16 @@ void HashSemiJoinWorkOrder::executeWithResidualPredicate() {
 
   std::unique_ptr<ValueAccessor> probe_accessor(probe_store.createValueAccessor());
 
+  std::unique_ptr<TupleIdSequence> lip_filter_existence_map;
+  std::unique_ptr<ValueAccessor> base_accessor;
+  if (lip_filter_adaptive_prober_ != nullptr) {
+    base_accessor.reset(probe_accessor.release());
+    lip_filter_existence_map.reset(
+        lip_filter_adaptive_prober_->filterValueAccessor(base_accessor.get()));
+    probe_accessor.reset(
+        base_accessor->createSharedTupleIdSequenceAdapterVirtual(*lip_filter_existence_map));
+  }
+
   // We collect all the matching probe relation tuples, as there's a residual
   // preidcate that needs to be applied after collecting these matches.
   MapBasedJoinedTupleCollector collector;
@@ -548,7 +581,6 @@ void HashSemiJoinWorkOrder::executeWithResidualPredicate() {
 
   // Get a filter for tuples in the given probe block.
   TupleIdSequence filter(probe_store.getMaxTupleID() + 1);
-  filter.setRange(0, filter.length(), false);
   for (const std::pair<const block_id,
                        std::vector<std::pair<tuple_id, tuple_id>>>
            &build_block_entry : *collector.getJoinedTuples()) {
@@ -609,7 +641,22 @@ void HashSemiJoinWorkOrder::executeWithoutResidualPredicate() {
   const TupleStorageSubBlock &probe_store = probe_block->getTupleStorageSubBlock();
 
   std::unique_ptr<ValueAccessor> probe_accessor(probe_store.createValueAccessor());
-  SemiAntiJoinTupleCollector collector(probe_store);
+  std::unique_ptr<TupleIdSequence> existence_map;
+
+  std::unique_ptr<ValueAccessor> base_accessor;
+  if (lip_filter_adaptive_prober_ != nullptr) {
+    base_accessor.reset(probe_accessor.release());
+    existence_map.reset(
+        lip_filter_adaptive_prober_->filterValueAccessor(base_accessor.get()));
+    probe_accessor.reset(
+        base_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
+  }
+
+  if (existence_map == nullptr) {
+    existence_map.reset(probe_store.getExistenceMap());
+  }
+
+  SemiAntiJoinTupleCollector collector(existence_map.get());
   // We collect all the probe relation tuples which have at least one matching
   // tuple in the build relation. As a performance optimization, the hash table
   // just looks for the existence of the probing key in the hash table and sets
@@ -636,8 +683,15 @@ void HashSemiJoinWorkOrder::executeWithoutResidualPredicate() {
                                     probe_block->getIndices(),
                                     probe_block->getIndicesConsistent());
 
-  std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
-      probe_store.createValueAccessor(collector.filter()));
+  std::unique_ptr<ValueAccessor> probe_accessor_with_filter;
+  if (base_accessor != nullptr) {
+    probe_accessor_with_filter.reset(
+      base_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
+  } else {
+    probe_accessor_with_filter.reset(
+      probe_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
+  }
+
   ColumnVectorsValueAccessor temp_result;
   for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
        selection_it != selection_.end(); ++selection_it) {
@@ -656,7 +710,9 @@ void HashAntiJoinWorkOrder::executeWithoutResidualPredicate() {
   const TupleStorageSubBlock &probe_store = probe_block->getTupleStorageSubBlock();
 
   std::unique_ptr<ValueAccessor> probe_accessor(probe_store.createValueAccessor());
-  SemiAntiJoinTupleCollector collector(probe_store);
+  std::unique_ptr<TupleIdSequence> existence_map(probe_store.getExistenceMap());
+
+  SemiAntiJoinTupleCollector collector(existence_map.get());
   // We probe the hash table to find the keys which have an entry in the
   // hash table.
   if (join_key_attributes_.size() == 1) {
@@ -680,7 +736,7 @@ void HashAntiJoinWorkOrder::executeWithoutResidualPredicate() {
                                     probe_block->getIndicesConsistent());
 
   std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
-      probe_store.createValueAccessor(collector.filter()));
+      probe_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
   ColumnVectorsValueAccessor temp_result;
   for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
        selection_it != selection_.end(); ++selection_it) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/relational_operators/HashJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index fa393b6..29d6eba 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -35,6 +35,7 @@
 #include "storage/HashTable.hpp"
 #include "storage/StorageBlockInfo.hpp"
 #include "utility/Macros.hpp"
+#include "utility/lip_filter/LIPFilterAdaptiveProber.hpp"
 
 #include "glog/logging.h"
 
@@ -307,7 +308,8 @@ class HashInnerJoinWorkOrder : public WorkOrder {
       const std::vector<std::unique_ptr<const Scalar>> &selection,
       const JoinHashTable &hash_table,
       InsertDestination *output_destination,
-      StorageManager *storage_manager)
+      StorageManager *storage_manager,
+      LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr)
       : WorkOrder(query_id),
         build_relation_(build_relation),
         probe_relation_(probe_relation),
@@ -318,7 +320,8 @@ class HashInnerJoinWorkOrder : public WorkOrder {
         selection_(selection),
         hash_table_(hash_table),
         output_destination_(DCHECK_NOTNULL(output_destination)),
-        storage_manager_(DCHECK_NOTNULL(storage_manager)) {}
+        storage_manager_(DCHECK_NOTNULL(storage_manager)),
+        lip_filter_adaptive_prober_(lip_filter_adaptive_prober) {}
 
   /**
    * @brief Constructor for the distributed version.
@@ -354,7 +357,8 @@ class HashInnerJoinWorkOrder : public WorkOrder {
       const std::vector<std::unique_ptr<const Scalar>> &selection,
       const JoinHashTable &hash_table,
       InsertDestination *output_destination,
-      StorageManager *storage_manager)
+      StorageManager *storage_manager,
+      LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr)
       : WorkOrder(query_id),
         build_relation_(build_relation),
         probe_relation_(probe_relation),
@@ -365,7 +369,8 @@ class HashInnerJoinWorkOrder : public WorkOrder {
         selection_(selection),
         hash_table_(hash_table),
         output_destination_(DCHECK_NOTNULL(output_destination)),
-        storage_manager_(DCHECK_NOTNULL(storage_manager)) {}
+        storage_manager_(DCHECK_NOTNULL(storage_manager)),
+        lip_filter_adaptive_prober_(lip_filter_adaptive_prober) {}
 
   ~HashInnerJoinWorkOrder() override {}
 
@@ -392,6 +397,8 @@ class HashInnerJoinWorkOrder : public WorkOrder {
   InsertDestination *output_destination_;
   StorageManager *storage_manager_;
 
+  std::unique_ptr<LIPFilterAdaptiveProber> lip_filter_adaptive_prober_;
+
   DISALLOW_COPY_AND_ASSIGN(HashInnerJoinWorkOrder);
 };
 
@@ -435,7 +442,8 @@ class HashSemiJoinWorkOrder : public WorkOrder {
       const std::vector<std::unique_ptr<const Scalar>> &selection,
       const JoinHashTable &hash_table,
       InsertDestination *output_destination,
-      StorageManager *storage_manager)
+      StorageManager *storage_manager,
+      LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr)
       : WorkOrder(query_id),
         build_relation_(build_relation),
         probe_relation_(probe_relation),
@@ -446,7 +454,8 @@ class HashSemiJoinWorkOrder : public WorkOrder {
         selection_(selection),
         hash_table_(hash_table),
         output_destination_(DCHECK_NOTNULL(output_destination)),
-        storage_manager_(DCHECK_NOTNULL(storage_manager)) {}
+        storage_manager_(DCHECK_NOTNULL(storage_manager)),
+        lip_filter_adaptive_prober_(lip_filter_adaptive_prober) {}
 
   /**
    * @brief Constructor for the distributed version.
@@ -482,7 +491,8 @@ class HashSemiJoinWorkOrder : public WorkOrder {
       const std::vector<std::unique_ptr<const Scalar>> &selection,
       const JoinHashTable &hash_table,
       InsertDestination *output_destination,
-      StorageManager *storage_manager)
+      StorageManager *storage_manager,
+      LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr)
       : WorkOrder(query_id),
         build_relation_(build_relation),
         probe_relation_(probe_relation),
@@ -493,7 +503,8 @@ class HashSemiJoinWorkOrder : public WorkOrder {
         selection_(selection),
         hash_table_(hash_table),
         output_destination_(DCHECK_NOTNULL(output_destination)),
-        storage_manager_(DCHECK_NOTNULL(storage_manager)) {}
+        storage_manager_(DCHECK_NOTNULL(storage_manager)),
+        lip_filter_adaptive_prober_(lip_filter_adaptive_prober) {}
 
   ~HashSemiJoinWorkOrder() override {}
 
@@ -516,6 +527,8 @@ class HashSemiJoinWorkOrder : public WorkOrder {
   InsertDestination *output_destination_;
   StorageManager *storage_manager_;
 
+  std::unique_ptr<LIPFilterAdaptiveProber> lip_filter_adaptive_prober_;
+
   DISALLOW_COPY_AND_ASSIGN(HashSemiJoinWorkOrder);
 };
 
@@ -559,7 +572,8 @@ class HashAntiJoinWorkOrder : public WorkOrder {
       const std::vector<std::unique_ptr<const Scalar>> &selection,
       const JoinHashTable &hash_table,
       InsertDestination *output_destination,
-      StorageManager *storage_manager)
+      StorageManager *storage_manager,
+      LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr)
       : WorkOrder(query_id),
         build_relation_(build_relation),
         probe_relation_(probe_relation),
@@ -570,7 +584,8 @@ class HashAntiJoinWorkOrder : public WorkOrder {
         selection_(selection),
         hash_table_(hash_table),
         output_destination_(DCHECK_NOTNULL(output_destination)),
-        storage_manager_(DCHECK_NOTNULL(storage_manager)) {}
+        storage_manager_(DCHECK_NOTNULL(storage_manager)),
+        lip_filter_adaptive_prober_(lip_filter_adaptive_prober) {}
 
   /**
    * @brief Constructor for the distributed version.
@@ -606,7 +621,8 @@ class HashAntiJoinWorkOrder : public WorkOrder {
       const std::vector<std::unique_ptr<const Scalar>> &selection,
       const JoinHashTable &hash_table,
       InsertDestination *output_destination,
-      StorageManager *storage_manager)
+      StorageManager *storage_manager,
+      LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr)
       : WorkOrder(query_id),
         build_relation_(build_relation),
         probe_relation_(probe_relation),
@@ -617,7 +633,8 @@ class HashAntiJoinWorkOrder : public WorkOrder {
         selection_(selection),
         hash_table_(hash_table),
         output_destination_(DCHECK_NOTNULL(output_destination)),
-        storage_manager_(DCHECK_NOTNULL(storage_manager)) {}
+        storage_manager_(DCHECK_NOTNULL(storage_manager)),
+        lip_filter_adaptive_prober_(lip_filter_adaptive_prober) {}
 
   ~HashAntiJoinWorkOrder() override {}
 
@@ -646,6 +663,8 @@ class HashAntiJoinWorkOrder : public WorkOrder {
   InsertDestination *output_destination_;
   StorageManager *storage_manager_;
 
+  std::unique_ptr<LIPFilterAdaptiveProber> lip_filter_adaptive_prober_;
+
   DISALLOW_COPY_AND_ASSIGN(HashAntiJoinWorkOrder);
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/relational_operators/RelationalOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/RelationalOperator.hpp b/relational_operators/RelationalOperator.hpp
index f0303e5..c2c8f11 100644
--- a/relational_operators/RelationalOperator.hpp
+++ b/relational_operators/RelationalOperator.hpp
@@ -245,6 +245,13 @@ class RelationalOperator {
     return op_index_;
   }
 
+  /**
+   * @brief Record which LIPFilterDeployment (by its index in QueryContext) this operator uses.
+   */
+  void deployLIPFilter(const QueryContext::lip_deployment_id lip_deployment_index) {
+    lip_deployment_index_ = lip_deployment_index;
+  }
+
  protected:
   /**
    * @brief Constructor
@@ -257,7 +264,8 @@ class RelationalOperator {
                               const bool blocking_dependencies_met = false)
       : query_id_(query_id),
         blocking_dependencies_met_(blocking_dependencies_met),
-        done_feeding_input_relation_(false) {}
+        done_feeding_input_relation_(false),
+        lip_deployment_index_(QueryContext::kInvalidILIPDeploymentId) {}
 
   const std::size_t query_id_;
 
@@ -265,6 +273,8 @@ class RelationalOperator {
   bool done_feeding_input_relation_;
   std::size_t op_index_;
 
+  QueryContext::lip_deployment_id lip_deployment_index_;
+
  private:
   DISALLOW_COPY_AND_ASSIGN(RelationalOperator);
 };

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/relational_operators/SelectOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/SelectOperator.cpp b/relational_operators/SelectOperator.cpp
index d56326e..d7855cf 100644
--- a/relational_operators/SelectOperator.cpp
+++ b/relational_operators/SelectOperator.cpp
@@ -30,6 +30,7 @@
 #include "storage/StorageBlock.hpp"
 #include "storage/StorageBlockInfo.hpp"
 #include "storage/StorageManager.hpp"
+#include "utility/lip_filter/LIPFilterDeployment.hpp"
 
 #include "glog/logging.h"
 
@@ -43,9 +44,14 @@ void SelectOperator::addWorkOrders(WorkOrdersContainer *container,
                                    StorageManager *storage_manager,
                                    const Predicate *predicate,
                                    const std::vector<std::unique_ptr<const Scalar>> *selection,
-                                   InsertDestination *output_destination) {
+                                   InsertDestination *output_destination,
+                                   const LIPFilterDeployment *lip_filter_deployment) {
   if (input_relation_is_stored_) {
     for (const block_id input_block_id : input_relation_block_ids_) {
+      LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr;
+      if (lip_filter_deployment != nullptr) {
+        lip_filter_adaptive_prober = lip_filter_deployment->createLIPFilterAdaptiveProber();
+      }
       container->addNormalWorkOrder(new SelectWorkOrder(query_id_,
                                                         input_relation_,
                                                         input_block_id,
@@ -54,11 +60,16 @@ void SelectOperator::addWorkOrders(WorkOrdersContainer *container,
                                                         simple_selection_,
                                                         selection,
                                                         output_destination,
-                                                        storage_manager),
+                                                        storage_manager,
+                                                        lip_filter_adaptive_prober),
                                     op_index_);
     }
   } else {
     while (num_workorders_generated_ < input_relation_block_ids_.size()) {
+      LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr;
+      if (lip_filter_deployment != nullptr) {
+        lip_filter_adaptive_prober = lip_filter_deployment->createLIPFilterAdaptiveProber();
+      }
       container->addNormalWorkOrder(
           new SelectWorkOrder(
               query_id_,
@@ -81,13 +92,18 @@ void SelectOperator::addPartitionAwareWorkOrders(WorkOrdersContainer *container,
                                                  StorageManager *storage_manager,
                                                  const Predicate *predicate,
                                                  const std::vector<std::unique_ptr<const Scalar>> *selection,
-                                                 InsertDestination *output_destination) {
+                                                 InsertDestination *output_destination,
+                                                 const LIPFilterDeployment *lip_filter_deployment) {
   DCHECK(placement_scheme_ != nullptr);
   const std::size_t num_partitions = input_relation_.getPartitionScheme().getPartitionSchemeHeader().getNumPartitions();
   if (input_relation_is_stored_) {
     for (std::size_t part_id = 0; part_id < num_partitions; ++part_id) {
       for (const block_id input_block_id :
            input_relation_block_ids_in_partition_[part_id]) {
+        LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr;
+        if (lip_filter_deployment != nullptr) {
+          lip_filter_adaptive_prober = lip_filter_deployment->createLIPFilterAdaptiveProber();
+        }
         container->addNormalWorkOrder(
             new SelectWorkOrder(
                 query_id_,
@@ -99,6 +115,7 @@ void SelectOperator::addPartitionAwareWorkOrders(WorkOrdersContainer *container,
                 selection,
                 output_destination,
                 storage_manager,
+                lip_filter_adaptive_prober,
                 placement_scheme_->getNUMANodeForBlock(input_block_id)),
             op_index_);
       }
@@ -109,6 +126,10 @@ void SelectOperator::addPartitionAwareWorkOrders(WorkOrdersContainer *container,
              input_relation_block_ids_in_partition_[part_id].size()) {
         block_id block_in_partition
             = input_relation_block_ids_in_partition_[part_id][num_workorders_generated_in_partition_[part_id]];
+        LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr;
+        if (lip_filter_deployment != nullptr) {
+          lip_filter_adaptive_prober = lip_filter_deployment->createLIPFilterAdaptiveProber();
+        }
         container->addNormalWorkOrder(
             new SelectWorkOrder(
                 query_id_,
@@ -120,6 +141,7 @@ void SelectOperator::addPartitionAwareWorkOrders(WorkOrdersContainer *container,
                 selection,
                 output_destination,
                 storage_manager,
+                lip_filter_adaptive_prober,
                 placement_scheme_->getNUMANodeForBlock(block_in_partition)),
             op_index_);
         ++num_workorders_generated_in_partition_[part_id];
@@ -146,16 +168,31 @@ bool SelectOperator::getAllWorkOrders(
   InsertDestination *output_destination =
       query_context->getInsertDestination(output_destination_index_);
 
+  const LIPFilterDeployment *lip_filter_deployment = nullptr;
+  if (lip_deployment_index_ != QueryContext::kInvalidILIPDeploymentId) {
+    lip_filter_deployment = query_context->getLIPDeployment(lip_deployment_index_);
+  }
+
   if (input_relation_is_stored_) {
     if (!started_) {
       if (input_relation_.hasPartitionScheme()) {
 #ifdef QUICKSTEP_HAVE_LIBNUMA
         if (input_relation_.hasNUMAPlacementScheme()) {
-          addPartitionAwareWorkOrders(container, storage_manager, predicate, selection, output_destination);
+          addPartitionAwareWorkOrders(container,
+                                      storage_manager,
+                                      predicate,
+                                      selection,
+                                      output_destination,
+                                      lip_filter_deployment);
         }
 #endif
       } else {
-        addWorkOrders(container, storage_manager, predicate, selection, output_destination);
+        addWorkOrders(container,
+                      storage_manager,
+                      predicate,
+                      selection,
+                      output_destination,
+                      lip_filter_deployment);
       }
       started_ = true;
     }
@@ -164,11 +201,21 @@ bool SelectOperator::getAllWorkOrders(
     if (input_relation_.hasPartitionScheme()) {
 #ifdef QUICKSTEP_HAVE_LIBNUMA
         if (input_relation_.hasNUMAPlacementScheme()) {
-          addPartitionAwareWorkOrders(container, storage_manager, predicate, selection, output_destination);
+          addPartitionAwareWorkOrders(container,
+                                      storage_manager,
+                                      predicate,
+                                      selection,
+                                      output_destination,
+                                      lip_filter_deployment);
         }
 #endif
     } else {
-        addWorkOrders(container, storage_manager, predicate, selection, output_destination);
+        addWorkOrders(container,
+                      storage_manager,
+                      predicate,
+                      selection,
+                      output_destination,
+                      lip_filter_deployment);
     }
     return done_feeding_input_relation_;
   }
@@ -222,11 +269,13 @@ void SelectWorkOrder::execute() {
   if (simple_projection_) {
     block->selectSimple(simple_selection_,
                         predicate_,
-                        output_destination_);
+                        output_destination_,
+                        lip_filter_adaptive_prober_.get());
   } else {
     block->select(*DCHECK_NOTNULL(selection_),
                   predicate_,
-                  output_destination_);
+                  output_destination_,
+                  lip_filter_adaptive_prober_.get());
   }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/relational_operators/SelectOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/SelectOperator.hpp b/relational_operators/SelectOperator.hpp
index 0f5c712..0d2ae16 100644
--- a/relational_operators/SelectOperator.hpp
+++ b/relational_operators/SelectOperator.hpp
@@ -38,6 +38,7 @@
 #include "relational_operators/WorkOrder.hpp"
 #include "storage/StorageBlockInfo.hpp"
 #include "utility/Macros.hpp"
+#include "utility/lip_filter/LIPFilterAdaptiveProber.hpp"
 
 #include "glog/logging.h"
 
@@ -49,6 +50,7 @@ namespace quickstep {
 
 class CatalogRelationSchema;
 class InsertDestination;
+class LIPFilterDeployment;
 class Predicate;
 class Scalar;
 class StorageManager;
@@ -250,13 +252,15 @@ class SelectOperator : public RelationalOperator {
                      StorageManager *storage_manager,
                      const Predicate *predicate,
                      const std::vector<std::unique_ptr<const Scalar>> *selection,
-                     InsertDestination *output_destination);
+                     InsertDestination *output_destination,
+                     const LIPFilterDeployment *lip_filter_deployment);
 
   void addPartitionAwareWorkOrders(WorkOrdersContainer *container,
                                    StorageManager *storage_manager,
                                    const Predicate *predicate,
                                    const std::vector<std::unique_ptr<const Scalar>> *selection,
-                                   InsertDestination *output_destination);
+                                   InsertDestination *output_destination,
+                                   const LIPFilterDeployment *lip_filter_deployment);
 
  private:
   /**
@@ -328,6 +332,7 @@ class SelectWorkOrder : public WorkOrder {
                   const std::vector<std::unique_ptr<const Scalar>> *selection,
                   InsertDestination *output_destination,
                   StorageManager *storage_manager,
+                  LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr,
                   const numa_node_id numa_node = 0)
       : WorkOrder(query_id),
         input_relation_(input_relation),
@@ -337,7 +342,8 @@ class SelectWorkOrder : public WorkOrder {
         simple_selection_(simple_selection),
         selection_(selection),
         output_destination_(DCHECK_NOTNULL(output_destination)),
-        storage_manager_(DCHECK_NOTNULL(storage_manager)) {
+        storage_manager_(DCHECK_NOTNULL(storage_manager)),
+        lip_filter_adaptive_prober_(lip_filter_adaptive_prober) {
     preferred_numa_nodes_.push_back(numa_node);
   }
 
@@ -370,6 +376,7 @@ class SelectWorkOrder : public WorkOrder {
                   const std::vector<std::unique_ptr<const Scalar>> *selection,
                   InsertDestination *output_destination,
                   StorageManager *storage_manager,
+                  LIPFilterAdaptiveProber *lip_filter_adaptive_prober = nullptr,
                   const numa_node_id numa_node = 0)
       : WorkOrder(query_id),
         input_relation_(input_relation),
@@ -379,7 +386,8 @@ class SelectWorkOrder : public WorkOrder {
         simple_selection_(std::move(simple_selection)),
         selection_(selection),
         output_destination_(DCHECK_NOTNULL(output_destination)),
-        storage_manager_(DCHECK_NOTNULL(storage_manager)) {
+        storage_manager_(DCHECK_NOTNULL(storage_manager)),
+        lip_filter_adaptive_prober_(lip_filter_adaptive_prober) {
     preferred_numa_nodes_.push_back(numa_node);
   }
 
@@ -407,6 +415,8 @@ class SelectWorkOrder : public WorkOrder {
   InsertDestination *output_destination_;
   StorageManager *storage_manager_;
 
+  std::unique_ptr<LIPFilterAdaptiveProber> lip_filter_adaptive_prober_;
+
   DISALLOW_COPY_AND_ASSIGN(SelectWorkOrder);
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/storage/AggregationOperationState.cpp
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.cpp b/storage/AggregationOperationState.cpp
index 7908db1..707f0fe 100644
--- a/storage/AggregationOperationState.cpp
+++ b/storage/AggregationOperationState.cpp
@@ -46,10 +46,12 @@
 #include "storage/StorageBlock.hpp"
 #include "storage/StorageBlockInfo.hpp"
 #include "storage/StorageManager.hpp"
+#include "storage/ValueAccessor.hpp"
 #include "types/TypedValue.hpp"
 #include "types/containers/ColumnVector.hpp"
 #include "types/containers/ColumnVectorsValueAccessor.hpp"
 #include "types/containers/Tuple.hpp"
+#include "utility/lip_filter/LIPFilterAdaptiveProber.hpp"
 
 #include "glog/logging.h"
 
@@ -332,11 +334,12 @@ bool AggregationOperationState::ProtoIsValid(
   return true;
 }
 
-void AggregationOperationState::aggregateBlock(const block_id input_block) {
+void AggregationOperationState::aggregateBlock(const block_id input_block,
+                                               LIPFilterAdaptiveProber *lip_filter_adaptive_prober) {
   if (group_by_list_.empty()) {
-    aggregateBlockSingleState(input_block);
+    aggregateBlockSingleState(input_block, lip_filter_adaptive_prober);
   } else {
-    aggregateBlockHashTable(input_block);
+    aggregateBlockHashTable(input_block, lip_filter_adaptive_prober);
   }
 }
 
@@ -361,17 +364,27 @@ void AggregationOperationState::mergeSingleState(
 }
 
 void AggregationOperationState::aggregateBlockSingleState(
-    const block_id input_block) {
+    const block_id input_block,
+    LIPFilterAdaptiveProber *lip_filter_adaptive_prober) {
   // Aggregate per-block state for each aggregate.
   std::vector<std::unique_ptr<AggregationState>> local_state;
 
   BlockReference block(
       storage_manager_->getBlock(input_block, input_relation_));
 
-  // If there is a filter predicate, 'reuse_matches' holds the set of matching
-  // tuples so that it can be reused across multiple aggregates (i.e. we only
-  // pay the cost of evaluating the predicate once).
-  std::unique_ptr<TupleIdSequence> reuse_matches;
+  // TODO: predicate + lip_filter. For now, the LIP filter (if any) runs first and its result is passed to predicate matching.
+  std::unique_ptr<TupleIdSequence> filter;
+  if (lip_filter_adaptive_prober != nullptr || predicate_ != nullptr) {
+    std::unique_ptr<ValueAccessor> accessor(
+        block->getTupleStorageSubBlock().createValueAccessor());
+    if (lip_filter_adaptive_prober != nullptr) {
+      filter.reset(lip_filter_adaptive_prober->filterValueAccessor(accessor.get()));
+    }
+    if (predicate_ != nullptr) {
+      filter.reset(block->getMatchesForPredicate(predicate_.get(), filter.get()));
+    }
+  }
+
   for (std::size_t agg_idx = 0; agg_idx < handles_.size(); ++agg_idx) {
     const std::vector<attribute_id> *local_arguments_as_attributes = nullptr;
 #ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
@@ -388,9 +401,8 @@ void AggregationOperationState::aggregateBlockSingleState(
                                arguments_[agg_idx],
                                local_arguments_as_attributes,
                                {}, /* group_by */
-                               predicate_.get(),
+                               filter.get(),
                                distinctify_hashtables_[agg_idx].get(),
-                               &reuse_matches,
                                nullptr /* reuse_group_by_vectors */);
       local_state.emplace_back(nullptr);
     } else {
@@ -398,8 +410,7 @@ void AggregationOperationState::aggregateBlockSingleState(
       local_state.emplace_back(block->aggregate(*handles_[agg_idx],
                                                 arguments_[agg_idx],
                                                 local_arguments_as_attributes,
-                                                predicate_.get(),
-                                                &reuse_matches));
+                                                filter.get()));
     }
   }
 
@@ -408,14 +419,23 @@ void AggregationOperationState::aggregateBlockSingleState(
 }
 
 void AggregationOperationState::aggregateBlockHashTable(
-    const block_id input_block) {
+    const block_id input_block,
+    LIPFilterAdaptiveProber *lip_filter_adaptive_prober) {
   BlockReference block(
       storage_manager_->getBlock(input_block, input_relation_));
 
-  // If there is a filter predicate, 'reuse_matches' holds the set of matching
-  // tuples so that it can be reused across multiple aggregates (i.e. we only
-  // pay the cost of evaluating the predicate once).
-  std::unique_ptr<TupleIdSequence> reuse_matches;
+  // TODO: predicate + lip_filter. For now, the LIP filter (if any) runs first and its result is passed to predicate matching.
+  std::unique_ptr<TupleIdSequence> filter;
+  if (lip_filter_adaptive_prober != nullptr || predicate_ != nullptr) {
+    std::unique_ptr<ValueAccessor> accessor(
+        block->getTupleStorageSubBlock().createValueAccessor());
+    if (lip_filter_adaptive_prober != nullptr) {
+      filter.reset(lip_filter_adaptive_prober->filterValueAccessor(accessor.get()));
+    }
+    if (predicate_ != nullptr) {
+      filter.reset(block->getMatchesForPredicate(predicate_.get(), filter.get()));
+    }
+  }
 
   // This holds values of all the GROUP BY attributes so that the can be reused
   // across multiple aggregates (i.e. we only pay the cost of evaluatin the
@@ -432,9 +452,8 @@ void AggregationOperationState::aggregateBlockHashTable(
                                arguments_[agg_idx],
                                nullptr, /* arguments_as_attributes */
                                group_by_list_,
-                               predicate_.get(),
+                               filter.get(),
                                distinctify_hashtables_[agg_idx].get(),
-                               &reuse_matches,
                                &reuse_group_by_vectors);
     }
   }
@@ -448,9 +467,8 @@ void AggregationOperationState::aggregateBlockHashTable(
   DCHECK(agg_hash_table != nullptr);
   block->aggregateGroupBy(arguments_,
                           group_by_list_,
-                          predicate_.get(),
+                          filter.get(),
                           agg_hash_table,
-                          &reuse_matches,
                           &reuse_group_by_vectors);
   group_by_hashtable_pool_->returnHashTable(agg_hash_table);
 }



[09/12] incubator-quickstep git commit: Optimizer changes for the LIPFilter feature.

Posted by ji...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/tests/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/CMakeLists.txt b/query_optimizer/tests/CMakeLists.txt
index 597dbe0..ac4548a 100644
--- a/query_optimizer/tests/CMakeLists.txt
+++ b/query_optimizer/tests/CMakeLists.txt
@@ -94,22 +94,6 @@ add_executable(quickstep_queryoptimizer_tests_ExecutionGeneratorTest
                ExecutionGeneratorTestRunner.hpp
                "${PROJECT_SOURCE_DIR}/utility/textbased_test/TextBasedTest.cpp"
                "${PROJECT_SOURCE_DIR}/utility/textbased_test/TextBasedTest.hpp")
-add_executable(ExecutionHeuristics_unittest ExecutionHeuristics_unittest.cpp)
-target_link_libraries(ExecutionHeuristics_unittest
-                      gtest
-                      gtest_main
-                      quickstep_catalog_Catalog
-                      quickstep_catalog_CatalogDatabase
-                      quickstep_catalog_CatalogTypedefs
-                      quickstep_queryexecution_QueryContext
-                      quickstep_queryexecution_QueryContext_proto
-                      quickstep_queryoptimizer_ExecutionHeuristics
-                      quickstep_queryoptimizer_QueryPlan
-                      quickstep_relationaloperators_BuildHashOperator
-                      quickstep_relationaloperators_HashJoinOperator
-                      quickstep_utility_Macros)
-add_test(ExecutionHeuristics_unittest ExecutionHeuristics_unittest)
-
 add_executable(quickstep_queryoptimizer_tests_OptimizerTextTest
                OptimizerTextTest.cpp
                OptimizerTextTestRunner.cpp

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/tests/ExecutionHeuristics_unittest.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/ExecutionHeuristics_unittest.cpp b/query_optimizer/tests/ExecutionHeuristics_unittest.cpp
deleted file mode 100644
index 73b3e84..0000000
--- a/query_optimizer/tests/ExecutionHeuristics_unittest.cpp
+++ /dev/null
@@ -1,311 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- **/
-
-#include <cstddef>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "catalog/Catalog.hpp"
-#include "catalog/CatalogDatabase.hpp"
-#include "catalog/CatalogTypedefs.hpp"
-#include "query_execution/QueryContext.hpp"
-#include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/ExecutionHeuristics.hpp"
-#include "query_optimizer/QueryPlan.hpp"
-#include "relational_operators/BuildHashOperator.hpp"
-#include "relational_operators/HashJoinOperator.hpp"
-#include "utility/Macros.hpp"
-
-#include "glog/logging.h"
-#include "gtest/gtest.h"
-
-namespace quickstep {
-namespace optimizer {
-
-namespace {
-constexpr std::size_t kQueryId = 0;
-}
-
-class ExecutionHeuristicsTest : public ::testing::Test {
- protected:
-  virtual void SetUp() {
-    db_ = cat_.getDatabaseByIdMutable(cat_.addDatabase(new CatalogDatabase(nullptr, "db")));
-    execution_heuristics_.reset(new ExecutionHeuristics());
-    query_plan_.reset(new QueryPlan());
-    query_context_proto_.reset(new serialization::QueryContext());
-  }
-
-  CatalogRelation* createCatalogRelation(const std::string &name, bool temporary = false) {
-    return db_->getRelationByIdMutable(db_->addRelation(new CatalogRelation(nullptr, name, -1, temporary)));
-  }
-
-  void addDummyHashJoinInfo(ExecutionHeuristics *execution_heuristics,
-                            const QueryPlan::DAGNodeIndex build_operator_index,
-                            const QueryPlan::DAGNodeIndex join_operator_index,
-                            const CatalogRelation *build_relation,
-                            const CatalogRelation *probe_relation,
-                            const attribute_id build_attribute_id,
-                            const attribute_id probe_attribute_id,
-                            const QueryContext::join_hash_table_id join_hash_table_id) {
-    std::vector<attribute_id> build_attribute_ids(1, build_attribute_id);
-    std::vector<attribute_id> probe_attribute_ids(1, probe_attribute_id);
-    execution_heuristics->addHashJoinInfo(build_operator_index,
-                                          join_operator_index,
-                                          build_relation,
-                                          probe_relation,
-                                          std::move(build_attribute_ids),
-                                          std::move(probe_attribute_ids),
-                                          join_hash_table_id);
-  }
-
-  QueryPlan::DAGNodeIndex createDummyBuildHashOperator(QueryPlan *query_plan,
-                                                       const CatalogRelation *build_relation,
-                                                       const attribute_id build_attribute_id,
-                                                       const QueryContext::join_hash_table_id join_hash_table_index) {
-    std::vector<attribute_id> build_attribute_ids;
-    build_attribute_ids.push_back(build_attribute_id);
-    QueryPlan::DAGNodeIndex build_operator_index =
-        query_plan->addRelationalOperator(new BuildHashOperator(kQueryId,
-                                                                *build_relation,
-                                                                true,
-                                                                build_attribute_ids,
-                                                                false,
-                                                                join_hash_table_index));
-    return build_operator_index;
-  }
-
-  QueryPlan::DAGNodeIndex createDummyHashJoinOperator(QueryPlan *query_plan,
-                                                      const CatalogRelation *build_relation,
-                                                      const CatalogRelation *probe_relation,
-                                                      const attribute_id probe_attribute_id,
-                                                      const QueryContext::join_hash_table_id join_hash_table_index) {
-    std::vector<attribute_id> probe_attribute_ids;
-    probe_attribute_ids.push_back(probe_attribute_id);
-    QueryPlan::DAGNodeIndex join_operator_index =
-        query_plan->addRelationalOperator(
-            new HashJoinOperator(kQueryId,
-                                 *build_relation,
-                                 *probe_relation,
-                                 true,
-                                 probe_attribute_ids,
-                                 false,
-                                 *probe_relation,
-                                 0,
-                                 join_hash_table_index,
-                                 0,
-                                 0));
-    return join_operator_index;
-  }
-
-  Catalog cat_;
-  CatalogDatabase *db_;  // db_ is owned by cat_.
-  std::unique_ptr<QueryPlan> query_plan_;
-  std::unique_ptr<serialization::QueryContext> query_context_proto_;
-  std::unique_ptr<ExecutionHeuristics> execution_heuristics_;
-};
-
-TEST_F(ExecutionHeuristicsTest, HashJoinOptimizedTest) {
-  // This test case creates three hash joins, all of which are being probed on the same relation.
-  // Since the probe are being made on the same relation, ExecutionHeuristics should optimize
-  // these hash joins using bloom filters.
-
-  const CatalogRelation *build_relation_1 = createCatalogRelation("build_relation_1");
-  const CatalogRelation *build_relation_2 = createCatalogRelation("build_relation_2");
-  const CatalogRelation *build_relation_3 = createCatalogRelation("build_relation_3");
-  const CatalogRelation *probe_relation_1 = createCatalogRelation("probe_relation_1");
-
-  const attribute_id build_attribute_id_1 = 0;
-  const attribute_id build_attribute_id_2 = 0;
-  const attribute_id build_attribute_id_3 = 0;
-  const attribute_id probe_attribute_id_1 = 1;
-  const attribute_id probe_attribute_id_2 = 2;
-  const attribute_id probe_attribute_id_3 = 3;
-
-  const QueryContext::join_hash_table_id join_hash_table_index_1 = 0;
-  const QueryContext::join_hash_table_id join_hash_table_index_2 = 1;
-  const QueryContext::join_hash_table_id join_hash_table_index_3 = 2;
-  query_context_proto_->add_join_hash_tables();
-  query_context_proto_->add_join_hash_tables();
-  query_context_proto_->add_join_hash_tables();
-
-  const QueryPlan::DAGNodeIndex build_operator_index_1 = createDummyBuildHashOperator(query_plan_.get(),
-                                                                                      build_relation_1,
-                                                                                      build_attribute_id_1,
-                                                                                      join_hash_table_index_1);
-  const QueryPlan::DAGNodeIndex probe_operator_index_1 = createDummyHashJoinOperator(query_plan_.get(),
-                                                                                     build_relation_1,
-                                                                                     probe_relation_1,
-                                                                                     probe_attribute_id_1,
-                                                                                     join_hash_table_index_1);
-  const QueryPlan::DAGNodeIndex build_operator_index_2 = createDummyBuildHashOperator(query_plan_.get(),
-                                                                                      build_relation_2,
-                                                                                      build_attribute_id_2,
-                                                                                      join_hash_table_index_2);
-  const QueryPlan::DAGNodeIndex probe_operator_index_2 = createDummyHashJoinOperator(query_plan_.get(),
-                                                                                     build_relation_2,
-                                                                                     probe_relation_1,
-                                                                                     probe_attribute_id_2,
-                                                                                     join_hash_table_index_2);
-  const QueryPlan::DAGNodeIndex build_operator_index_3 = createDummyBuildHashOperator(query_plan_.get(),
-                                                                                      build_relation_3,
-                                                                                      build_attribute_id_3,
-                                                                                      join_hash_table_index_3);
-  const QueryPlan::DAGNodeIndex probe_operator_index_3 = createDummyHashJoinOperator(query_plan_.get(),
-                                                                                     build_relation_3,
-                                                                                     probe_relation_1,
-                                                                                     probe_attribute_id_3,
-                                                                                     join_hash_table_index_3);
-
-  addDummyHashJoinInfo(execution_heuristics_.get(),
-                       build_operator_index_1,
-                       probe_operator_index_1,
-                       build_relation_1,
-                       probe_relation_1,
-                       build_attribute_id_1,
-                       probe_attribute_id_1,
-                       join_hash_table_index_1);
-  addDummyHashJoinInfo(execution_heuristics_.get(),
-                       build_operator_index_2,
-                       probe_operator_index_2,
-                       build_relation_2,
-                       probe_relation_1,
-                       build_attribute_id_2,
-                       probe_attribute_id_2,
-                       join_hash_table_index_2);
-  addDummyHashJoinInfo(execution_heuristics_.get(),
-                       build_operator_index_3,
-                       probe_operator_index_3,
-                       build_relation_3,
-                       probe_relation_1,
-                       build_attribute_id_3,
-                       probe_attribute_id_3,
-                       join_hash_table_index_3);
-
-  execution_heuristics_->optimizeExecutionPlan(query_plan_.get(), query_context_proto_.get());
-
-  // Test whether correct number of bloom filters were added.
-  EXPECT_EQ(1, query_context_proto_->join_hash_tables(0).build_side_bloom_filter_id_size());
-  EXPECT_EQ(1, query_context_proto_->join_hash_tables(1).build_side_bloom_filter_id_size());
-  EXPECT_EQ(1, query_context_proto_->join_hash_tables(2).build_side_bloom_filter_id_size());
-  EXPECT_EQ(3, query_context_proto_->join_hash_tables(0).probe_side_bloom_filters_size());
-
-  // Test that the DAG was modified correctly or not.
-  // Probe operator 1 should have now build operator 1 and build operator 2 added as dependencies.
-  auto const probe_node_dependencies = query_plan_->getQueryPlanDAG().getDependencies(probe_operator_index_1);
-  EXPECT_EQ(1u, probe_node_dependencies.count(build_operator_index_2));
-  EXPECT_EQ(1u, probe_node_dependencies.count(build_operator_index_3));
-}
-
-TEST_F(ExecutionHeuristicsTest, HashJoinNotOptimizedTest) {
-  // This test case creates three hash joins, all of which are being probed on different relations.
-  // Since the probe are being made on the different relations, ExecutionHeuristics should optimize
-  // these hash joins using bloom filters.
-
-  const CatalogRelation *build_relation_1 = createCatalogRelation("build_relation_1");
-  const CatalogRelation *build_relation_2 = createCatalogRelation("build_relation_2");
-  const CatalogRelation *build_relation_3 = createCatalogRelation("build_relation_3");
-  const CatalogRelation *probe_relation_1 = createCatalogRelation("probe_relation_1");
-  const CatalogRelation *probe_relation_2 = createCatalogRelation("probe_relation_2");
-  const CatalogRelation *probe_relation_3 = createCatalogRelation("probe_relation_3");
-
-  const attribute_id build_attribute_id_1 = 0;
-  const attribute_id build_attribute_id_2 = 0;
-  const attribute_id build_attribute_id_3 = 0;
-  const attribute_id probe_attribute_id_1 = 1;
-  const attribute_id probe_attribute_id_2 = 2;
-  const attribute_id probe_attribute_id_3 = 3;
-
-  const QueryContext::join_hash_table_id join_hash_table_index_1 = 0;
-  const QueryContext::join_hash_table_id join_hash_table_index_2 = 1;
-  const QueryContext::join_hash_table_id join_hash_table_index_3 = 2;
-  query_context_proto_->add_join_hash_tables();
-  query_context_proto_->add_join_hash_tables();
-  query_context_proto_->add_join_hash_tables();
-
-  const QueryPlan::DAGNodeIndex build_operator_index_1 = createDummyBuildHashOperator(query_plan_.get(),
-                                                                                      build_relation_1,
-                                                                                      build_attribute_id_1,
-                                                                                      join_hash_table_index_1);
-  const QueryPlan::DAGNodeIndex probe_operator_index_1 = createDummyHashJoinOperator(query_plan_.get(),
-                                                                                     build_relation_1,
-                                                                                     probe_relation_1,
-                                                                                     probe_attribute_id_1,
-                                                                                     join_hash_table_index_1);
-  const QueryPlan::DAGNodeIndex build_operator_index_2 = createDummyBuildHashOperator(query_plan_.get(),
-                                                                                      build_relation_2,
-                                                                                      build_attribute_id_2,
-                                                                                      join_hash_table_index_2);
-  const QueryPlan::DAGNodeIndex probe_operator_index_2 = createDummyHashJoinOperator(query_plan_.get(),
-                                                                                     build_relation_2,
-                                                                                     probe_relation_2,
-                                                                                     probe_attribute_id_2,
-                                                                                     join_hash_table_index_2);
-  const QueryPlan::DAGNodeIndex build_operator_index_3 = createDummyBuildHashOperator(query_plan_.get(),
-                                                                                      build_relation_3,
-                                                                                      build_attribute_id_3,
-                                                                                      join_hash_table_index_3);
-  const QueryPlan::DAGNodeIndex probe_operator_index_3 = createDummyHashJoinOperator(query_plan_.get(),
-                                                                                     build_relation_3,
-                                                                                     probe_relation_3,
-                                                                                     probe_attribute_id_3,
-                                                                                     join_hash_table_index_3);
-
-  addDummyHashJoinInfo(execution_heuristics_.get(),
-                       build_operator_index_1,
-                       probe_operator_index_1,
-                       build_relation_1,
-                       probe_relation_1,
-                       build_attribute_id_1,
-                       probe_attribute_id_1,
-                       join_hash_table_index_1);
-  addDummyHashJoinInfo(execution_heuristics_.get(),
-                       build_operator_index_2,
-                       probe_operator_index_2,
-                       build_relation_2,
-                       probe_relation_2,
-                       build_attribute_id_2,
-                       probe_attribute_id_2,
-                       join_hash_table_index_2);
-  addDummyHashJoinInfo(execution_heuristics_.get(),
-                       build_operator_index_3,
-                       probe_operator_index_3,
-                       build_relation_3,
-                       probe_relation_3,
-                       build_attribute_id_3,
-                       probe_attribute_id_3,
-                       join_hash_table_index_3);
-
-  execution_heuristics_->optimizeExecutionPlan(query_plan_.get(), query_context_proto_.get());
-
-  // Test that no bloom filters were added.
-  EXPECT_EQ(0, query_context_proto_->join_hash_tables(0).build_side_bloom_filter_id_size());
-  EXPECT_EQ(0, query_context_proto_->join_hash_tables(1).build_side_bloom_filter_id_size());
-  EXPECT_EQ(0, query_context_proto_->join_hash_tables(2).build_side_bloom_filter_id_size());
-  EXPECT_EQ(0, query_context_proto_->join_hash_tables(0).probe_side_bloom_filters_size());
-
-  // Test that the DAG was not modified at all.
-  // Probe operator 1 should not have build operator 1 and build operator 2 added as dependencies.
-  auto probe_node_dependencies = query_plan_->getQueryPlanDAG().getDependencies(probe_operator_index_1);
-  EXPECT_EQ(0u, probe_node_dependencies.count(build_operator_index_2));
-  EXPECT_EQ(0u, probe_node_dependencies.count(build_operator_index_3));
-}
-
-}  // namespace optimizer
-}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index ddaae45..395e264 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -156,6 +156,8 @@ QS_PROTOBUF_GENERATE_CPP(quickstep_utility_SortConfiguration_proto_srcs
                          quickstep_utility_SortConfiguration_proto_hdrs
                          SortConfiguration.proto)
 
+add_subdirectory(lip_filter)
+
 # Declare micro-libs:
 add_library(quickstep_utility_Alignment ../empty_src.cpp Alignment.hpp)
 add_library(quickstep_utility_BitManipulation ../empty_src.cpp BitManipulation.hpp)
@@ -168,6 +170,7 @@ add_library(quickstep_utility_CalculateInstalledMemory CalculateInstalledMemory.
 add_library(quickstep_utility_Cast ../empty_src.cpp Cast.hpp)
 add_library(quickstep_utility_CheckSnprintf ../empty_src.cpp CheckSnprintf.hpp)
 add_library(quickstep_utility_DAG ../empty_src.cpp DAG.hpp)
+add_library(quickstep_utility_DisjointTreeForest ../empty_src.cpp DisjointTreeForest.hpp)
 add_library(quickstep_utility_EqualsAnyConstant ../empty_src.cpp EqualsAnyConstant.hpp)
 add_library(quickstep_utility_ExecutionDAGVisualizer
             ExecutionDAGVisualizer.cpp
@@ -230,6 +233,8 @@ target_link_libraries(quickstep_utility_CheckSnprintf
 target_link_libraries(quickstep_utility_DAG
                       glog
                       quickstep_utility_Macros)
+target_link_libraries(quickstep_utility_DisjointTreeForest
+                      glog)
 target_link_libraries(quickstep_utility_ExecutionDAGVisualizer
                       quickstep_catalog_CatalogRelationSchema
                       quickstep_queryexecution_QueryExecutionTypedefs
@@ -253,7 +258,9 @@ target_link_libraries(quickstep_utility_PlanVisualizer
                       quickstep_catalog_CatalogRelation
                       quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
                       quickstep_queryoptimizer_expressions_AttributeReference
+                      quickstep_queryoptimizer_expressions_ExprId
                       quickstep_queryoptimizer_physical_HashJoin
+                      quickstep_queryoptimizer_physical_LIPFilterConfiguration
                       quickstep_queryoptimizer_physical_Physical
                       quickstep_queryoptimizer_physical_PhysicalType
                       quickstep_queryoptimizer_physical_TableReference
@@ -319,6 +326,7 @@ target_link_libraries(quickstep_utility
                       quickstep_utility_Cast
                       quickstep_utility_CheckSnprintf
                       quickstep_utility_DAG
+                      quickstep_utility_DisjointTreeForest
                       quickstep_utility_EqualsAnyConstant
                       quickstep_utility_ExecutionDAGVisualizer
                       quickstep_utility_Glob
@@ -375,6 +383,13 @@ target_link_libraries(DAG_unittest
                       ${LIBS})
 add_test(DAG_unittest DAG_unittest)
 
+add_executable(DisjointTreeForest_unittest "${CMAKE_CURRENT_SOURCE_DIR}/tests/DisjointTreeForest_unittest.cpp")
+target_link_libraries(DisjointTreeForest_unittest
+                      gtest
+                      gtest_main
+                      quickstep_utility_DisjointTreeForest)
+add_test(DisjointTreeForest_unittest DisjointTreeForest_unittest)
+
 add_executable(EqualsAnyConstant_unittest "${CMAKE_CURRENT_SOURCE_DIR}/tests/EqualsAnyConstant_unittest.cpp")
 target_link_libraries(EqualsAnyConstant_unittest
                       gtest

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/utility/DisjointTreeForest.hpp
----------------------------------------------------------------------
diff --git a/utility/DisjointTreeForest.hpp b/utility/DisjointTreeForest.hpp
new file mode 100644
index 0000000..971ba10
--- /dev/null
+++ b/utility/DisjointTreeForest.hpp
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_DISJOINT_TREE_FOREST_HPP_
+#define QUICKSTEP_UTILITY_DISJOINT_TREE_FOREST_HPP_
+
+#include <algorithm>
+#include <cstddef>
+#include <unordered_map>
+#include <vector>
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ *  @{
+ */
+
+/**
+ * @brief Disjoint sets (union-find) stored as a forest of parent-linked nodes so
+ *        that the union/find operations have nearly O(1) time complexity.
+ */
+template <typename ElementT,
+          class MapperT = std::unordered_map<ElementT, std::size_t>>
+class DisjointTreeForest {
+ public:
+  /**
+   * @brief Whether the given element is in a subset, i.e. was added by makeSet().
+   *
+   * @param element The element.
+   * @return True if the element is in a subset.
+   */
+  bool hasElement(const ElementT &element) const {
+    return elements_map_.find(element) != elements_map_.end();
+  }
+
+  /**
+   * @brief If the given element is not in any subset yet, make a singleton
+   *        subset for it. Otherwise do nothing.
+   *
+   * @param element The element.
+   */
+  void makeSet(const ElementT &element) {
+    if (!hasElement(element)) {
+      std::size_t loc = nodes_.size();
+      nodes_.emplace_back(0, loc);  // Rank 0; its own parent, i.e. a root.
+      elements_map_.emplace(element, loc);
+    }
+  }
+
+  /**
+   * @brief Find the subset id (the index of the root node) for the given element.
+   *
+   * @param element The element; DCHECKed to already be in a subset.
+   */
+  std::size_t find(const ElementT &element) {
+    DCHECK(hasElement(element));
+
+    const std::size_t node_id = elements_map_.at(element);
+    std::size_t root_id = node_id;
+    std::size_t parent_id;
+    while ((parent_id = nodes_[root_id].parent) != root_id) {  // A root is its own parent.
+      root_id = parent_id;
+    }
+    compress_path(node_id, root_id);  // Re-point the walked path at the root.
+    return root_id;
+  }
+
+  /**
+   * @brief Union the two subsets that the two given elements belong to.
+   *
+   * @param element1 The first element.
+   * @param element2 The second element.
+   */
+  void merge(const ElementT &element1, const ElementT &element2) {
+    std::size_t root_id1 = find(element1);
+    std::size_t root_id2 = find(element2);
+    if (root_id1 != root_id2) {  // Already in the same subset otherwise: nothing to do.
+      Node &n1 = nodes_[root_id1];
+      Node &n2 = nodes_[root_id2];
+      if (n1.rank > n2.rank) {  // The lower-ranked root becomes a child of the other.
+        n2.parent = root_id1;
+      } else if (n1.rank < n2.rank) {
+        n1.parent = root_id2;
+      } else {  // Equal ranks: attach n1 under n2 and bump n2's rank.
+        n1.parent = root_id2;
+        n2.rank += 1;
+      }
+    }
+  }
+
+  /**
+   * @brief Whether the two given elements are in the same subset.
+   *
+   * @param element1 The first element.
+   * @param element2 The second element.
+   * @return True if the two elements are in the same subset, false otherwise.
+   */
+  bool isConnected(const ElementT &element1, const ElementT &element2) {
+    return find(element1) == find(element2);
+  }
+
+ private:
+  struct Node {
+    Node(const std::size_t rank_in, const std::size_t parent_in)
+        : rank(rank_in), parent(parent_in) {
+    }
+    std::size_t rank;  // Compared in merge() to pick which root becomes the child.
+    std::size_t parent;  // Index into nodes_; equal to the node's own index for a root.
+  };
+
+  inline void compress_path(const std::size_t leaf_node_id,
+                            const std::size_t root_node_id) {
+    std::size_t node_id = leaf_node_id;
+    std::size_t max_rank = 0;  // Largest rank seen on the path, excluding the root.
+    while (node_id != root_node_id) {
+      const Node &node = nodes_[node_id];
+      max_rank = std::max(max_rank, node.rank);
+
+      const std::size_t parent_id = node.parent;  // Saved before the link is overwritten.
+      nodes_[node_id].parent = root_node_id;
+      node_id = parent_id;
+    }
+    nodes_[root_node_id].rank = max_rank + 1;  // NOTE(review): overwrites the root's rank; confirm intended.
+  }
+
+  std::vector<Node> nodes_;  // Indexed by node id.
+  MapperT elements_map_;  // Element -> index into nodes_.
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_UTILITY_DISJOINT_TREE_FOREST_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/utility/PlanVisualizer.cpp
----------------------------------------------------------------------
diff --git a/utility/PlanVisualizer.cpp b/utility/PlanVisualizer.cpp
index 50cf7f0..2adf674 100644
--- a/utility/PlanVisualizer.cpp
+++ b/utility/PlanVisualizer.cpp
@@ -21,15 +21,16 @@
 
 #include <cstddef>
 #include <memory>
+#include <set>
 #include <sstream>
 #include <string>
 #include <unordered_map>
 #include <vector>
 
 #include "catalog/CatalogRelation.hpp"
-
 #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
 #include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
 #include "query_optimizer/physical/HashJoin.hpp"
 #include "query_optimizer/physical/Physical.hpp"
 #include "query_optimizer/physical/PhysicalType.hpp"
@@ -47,9 +48,12 @@ namespace C = ::quickstep::optimizer::cost;
 
 std::string PlanVisualizer::visualize(const P::PhysicalPtr &input) {
   DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+  const P::TopLevelPlanPtr top_level_plan =
+      std::static_pointer_cast<const P::TopLevelPlan>(input);
   cost_model_.reset(
       new C::StarSchemaSimpleCostModel(
-          std::static_pointer_cast<const P::TopLevelPlan>(input)->shared_subplans()));
+          top_level_plan->shared_subplans()));
+  lip_filter_conf_ = top_level_plan->lip_filter_configuration();
 
   color_map_["TableReference"] = "skyblue";
   color_map_["Selection"] = "#90EE90";
@@ -86,6 +90,9 @@ std::string PlanVisualizer::visualize(const P::PhysicalPtr &input) {
   for (const EdgeInfo &edge_info : edges_) {
     graph_oss << "  " << edge_info.src_node_id << " -> "
               << edge_info.dst_node_id << " [";
+    if (edge_info.dashed) {
+      graph_oss << "style=dashed ";
+    }
     if (!edge_info.labels.empty()) {
       graph_oss << "label=\""
                 << EscapeSpecialChars(JoinToString(edge_info.labels, "&#10;"))
@@ -103,6 +110,10 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
   int node_id = ++id_counter_;
   node_id_map_.emplace(input, node_id);
 
+  std::set<E::ExprId> referenced_ids;
+  for (const auto &attr : input->getReferencedAttributes()) {
+    referenced_ids.emplace(attr->id());
+  }
   for (const auto &child : input->children()) {
     visit(child);
 
@@ -112,12 +123,18 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
     EdgeInfo &edge_info = edges_.back();
     edge_info.src_node_id = child_id;
     edge_info.dst_node_id = node_id;
+    edge_info.dashed = false;
 
-    // Print output attributes except for TableReference -- there are just too many
-    // attributes out of TableReference.
-    if (child->getPhysicalType() != P::PhysicalType::kTableReference) {
-      for (const auto &attr : child->getOutputAttributes()) {
-        edge_info.labels.emplace_back(attr->attribute_alias());
+    if (input->getPhysicalType() == P::PhysicalType::kHashJoin &&
+        child == input->children()[1]) {
+      edge_info.dashed = true;
+    }
+
+    for (const auto &attr : child->getOutputAttributes()) {
+      if (referenced_ids.find(attr->id()) != referenced_ids.end()) {
+        edge_info.labels.emplace_back(
+            attr->attribute_alias() + ", est # distinct = " +
+            std::to_string(cost_model_->estimateNumDistinctValues(attr->id(), child)));
       }
     }
   }
@@ -154,6 +171,26 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) {
       break;
     }
   }
+
+  if (lip_filter_conf_ != nullptr) {
+    const auto &build_filters = lip_filter_conf_->getBuildInfoMap();
+    const auto build_it = build_filters.find(input);
+    if (build_it != build_filters.end()) {
+      for (const auto &build_info : build_it->second) {
+        node_info.labels.emplace_back(
+            std::string("[LIP build] ") + build_info.build_attribute->attribute_alias());
+      }
+    }
+    const auto &probe_filters = lip_filter_conf_->getProbeInfoMap();
+    const auto probe_it = probe_filters.find(input);
+    if (probe_it != probe_filters.end()) {
+      for (const auto &probe_info : probe_it->second) {
+        node_info.labels.emplace_back(
+            std::string("[LIP probe] ") + probe_info.probe_attribute->attribute_alias());
+      }
+    }
+  }
+
   node_info.labels.emplace_back(
       "est. # = " + std::to_string(cost_model_->estimateCardinality(input)));
   node_info.labels.emplace_back(

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/utility/PlanVisualizer.hpp
----------------------------------------------------------------------
diff --git a/utility/PlanVisualizer.hpp b/utility/PlanVisualizer.hpp
index 1c0df77..9b8b0db 100644
--- a/utility/PlanVisualizer.hpp
+++ b/utility/PlanVisualizer.hpp
@@ -26,6 +26,7 @@
 #include <vector>
 
 #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
 #include "query_optimizer/physical/Physical.hpp"
 #include "utility/Macros.hpp"
 
@@ -73,6 +74,7 @@ class PlanVisualizer {
     int src_node_id;
     int dst_node_id;
     std::vector<std::string> labels;
+    bool dashed;
   };
 
   void visit(const optimizer::physical::PhysicalPtr &input);
@@ -85,6 +87,7 @@ class PlanVisualizer {
   std::vector<EdgeInfo> edges_;
 
   std::unique_ptr<optimizer::cost::StarSchemaSimpleCostModel> cost_model_;
+  optimizer::physical::LIPFilterConfigurationPtr lip_filter_conf_;
 
   DISALLOW_COPY_AND_ASSIGN(PlanVisualizer);
 };

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/utility/lip_filter/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/lip_filter/CMakeLists.txt b/utility/lip_filter/CMakeLists.txt
new file mode 100644
index 0000000..2232abe
--- /dev/null
+++ b/utility/lip_filter/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Declare micro-libs:
+add_library(quickstep_utility_lipfilter_LIPFilter ../../empty_src.cpp LIPFilter.hpp)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/utility/lip_filter/LIPFilter.hpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilter.hpp b/utility/lip_filter/LIPFilter.hpp
new file mode 100644
index 0000000..33165ed
--- /dev/null
+++ b/utility/lip_filter/LIPFilter.hpp
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_HPP_
+#define QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_HPP_
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ *  @{
+ */
+
+enum class LIPFilterType {
+  kBloomFilter,
+  kExactFilter,
+  kSingleIdentityHashFilter
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/utility/tests/DisjointTreeForest_unittest.cpp
----------------------------------------------------------------------
diff --git a/utility/tests/DisjointTreeForest_unittest.cpp b/utility/tests/DisjointTreeForest_unittest.cpp
new file mode 100644
index 0000000..2e12fad
--- /dev/null
+++ b/utility/tests/DisjointTreeForest_unittest.cpp
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "utility/DisjointTreeForest.hpp"
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+namespace quickstep {
+
+TEST(DisjointTreeForestTest, IntTest) {
+  DisjointTreeForest<int> forest;
+  for (int i = 10; i < 20; ++i) {
+    forest.makeSet(i);
+  }
+
+  for (int i = 10; i < 20; i += 2) {
+    EXPECT_NE(forest.find(i), forest.find(i+1));
+    EXPECT_FALSE(forest.isConnected(i, i+1));
+
+    forest.merge(i, i+1);
+    EXPECT_EQ(forest.find(i), forest.find(i+1));
+    EXPECT_TRUE(forest.isConnected(i, i+1));
+
+    forest.merge(i+1, i);
+    EXPECT_EQ(forest.find(i), forest.find(i+1));
+    EXPECT_TRUE(forest.isConnected(i, i+1));
+  }
+
+  for (int i = 12; i < 20; i += 2) {
+    EXPECT_NE(forest.find(i), forest.find(i-1));
+    EXPECT_FALSE(forest.isConnected(i, i-1));
+  }
+
+  forest.merge(10, 17);
+  forest.merge(11, 18);
+  EXPECT_EQ(forest.find(11), forest.find(16));
+  EXPECT_EQ(forest.find(10), forest.find(19));
+  EXPECT_NE(forest.find(10), forest.find(12));
+  EXPECT_NE(forest.find(15), forest.find(17));
+
+  forest.merge(12, 14);
+  forest.merge(15, 16);
+  const std::size_t id = forest.find(10);
+  for (int i = 10; i < 20; ++i) {
+    EXPECT_EQ(forest.find(i), id);
+  }
+}
+
+TEST(DisjointTreeForestTest, StringTest) {
+  DisjointTreeForest<std::string> forest;
+  const std::vector<std::string> elements = { "aaa", "bbb", "ccc", "ddd" };
+  for (const std::string &element : elements) {
+    forest.makeSet(element);
+  }
+
+  EXPECT_NE(forest.find("aaa"), forest.find("bbb"));
+  forest.merge("aaa", "bbb");
+  EXPECT_EQ(forest.find("aaa"), forest.find("bbb"));
+
+  EXPECT_NE(forest.find("ccc"), forest.find("ddd"));
+  forest.merge("ccc", "ddd");
+  EXPECT_EQ(forest.find("ccc"), forest.find("ddd"));
+
+  EXPECT_NE(forest.find("aaa"), forest.find("ccc"));
+  EXPECT_NE(forest.find("aaa"), forest.find("ddd"));
+  EXPECT_NE(forest.find("bbb"), forest.find("ccc"));
+  EXPECT_NE(forest.find("bbb"), forest.find("ddd"));
+
+  forest.merge("aaa", "ddd");
+  for (const std::string &e1 : elements) {
+    for (const std::string &e2 : elements) {
+      EXPECT_EQ(forest.find(e1), forest.find(e2));
+      EXPECT_TRUE(forest.isConnected(e1, e2));
+    }
+  }
+}
+
+}  // namespace quickstep



[10/12] incubator-quickstep git commit: Optimizer changes for the LIPFilter feature.

Posted by ji...@apache.org.
Optimizer changes for the LIPFilter feature.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/7a464434
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/7a464434
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/7a464434

Branch: refs/heads/lip-refactor
Commit: 7a46443491b1c25af3d7aaf738d6e9b096ed52d0
Parents: 160276c
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Wed Sep 7 13:20:43 2016 -0500
Committer: Harshad Deshmukh <hb...@apache.org>
Committed: Tue Oct 18 11:26:02 2016 -0500

----------------------------------------------------------------------
 query_optimizer/CMakeLists.txt                  |  12 +-
 query_optimizer/ExecutionGenerator.cpp          |  49 ---
 query_optimizer/ExecutionGenerator.hpp          |   5 +-
 query_optimizer/ExecutionHeuristics.cpp         | 129 --------
 query_optimizer/ExecutionHeuristics.hpp         | 157 ----------
 query_optimizer/PhysicalGenerator.cpp           |  18 +-
 .../cost_model/StarSchemaSimpleCostModel.cpp    |   2 +-
 query_optimizer/physical/CMakeLists.txt         |   7 +
 .../physical/LIPFilterConfiguration.hpp         | 171 ++++++++++
 query_optimizer/physical/TopLevelPlan.hpp       |  43 ++-
 query_optimizer/rules/AttachLIPFilters.cpp      | 248 +++++++++++++++
 query_optimizer/rules/AttachLIPFilters.hpp      | 151 +++++++++
 query_optimizer/rules/CMakeLists.txt            |  19 ++
 .../StarSchemaHashJoinOrderOptimization.cpp     | 273 ++++++++++------
 .../StarSchemaHashJoinOrderOptimization.hpp     | 118 +++++--
 query_optimizer/tests/CMakeLists.txt            |  16 -
 .../tests/ExecutionHeuristics_unittest.cpp      | 311 -------------------
 utility/CMakeLists.txt                          |  15 +
 utility/DisjointTreeForest.hpp                  | 152 +++++++++
 utility/PlanVisualizer.cpp                      |  51 ++-
 utility/PlanVisualizer.hpp                      |   3 +
 utility/lip_filter/CMakeLists.txt               |  19 ++
 utility/lip_filter/LIPFilter.hpp                |  39 +++
 utility/tests/DisjointTreeForest_unittest.cpp   |  98 ++++++
 24 files changed, 1277 insertions(+), 829 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 988ffd8..fa9141c 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -41,7 +41,6 @@ add_subdirectory(tests)
 
 # Declare micro-libs:
 add_library(quickstep_queryoptimizer_ExecutionGenerator ExecutionGenerator.cpp ExecutionGenerator.hpp)
-add_library(quickstep_queryoptimizer_ExecutionHeuristics ExecutionHeuristics.cpp ExecutionHeuristics.hpp)
 add_library(quickstep_queryoptimizer_LogicalGenerator LogicalGenerator.cpp LogicalGenerator.hpp)
 add_library(quickstep_queryoptimizer_LogicalToPhysicalMapper
             ../empty_src.cpp
@@ -73,7 +72,6 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
                       quickstep_expressions_windowaggregation_WindowAggregateFunction_proto
                       quickstep_queryexecution_QueryContext
                       quickstep_queryexecution_QueryContext_proto
-                      quickstep_queryoptimizer_ExecutionHeuristics
                       quickstep_queryoptimizer_OptimizerContext
                       quickstep_queryoptimizer_QueryHandle
                       quickstep_queryoptimizer_QueryPlan
@@ -153,14 +151,6 @@ if (ENABLE_DISTRIBUTED)
   target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
                         quickstep_catalog_Catalog_proto)
 endif()
-target_link_libraries(quickstep_queryoptimizer_ExecutionHeuristics
-                      glog
-                      quickstep_catalog_CatalogRelation
-                      quickstep_catalog_CatalogTypedefs
-                      quickstep_queryexecution_QueryContext
-                      quickstep_queryexecution_QueryContext_proto
-                      quickstep_queryoptimizer_QueryPlan
-                      quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_LogicalGenerator
                       glog
                       quickstep_parser_ParseStatement
@@ -196,6 +186,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
                       quickstep_queryoptimizer_LogicalToPhysicalMapper
                       quickstep_queryoptimizer_logical_Logical
                       quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_rules_AttachLIPFilters
                       quickstep_queryoptimizer_rules_PruneColumns
                       quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
                       quickstep_queryoptimizer_rules_SwapProbeBuild
@@ -233,7 +224,6 @@ target_link_libraries(quickstep_queryoptimizer_Validator
 add_library(quickstep_queryoptimizer ../empty_src.cpp QueryOptimizerModule.hpp)
 target_link_libraries(quickstep_queryoptimizer
                       quickstep_queryoptimizer_ExecutionGenerator
-                      quickstep_queryoptimizer_ExecutionHeuristics
                       quickstep_queryoptimizer_LogicalGenerator
                       quickstep_queryoptimizer_LogicalToPhysicalMapper
                       quickstep_queryoptimizer_Optimizer

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 9347c9c..09ef9e0 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -54,7 +54,6 @@
 #include "expressions/window_aggregation/WindowAggregateFunction.pb.h"
 #include "query_execution/QueryContext.hpp"
 #include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/ExecutionHeuristics.hpp"
 #include "query_optimizer/OptimizerContext.hpp"
 #include "query_optimizer/QueryHandle.hpp"
 #include "query_optimizer/QueryPlan.hpp"
@@ -211,11 +210,6 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) {
         temporary_relation_info.producer_operator_index);
   }
 
-  // Optimize execution plan based on heuristics captured during execution plan generation, if enabled.
-  if (FLAGS_optimize_joins) {
-    execution_heuristics_->optimizeExecutionPlan(execution_plan_, query_context_proto_);
-  }
-
 #ifdef QUICKSTEP_DISTRIBUTED
   catalog_database_cache_proto_->set_name(catalog_database_->getName());
 
@@ -600,34 +594,14 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
   std::vector<attribute_id> probe_attribute_ids;
   std::vector<attribute_id> build_attribute_ids;
 
-  std::vector<attribute_id> probe_original_attribute_ids;
-  std::vector<attribute_id> build_original_attribute_ids;
-
-  const CatalogRelation *referenced_stored_probe_relation = nullptr;
-  const CatalogRelation *referenced_stored_build_relation = nullptr;
-
   std::size_t build_cardinality = cost_model_->estimateCardinality(build_physical);
 
   bool any_probe_attributes_nullable = false;
   bool any_build_attributes_nullable = false;
 
-  bool skip_hash_join_optimization = false;
-
   const std::vector<E::AttributeReferencePtr> &left_join_attributes =
       physical_plan->left_join_attributes();
   for (const E::AttributeReferencePtr &left_join_attribute : left_join_attributes) {
-    // Try to determine the original stored relation referenced in the Hash Join.
-    referenced_stored_probe_relation =
-        catalog_database_->getRelationByName(left_join_attribute->relation_name());
-    if (referenced_stored_probe_relation == nullptr) {
-      // Hash Join optimizations are not possible, if the referenced relation cannot be determined.
-      skip_hash_join_optimization = true;
-    } else {
-      const attribute_id probe_operator_attribute_id =
-          referenced_stored_probe_relation->getAttributeByName(left_join_attribute->attribute_name())->getID();
-      probe_original_attribute_ids.emplace_back(probe_operator_attribute_id);
-    }
-
     const CatalogAttribute *probe_catalog_attribute
         = attribute_substitution_map_[left_join_attribute->id()];
     probe_attribute_ids.emplace_back(probe_catalog_attribute->getID());
@@ -640,18 +614,6 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
   const std::vector<E::AttributeReferencePtr> &right_join_attributes =
       physical_plan->right_join_attributes();
   for (const E::AttributeReferencePtr &right_join_attribute : right_join_attributes) {
-    // Try to determine the original stored relation referenced in the Hash Join.
-    referenced_stored_build_relation =
-        catalog_database_->getRelationByName(right_join_attribute->relation_name());
-    if (referenced_stored_build_relation == nullptr) {
-      // Hash Join optimizations are not possible, if the referenced relation cannot be determined.
-      skip_hash_join_optimization = true;
-    } else {
-      const attribute_id build_operator_attribute_id =
-          referenced_stored_build_relation->getAttributeByName(right_join_attribute->attribute_name())->getID();
-      build_original_attribute_ids.emplace_back(build_operator_attribute_id);
-    }
-
     const CatalogAttribute *build_catalog_attribute
         = attribute_substitution_map_[right_join_attribute->id()];
     build_attribute_ids.emplace_back(build_catalog_attribute->getID());
@@ -828,17 +790,6 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
       std::forward_as_tuple(join_operator_index,
                             output_relation));
   temporary_relation_info_vec_.emplace_back(join_operator_index, output_relation);
-
-  // Add heuristics for the Hash Join, if enabled.
-  if (FLAGS_optimize_joins && !skip_hash_join_optimization) {
-    execution_heuristics_->addHashJoinInfo(build_operator_index,
-                                           join_operator_index,
-                                           referenced_stored_build_relation,
-                                           referenced_stored_probe_relation,
-                                           std::move(build_original_attribute_ids),
-                                           std::move(probe_original_attribute_ids),
-                                           join_hash_table_index);
-  }
 }
 
 void ExecutionGenerator::convertNestedLoopsJoin(

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/ExecutionGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp
index 2aaf5ab..495955e 100644
--- a/query_optimizer/ExecutionGenerator.hpp
+++ b/query_optimizer/ExecutionGenerator.hpp
@@ -33,7 +33,6 @@
 #include "catalog/CatalogTypedefs.hpp"
 #include "query_execution/QueryContext.hpp"
 #include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/ExecutionHeuristics.hpp"
 #include "query_optimizer/QueryHandle.hpp"
 #include "query_optimizer/QueryPlan.hpp"
 #include "query_optimizer/cost_model/CostModel.hpp"
@@ -102,8 +101,7 @@ class ExecutionGenerator {
       : catalog_database_(DCHECK_NOTNULL(catalog_database)),
         query_handle_(DCHECK_NOTNULL(query_handle)),
         execution_plan_(DCHECK_NOTNULL(query_handle->getQueryPlanMutable())),
-        query_context_proto_(DCHECK_NOTNULL(query_handle->getQueryContextProtoMutable())),
-        execution_heuristics_(new ExecutionHeuristics()) {
+        query_context_proto_(DCHECK_NOTNULL(query_handle->getQueryContextProtoMutable())) {
     query_context_proto_->set_query_id(query_handle_->query_id());
 #ifdef QUICKSTEP_DISTRIBUTED
     catalog_database_cache_proto_ = DCHECK_NOTNULL(query_handle->getCatalogDatabaseCacheProtoMutable());
@@ -386,7 +384,6 @@ class ExecutionGenerator {
   QueryHandle *query_handle_;
   QueryPlan *execution_plan_;  // A part of QueryHandle.
   serialization::QueryContext *query_context_proto_;  // A part of QueryHandle.
-  std::unique_ptr<ExecutionHeuristics> execution_heuristics_;
 
 #ifdef QUICKSTEP_DISTRIBUTED
   serialization::CatalogDatabase *catalog_database_cache_proto_;  // A part of QueryHandle.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/ExecutionHeuristics.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.cpp b/query_optimizer/ExecutionHeuristics.cpp
deleted file mode 100644
index 4fd7320..0000000
--- a/query_optimizer/ExecutionHeuristics.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- **/
-
-#include "query_optimizer/ExecutionHeuristics.hpp"
-
-#include <cstddef>
-#include <utility>
-#include <unordered_map>
-#include <vector>
-
-#include "catalog/CatalogTypedefs.hpp"
-#include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/QueryPlan.hpp"
-#include "utility/Macros.hpp"
-
-#include "glog/logging.h"
-
-namespace quickstep {
-namespace optimizer {
-
-void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
-                                                serialization::QueryContext *query_context_proto) {
-  // Currently this only optimizes left deep joins using bloom filters.
-  // It uses a simple algorithm to discover the left deep joins.
-  // It starts with the first hash join in the plan and keeps on iterating
-  // over the next hash joins, till a probe on a different relation id is found.
-  // The set of hash joins found in this way forms a chain and can be recognized
-  // as a left deep join. It becomes a candidate for optimization.
-
-  // The optimization is done by modifying each of the build operators in the chain
-  // to generate a bloom filter on the build key during their hash table creation.
-  // The leaf-level probe operator is then modified to query all the bloom
-  // filters generated from all the build operators in the chain. These
-  // bloom filters are queried to test the membership of the probe key
-  // just prior to probing the hash table.
-
-  QueryPlan::DAGNodeIndex origin_node = 0;
-  while (origin_node < hash_joins_.size() - 1) {
-    std::vector<std::size_t> chained_nodes;
-    chained_nodes.push_back(origin_node);
-    for (std::size_t i = origin_node + 1; i < hash_joins_.size(); ++i) {
-      const relation_id checked_relation_id = hash_joins_[origin_node].referenced_stored_probe_relation_->getID();
-      const relation_id expected_relation_id = hash_joins_[i].referenced_stored_probe_relation_->getID();
-      if (checked_relation_id == expected_relation_id) {
-        chained_nodes.push_back(i);
-      } else {
-        break;
-      }
-    }
-
-    // Only chains of length greater than one are suitable candidates for semi-join optimization.
-    if (chained_nodes.size() > 1) {
-      std::unordered_map<QueryContext::bloom_filter_id, std::vector<attribute_id>> probe_bloom_filter_info;
-      for (const std::size_t node : chained_nodes) {
-        // Provision for a new bloom filter to be used by the build operator.
-        const QueryContext::bloom_filter_id bloom_filter_id =  query_context_proto->bloom_filters_size();
-        serialization::BloomFilter *bloom_filter_proto = query_context_proto->add_bloom_filters();
-
-        // Modify the bloom filter properties based on the statistics of the relation.
-        setBloomFilterProperties(bloom_filter_proto, hash_joins_[node].referenced_stored_build_relation_);
-
-        // Add build-side bloom filter information to the corresponding hash table proto.
-        query_context_proto->mutable_join_hash_tables(hash_joins_[node].join_hash_table_id_)
-            ->add_build_side_bloom_filter_id(bloom_filter_id);
-
-        probe_bloom_filter_info.insert(std::make_pair(bloom_filter_id, hash_joins_[node].probe_attributes_));
-      }
-
-      // Add probe-side bloom filter information to the corresponding hash table proto for each build-side bloom filter.
-      for (const std::pair<QueryContext::bloom_filter_id, std::vector<attribute_id>>
-               &bloom_filter_info : probe_bloom_filter_info) {
-        auto *probe_side_bloom_filter =
-            query_context_proto->mutable_join_hash_tables(hash_joins_[origin_node].join_hash_table_id_)
-                                  ->add_probe_side_bloom_filters();
-        probe_side_bloom_filter->set_probe_side_bloom_filter_id(bloom_filter_info.first);
-        for (const attribute_id &probe_attribute_id : bloom_filter_info.second) {
-          probe_side_bloom_filter->add_probe_side_attr_ids(probe_attribute_id);
-        }
-      }
-
-      // Add node dependencies from chained build nodes to origin node probe.
-      for (std::size_t i = 1; i < chained_nodes.size(); ++i) {  // Note: It starts from index 1.
-        query_plan->addDirectDependency(hash_joins_[origin_node].join_operator_index_,
-                                        hash_joins_[origin_node + i].build_operator_index_,
-                                        true /* is_pipeline_breaker */);
-      }
-    }
-
-    // Update the origin node.
-    origin_node = chained_nodes.back() + 1;
-  }
-}
-
-void ExecutionHeuristics::setBloomFilterProperties(serialization::BloomFilter *bloom_filter_proto,
-                                                   const CatalogRelation *relation) {
-  const std::size_t cardinality = relation->estimateTupleCardinality();
-  if (cardinality < kOneThousand) {
-    bloom_filter_proto->set_bloom_filter_size(kOneThousand / kCompressionFactor);
-    bloom_filter_proto->set_number_of_hashes(kVeryLowSparsityHash);
-  } else if (cardinality < kTenThousand) {
-    bloom_filter_proto->set_bloom_filter_size(kTenThousand / kCompressionFactor);
-    bloom_filter_proto->set_number_of_hashes(kLowSparsityHash);
-  } else if (cardinality < kHundredThousand) {
-    bloom_filter_proto->set_bloom_filter_size(kHundredThousand / kCompressionFactor);
-    bloom_filter_proto->set_number_of_hashes(kMediumSparsityHash);
-  } else {
-    bloom_filter_proto->set_bloom_filter_size(kMillion / kCompressionFactor);
-    bloom_filter_proto->set_number_of_hashes(kHighSparsityHash);
-  }
-}
-
-}  // namespace optimizer
-}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/ExecutionHeuristics.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.hpp b/query_optimizer/ExecutionHeuristics.hpp
deleted file mode 100644
index 8ad3b7a..0000000
--- a/query_optimizer/ExecutionHeuristics.hpp
+++ /dev/null
@@ -1,157 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- **/
-
-#ifndef QUICKSTEP_QUERY_OPTIMIZER_EXECUTION_HEURISTICS_HPP_
-#define QUICKSTEP_QUERY_OPTIMIZER_EXECUTION_HEURISTICS_HPP_
-
-#include <vector>
-
-#include "catalog/CatalogRelation.hpp"
-#include "catalog/CatalogTypedefs.hpp"
-#include "query_execution/QueryContext.hpp"
-#include "query_execution/QueryContext.pb.h"
-#include "query_optimizer/QueryPlan.hpp"
-#include "utility/Macros.hpp"
-
-#include "glog/logging.h"
-
-namespace quickstep {
-namespace optimizer {
-
-/** \addtogroup QueryOptimizer
- *  @{
- */
-
-/**
- * @brief The ExecutionHeuristics compiles certain heuristics for an execution plan
- *        as it is being converted to a physical plan. These heuristics can then be
- *        used to optimize the execution plan after it has been generated.
- **/
-class ExecutionHeuristics {
- public:
-  static const std::size_t kOneHundred = 100;
-  static const std::size_t kOneThousand = 1000;
-  static const std::size_t kTenThousand = 10000;
-  static const std::size_t kHundredThousand = 100000;
-  static const std::size_t kMillion = 1000000;
-
-  static const std::size_t kCompressionFactor = 10;
-
-  static const std::size_t kVeryLowSparsityHash = 1;
-  static const std::size_t kLowSparsityHash = 2;
-  static const std::size_t kMediumSparsityHash = 5;
-  static const std::size_t kHighSparsityHash = 10;
-
-  /**
-   * @brief A simple internal class that holds information about various
-   *        hash joins within the execution plan for a query.
-   **/
-  struct HashJoinInfo {
-    HashJoinInfo(const QueryPlan::DAGNodeIndex build_operator_index,
-                 const QueryPlan::DAGNodeIndex join_operator_index,
-                 const CatalogRelation *referenced_stored_build_relation,
-                 const CatalogRelation *referenced_stored_probe_relation,
-                 std::vector<attribute_id> &&build_attributes,
-                 std::vector<attribute_id> &&probe_attributes,
-                 const QueryContext::join_hash_table_id join_hash_table_id)
-        : build_operator_index_(build_operator_index),
-          join_operator_index_(join_operator_index),
-          referenced_stored_build_relation_(referenced_stored_build_relation),
-          referenced_stored_probe_relation_(referenced_stored_probe_relation),
-          build_attributes_(std::move(build_attributes)),
-          probe_attributes_(std::move(probe_attributes)),
-          join_hash_table_id_(join_hash_table_id) {
-    }
-
-    const QueryPlan::DAGNodeIndex build_operator_index_;
-    const QueryPlan::DAGNodeIndex join_operator_index_;
-    const CatalogRelation *referenced_stored_build_relation_;
-    const CatalogRelation *referenced_stored_probe_relation_;
-    const std::vector<attribute_id> build_attributes_;
-    const std::vector<attribute_id> probe_attributes_;
-    const QueryContext::join_hash_table_id join_hash_table_id_;
-  };
-
-
-  /**
-   * @brief Constructor.
-   **/
-  ExecutionHeuristics() {}
-
-  /**
-   * @brief Saves information about a hash join used within the execution plan
-   *        for a query.
-   *
-   * @param build_operator_index Index of the build operator of the hash join.
-   * @param join_operator_index Index of the join operator of the hash join.
-   * @param build_relation_id Id of the relation on which hash table is being built.
-   * @param probe_relation_id Id of the relation on which hash table is being probed.
-   * @param build_attributes List of attributes on which hash table is being built.
-   * @param probe_attributes List of attributes on which hash table is being probed.
-   * @param join_hash_table_id Id of the hash table which refers to the actual hash
-   *        table within the query context.
-   **/
-  inline void addHashJoinInfo(const QueryPlan::DAGNodeIndex build_operator_index,
-                              const QueryPlan::DAGNodeIndex join_operator_index,
-                              const CatalogRelation *referenced_stored_build_relation,
-                              const CatalogRelation *referenced_stored_probe_relation,
-                              std::vector<attribute_id> &&build_attributes,
-                              std::vector<attribute_id> &&probe_attributes,
-                              const QueryContext::join_hash_table_id join_hash_table_id) {
-    hash_joins_.push_back(HashJoinInfo(build_operator_index,
-                                       join_operator_index,
-                                       referenced_stored_build_relation,
-                                       referenced_stored_probe_relation,
-                                       std::move(build_attributes),
-                                       std::move(probe_attributes),
-                                       join_hash_table_id));
-  }
-
-  /**
-   * @brief Optimize the execution plan based on heuristics generated
-   *        during physical plan to execution plan conversion.
-   *
-   * @param query_plan A mutable reference to the query execution plan.
-   * @param query_context_proto A mutable reference to the protobuf representation
-   *        of the query context.
-   **/
-  void optimizeExecutionPlan(QueryPlan *query_plan, serialization::QueryContext *query_context_proto);
-
-  /**
-   * @brief Set the properties of the bloom filter proto based on the statistics
-   *        of the given relation.
-   *
-   * @param bloom_filter_proto A mutable reference to the bloom filter protobuf representation.
-   * @param relation The catalog relation on which bloom filter is being built.
-   **/
-  void setBloomFilterProperties(serialization::BloomFilter *bloom_filter_proto,
-                                const CatalogRelation *relation);
-
- private:
-  std::vector<HashJoinInfo> hash_joins_;
-
-  DISALLOW_COPY_AND_ASSIGN(ExecutionHeuristics);
-};
-
-/** @} */
-
-}  // namespace optimizer
-}  // namespace quickstep
-
-#endif /* QUICKSTEP_QUERY_OPTIMIZER_EXECUTION_HEURISTICS_HPP_ */

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 8f19702..9db4037 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -26,6 +26,7 @@
 #include "query_optimizer/Validator.hpp"
 #include "query_optimizer/logical/Logical.hpp"
 #include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/AttachLIPFilters.hpp"
 #include "query_optimizer/rules/PruneColumns.hpp"
 #include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
 #include "query_optimizer/rules/SwapProbeBuild.hpp"
@@ -49,6 +50,12 @@ DEFINE_bool(reorder_hash_joins, true,
             "cardinality and selective tables to be joined first, which is suitable "
             "for queries on star-schema tables.");
 
+DEFINE_bool(use_lip_filters, false,
+            "If true, use LIP (Lookahead Information Passing) filters to accelerate "
+            "query processing. LIP filters are effective for queries on star schema "
+            "tables (e.g. the SSB benchmark) and snowflake schema tables (e.g. the "
+            "TPC-H benchmark).");
+
 DEFINE_bool(visualize_plan, false,
             "If true, visualize the final physical plan into a graph in DOT format "
             "(DOT is a plain text graph description language). Then print the "
@@ -95,11 +102,16 @@ P::PhysicalPtr PhysicalGenerator::generateInitialPlan(
 
 P::PhysicalPtr PhysicalGenerator::optimizePlan() {
   std::vector<std::unique_ptr<Rule<P::Physical>>> rules;
+  rules.emplace_back(new PruneColumns());
   if (FLAGS_reorder_hash_joins) {
     rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
+    rules.emplace_back(new PruneColumns());
+  } else {
+    rules.emplace_back(new SwapProbeBuild());
+  }
+  if (FLAGS_use_lip_filters) {
+    rules.emplace_back(new AttachLIPFilters());
   }
-  rules.emplace_back(new PruneColumns());
-  rules.emplace_back(new SwapProbeBuild());
 
   for (std::unique_ptr<Rule<P::Physical>> &rule : rules) {
     physical_plan_ = rule->apply(physical_plan_);
@@ -110,7 +122,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
   DVLOG(4) << "Optimized physical plan:\n" << physical_plan_->toString();
 
   if (FLAGS_visualize_plan) {
-  quickstep::PlanVisualizer plan_visualizer;
+    quickstep::PlanVisualizer plan_visualizer;
     std::cerr << "\n" << plan_visualizer.visualize(physical_plan_) << "\n";
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index 8d254fa..1075739 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -358,7 +358,7 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
           std::static_pointer_cast<const E::LogicalAnd>(filter_predicate);
       double selectivity = 1.0;
       for (const auto &predicate : logical_and->operands()) {
-        selectivity = selectivity * estimateSelectivityForPredicate(predicate, physical_plan);
+        selectivity = std::min(selectivity, estimateSelectivityForPredicate(predicate, physical_plan));
       }
       return selectivity;
     }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/physical/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/CMakeLists.txt b/query_optimizer/physical/CMakeLists.txt
index 3b7d3f0..5c2cd0b 100644
--- a/query_optimizer/physical/CMakeLists.txt
+++ b/query_optimizer/physical/CMakeLists.txt
@@ -27,6 +27,7 @@ add_library(quickstep_queryoptimizer_physical_HashJoin HashJoin.cpp HashJoin.hpp
 add_library(quickstep_queryoptimizer_physical_InsertSelection InsertSelection.cpp InsertSelection.hpp)
 add_library(quickstep_queryoptimizer_physical_InsertTuple InsertTuple.cpp InsertTuple.hpp)
 add_library(quickstep_queryoptimizer_physical_Join ../../empty_src.cpp Join.hpp)
+add_library(quickstep_queryoptimizer_physical_LIPFilterConfiguration ../../empty_src.cpp LIPFilterConfiguration.hpp)
 add_library(quickstep_queryoptimizer_physical_NestedLoopsJoin NestedLoopsJoin.cpp NestedLoopsJoin.hpp)
 add_library(quickstep_queryoptimizer_physical_PatternMatcher ../../empty_src.cpp PatternMatcher.hpp)
 add_library(quickstep_queryoptimizer_physical_Physical ../../empty_src.cpp Physical.hpp)
@@ -150,6 +151,10 @@ target_link_libraries(quickstep_queryoptimizer_physical_Join
                       quickstep_queryoptimizer_expressions_NamedExpression
                       quickstep_queryoptimizer_physical_Physical
                       quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_physical_LIPFilterConfiguration
+                      quickstep_queryoptimizer_expressions_AttributeReference
+                      quickstep_utility_Macros
+                      quickstep_utility_lipfilter_LIPFilter)
 target_link_libraries(quickstep_queryoptimizer_physical_NestedLoopsJoin
                       glog
                       quickstep_queryoptimizer_OptimizerTree
@@ -237,6 +242,7 @@ target_link_libraries(quickstep_queryoptimizer_physical_TopLevelPlan
                       quickstep_queryoptimizer_expressions_AttributeReference
                       quickstep_queryoptimizer_expressions_ExprId
                       quickstep_queryoptimizer_expressions_ExpressionUtil
+                      quickstep_queryoptimizer_physical_LIPFilterConfiguration
                       quickstep_queryoptimizer_physical_Physical
                       quickstep_queryoptimizer_physical_PhysicalType
                       quickstep_utility_Cast
@@ -279,6 +285,7 @@ target_link_libraries(quickstep_queryoptimizer_physical
                       quickstep_queryoptimizer_physical_InsertSelection
                       quickstep_queryoptimizer_physical_InsertTuple
                       quickstep_queryoptimizer_physical_Join
+                      quickstep_queryoptimizer_physical_LIPFilterConfiguration
                       quickstep_queryoptimizer_physical_NestedLoopsJoin
                       quickstep_queryoptimizer_physical_PatternMatcher
                       quickstep_queryoptimizer_physical_Physical

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/physical/LIPFilterConfiguration.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/LIPFilterConfiguration.hpp b/query_optimizer/physical/LIPFilterConfiguration.hpp
new file mode 100644
index 0000000..62a6149
--- /dev/null
+++ b/query_optimizer/physical/LIPFilterConfiguration.hpp
@@ -0,0 +1,171 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_LIP_FILTER_CONFIGURATION_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_LIP_FILTER_CONFIGURATION_HPP_
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "utility/Macros.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+
+namespace quickstep {
+namespace optimizer {
+namespace physical {
+
+/** \addtogroup OptimizerPhysical
+ *  @{
+ */
+
+class Physical;
+typedef std::shared_ptr<const Physical> PhysicalPtr;
+
+/**
+ * @brief Immutable optimizer-side description of one LIP filter to be built.
+ */
+struct LIPFilterBuildInfo {
+  /**
+   * @brief Constructor. Each argument initializes the const member of the same name.
+   *
+   * @param build_attribute_in The attribute to build the LIP filter with.
+   * @param filter_cardinality_in The LIP filter's cardinality.
+   * @param filter_type_in The LIP filter's type.
+   */
+  LIPFilterBuildInfo(const expressions::AttributeReferencePtr &build_attribute_in,
+                     const std::size_t filter_cardinality_in,
+                     const LIPFilterType &filter_type_in)
+      : build_attribute(build_attribute_in),
+        filter_cardinality(filter_cardinality_in),
+        filter_type(filter_type_in) {
+  }
+  const expressions::AttributeReferencePtr build_attribute;  // Attribute the filter is built with.
+  const std::size_t filter_cardinality;  // Cardinality requested for the filter.
+  const LIPFilterType filter_type;  // Kind of LIP filter to create.
+};
+
+/**
+ * @brief Immutable optimizer-side description of one LIP filter probe and the builder it refers to.
+ */
+struct LIPFilterProbeInfo {
+  /**
+   * @brief Constructor. Each argument initializes the const member of the same name.
+   *
+   * @param probe_attribute_in The attribute to probe the LIP filter with.
+   * @param build_attribute_in The attribute that the LIP filter is built with.
+   * @param builder_in The physical node that the LIP filter's builder is attached to.
+   */
+  LIPFilterProbeInfo(const expressions::AttributeReferencePtr &probe_attribute_in,
+                     const expressions::AttributeReferencePtr &build_attribute_in,
+                     const PhysicalPtr &builder_in)
+      : probe_attribute(probe_attribute_in),
+        build_attribute(build_attribute_in),
+        builder(builder_in) {
+  }
+  const expressions::AttributeReferencePtr probe_attribute;  // Attribute used to probe the filter.
+  const expressions::AttributeReferencePtr build_attribute;  // Attribute the probed filter is built with.
+  const PhysicalPtr builder;  // Node the filter's builder is attached to.
+};
+
+
+class LIPFilterConfiguration;
+typedef std::shared_ptr<const LIPFilterConfiguration> LIPFilterConfigurationPtr;
+
+/**
+ * @brief Per-node lists of all the LIP filter builders and probers in a query plan.
+ */
+class LIPFilterConfiguration {
+ public:
+  /**
+   * @brief Constructor. The configuration starts with no builders and no probers.
+   */
+  LIPFilterConfiguration() {
+  }
+
+  /**
+   * @brief Append information for a LIP filter builder to the list kept for \p builder.
+   *
+   * @param build_attribute The attribute to build the LIP filter with.
+   * @param builder The physical node to attach the LIP filter builder to.
+   * @param filter_size The LIP filter's cardinality (becomes LIPFilterBuildInfo::filter_cardinality).
+   * @param filter_type The LIP filter's type.
+   */
+  void addBuildInfo(const expressions::AttributeReferencePtr &build_attribute,
+                    const PhysicalPtr &builder,
+                    const std::size_t filter_size,
+                    const LIPFilterType &filter_type) {
+    build_info_map_[builder].emplace_back(  // operator[] creates the node's vector on first use.
+        build_attribute, filter_size, filter_type);
+  }
+
+  /**
+   * @brief Append information for a LIP filter prober to the list kept for \p prober.
+   *
+   * @param probe_attribute The attribute to probe the LIP filter with.
+   * @param prober The physical node to attach the LIP filter prober to.
+   * @param build_attribute The attribute that the LIP filter is built with.
+   * @param builder The physical node that the LIP filter's builder is attached to.
+   */
+  void addProbeInfo(const expressions::AttributeReferencePtr &probe_attribute,
+                    const PhysicalPtr &prober,
+                    const expressions::AttributeReferencePtr &build_attribute,
+                    const PhysicalPtr &builder) {
+    probe_info_map_[prober].emplace_back(  // operator[] creates the node's vector on first use.
+        probe_attribute, build_attribute, builder);
+  }
+
+  /**
+   * @brief Get all the LIP filter builders.
+   *
+   * @return A map where each key is a physical node and each mapped value is
+   *         a vector of all the LIP filter builders that are attached to the
+   *         physical node.
+   */
+  const std::map<PhysicalPtr, std::vector<LIPFilterBuildInfo>>& getBuildInfoMap() const {
+    return build_info_map_;
+  }
+
+  /**
+   * @brief Get all the LIP filter probers.
+   *
+   * @return A map where each key is a physical node and each mapped value is
+   *         a vector of all the LIP filter probers that are attached to the
+   *         physical node.
+   */
+  const std::map<PhysicalPtr, std::vector<LIPFilterProbeInfo>>& getProbeInfoMap() const {
+    return probe_info_map_;
+  }
+
+ private:
+  std::map<PhysicalPtr, std::vector<LIPFilterBuildInfo>> build_info_map_;  // Filled by addBuildInfo().
+  std::map<PhysicalPtr, std::vector<LIPFilterProbeInfo>> probe_info_map_;  // Filled by addProbeInfo().
+
+  DISALLOW_COPY_AND_ASSIGN(LIPFilterConfiguration);
+};
+
+/** @} */
+
+}  // namespace physical
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif /* QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_LIP_FILTER_CONFIGURATION_HPP_ */

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/physical/TopLevelPlan.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/TopLevelPlan.hpp b/query_optimizer/physical/TopLevelPlan.hpp
index 8f07dec..7dfc2b6 100644
--- a/query_optimizer/physical/TopLevelPlan.hpp
+++ b/query_optimizer/physical/TopLevelPlan.hpp
@@ -29,6 +29,7 @@
 #include "query_optimizer/expressions/AttributeReference.hpp"
 #include "query_optimizer/expressions/ExprId.hpp"
 #include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
 #include "query_optimizer/physical/Physical.hpp"
 #include "query_optimizer/physical/PhysicalType.hpp"
 #include "utility/Macros.hpp"
@@ -89,6 +90,29 @@ class TopLevelPlan : public Physical {
     return shared_subplans_[index];
   }
 
+  /**
+   * @brief Creates a copy of the TopLevelPlan with lip_filter_configuration_
+   *        replaced by \p new_lip_filter_configuration.
+   *
+   * @param new_lip_filter_configuration The new lip_filter_configuration to be
+   *        substituted for the existing one.
+   * @return A copy of this TopLevelPlan with the new lip_filter_configuration.
+   */
+  TopLevelPlanPtr copyWithLIPFilterConfiguration(
+      const LIPFilterConfigurationPtr &new_lip_filter_configuration) const {
+    return TopLevelPlan::Create(plan_,
+                                shared_subplans_,
+                                uncorrelated_subquery_map_,
+                                new_lip_filter_configuration);
+  }
+
+  /**
+   * @return The LIPFilter configuration information for the overall query plan.
+   */
+  const LIPFilterConfigurationPtr& lip_filter_configuration() const {
+    return lip_filter_configuration_;
+  }
+
   PhysicalPtr copyWithNewChildren(
       const std::vector<PhysicalPtr> &new_children) const override {
     DCHECK_EQ(getNumChildren(), new_children.size());
@@ -125,18 +149,22 @@ class TopLevelPlan : public Physical {
    *
    * @param plan The query plan.
    * @param shared_subplans The subplans referenced in the main input plan.
-   * @param Map from the expression ID of an attribute reference to the
-   *        uncorrelated subquery that produces the attribute.
+   * @param uncorrelated_subquery_map Map from the expression ID of an attribute
+   *        reference to the uncorrelated subquery that produces the attribute.
+   * @param lip_filter_configuration The LIPFilter configuration information
+   *        for the overall query plan.
    * @return An immutable TopLevelPlan.
    */
   static TopLevelPlanPtr Create(
       const PhysicalPtr &plan,
       const std::vector<PhysicalPtr> &shared_subplans = {},
       const std::unordered_map<expressions::ExprId, int> &uncorrelated_subquery_map
-          = std::unordered_map<expressions::ExprId, int>()) {
+          = std::unordered_map<expressions::ExprId, int>(),
+      const LIPFilterConfigurationPtr &lip_filter_configuration = nullptr) {
     return TopLevelPlanPtr(new TopLevelPlan(plan,
                                             shared_subplans,
-                                            uncorrelated_subquery_map));
+                                            uncorrelated_subquery_map,
+                                            lip_filter_configuration));
   }
 
  protected:
@@ -151,10 +179,12 @@ class TopLevelPlan : public Physical {
  private:
   TopLevelPlan(const PhysicalPtr &plan,
                const std::vector<PhysicalPtr> &shared_subplans,
-               const std::unordered_map<expressions::ExprId, int> &uncorrelated_subquery_map)
+               const std::unordered_map<expressions::ExprId, int> &uncorrelated_subquery_map,
+               const LIPFilterConfigurationPtr &lip_filter_configuration)
       : plan_(plan),
         shared_subplans_(shared_subplans),
-        uncorrelated_subquery_map_(uncorrelated_subquery_map) {
+        uncorrelated_subquery_map_(uncorrelated_subquery_map),
+        lip_filter_configuration_(lip_filter_configuration) {
     addChild(plan);
     for (const PhysicalPtr &shared_subplan : shared_subplans) {
       addChild(shared_subplan);
@@ -165,6 +195,7 @@ class TopLevelPlan : public Physical {
   // Stored in the topological ordering based on dependencies.
   std::vector<PhysicalPtr> shared_subplans_;
   std::unordered_map<expressions::ExprId, int> uncorrelated_subquery_map_;
+  LIPFilterConfigurationPtr lip_filter_configuration_;
 
   DISALLOW_COPY_AND_ASSIGN(TopLevelPlan);
 };

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/rules/AttachLIPFilters.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachLIPFilters.cpp b/query_optimizer/rules/AttachLIPFilters.cpp
new file mode 100644
index 0000000..090fb8c
--- /dev/null
+++ b/query_optimizer/rules/AttachLIPFilters.cpp
@@ -0,0 +1,248 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/AttachLIPFilters.hpp"
+
+#include <map>
+#include <set>
+#include <unordered_set>
+#include <unordered_map>
+#include <vector>
+#include <utility>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr AttachLIPFilters::apply(const P::PhysicalPtr &input) {
+  DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);  // The cast below relies on this.
+  // Collects LIP filter builders/probers for the whole plan; returns input itself if none were added.
+  const P::TopLevelPlanPtr top_level_plan =
+     std::static_pointer_cast<const P::TopLevelPlan>(input);
+  cost_model_.reset(
+      new cost::StarSchemaSimpleCostModel(
+          top_level_plan->shared_subplans()));
+  lip_filter_configuration_.reset(new P::LIPFilterConfiguration());  // Fresh, empty configuration per call.
+  // NOTE(review): build_side_info_ and probe_side_info_ are not reset here, unlike the two members above.
+  std::set<E::ExprId> already_filtered_attributes;
+  attachLIPFilters(NodeList(input), &already_filtered_attributes);
+  // Copy the plan only when something was attached; ownership of the configuration moves to that copy.
+  P::PhysicalPtr output;
+  if (!lip_filter_configuration_->getBuildInfoMap().empty() ||
+      !lip_filter_configuration_->getProbeInfoMap().empty()) {
+    output = top_level_plan->copyWithLIPFilterConfiguration(
+        P::LIPFilterConfigurationPtr(lip_filter_configuration_.release()));
+  } else {
+    output = input;
+  }
+  return output;
+}
+
+void AttachLIPFilters::attachLIPFilters(
+    const NodeList &path,
+    std::set<expressions::ExprId> *already_filtered_attributes) {
+  const P::PhysicalPtr &node = path.node;
+
+  // First process child nodes, merging the source attributes each child filtered into the caller's set.
+  for (const auto &child : node->children()) {
+    std::set<E::ExprId> child_filtered_attributes;
+    attachLIPFilters(path.cons(child), &child_filtered_attributes);
+    already_filtered_attributes->insert(child_filtered_attributes.begin(),
+                                        child_filtered_attributes.end());
+  }
+
+  // Attach LIP filters to HashJoin/Selection/Aggregate nodes; other node types leave probe_child null.
+  P::PhysicalPtr probe_child = nullptr;
+  switch (node->getPhysicalType()) {
+    case P::PhysicalType::kHashJoin:
+      probe_child = std::static_pointer_cast<const P::HashJoin>(node)->left();
+      break;
+    case P::PhysicalType::kSelection:
+      probe_child = std::static_pointer_cast<const P::Selection>(node)->input();
+      break;
+    case P::PhysicalType::kAggregate:
+      probe_child = std::static_pointer_cast<const P::Aggregate>(node)->input();
+      break;
+    default:
+      break;
+  }
+
+  if (probe_child != nullptr &&
+      cost_model_->estimateCardinality(probe_child) > 10000000) {  // Hard-coded cardinality threshold.
+    const auto &candidate_lip_filters = getProbeSideInfo(path.cons(probe_child));
+    if (!candidate_lip_filters.empty()) {
+      std::map<E::AttributeReferencePtr, LIPFilterInfoPtr> selected_filters;  // Best candidate per key.
+      for (const auto &info : candidate_lip_filters) {
+        auto it = selected_filters.find(info->attribute);
+        if (it == selected_filters.end()) {
+          selected_filters.emplace(info->attribute, info);
+        } else if (LIPFilterInfo::isBetterThan(*info, *it->second)) {
+          it->second = info;
+        }
+      }
+      // Register a builder/prober pair for each selected filter whose source attribute is not yet used.
+      for (const auto &pair : selected_filters) {
+        const E::ExprId source_attr_id = pair.second->source_attribute->id();
+        if (already_filtered_attributes->find(source_attr_id)
+                == already_filtered_attributes->end()) {
+          lip_filter_configuration_->addBuildInfo(
+              pair.second->source_attribute,
+              pair.second->source,
+              pair.second->estimated_cardinality * 8,  // Filter size: 8x the candidate's estimate.
+              LIPFilterType::kSingleIdentityHashFilter);
+          lip_filter_configuration_->addProbeInfo(
+              pair.first,
+              node,  // The prober is the current node, not probe_child.
+              pair.second->source_attribute,
+              pair.second->source);
+          already_filtered_attributes->emplace(source_attr_id);
+        }
+      }
+    }
+  }
+}
+
+const std::vector<AttachLIPFilters::LIPFilterInfoPtr>& AttachLIPFilters
+    ::getBuildSideInfo(const NodeList &path) {
+  const P::PhysicalPtr &node = path.node;
+  if (build_side_info_.find(node) == build_side_info_.end()) {
+    std::vector<LIPFilterInfoPtr> lip_filters;
+    // The result is memoized per node in build_side_info_; this body runs only on the first request.
+    // 1. Gather candidate LIP filters propagated from descendant nodes.
+    std::unordered_set<E::ExprId> output_attribute_ids;  // NOTE(review): filled below but never read here.
+    for (const auto &attr : node->getOutputAttributes()) {
+      output_attribute_ids.emplace(attr->id());
+    }
+    switch (node->getPhysicalType()) {
+      case P::PhysicalType::kAggregate:
+      case P::PhysicalType::kSelection:
+      case P::PhysicalType::kHashJoin: {
+        for (const P::PhysicalPtr &child : node->children()) {
+          for (const LIPFilterInfoPtr &info : getBuildSideInfo(path.cons(child))) {
+            lip_filters.emplace_back(info);  // Children's candidates are taken as-is, unfiltered.
+          }
+        }
+        break;
+      }
+      default:
+        break;
+    }
+
+    // 2. Consider the parent physical node. If it is a HashJoin,
+    // then each build-side join attribute is a candidate LIP filter
+    // which can be built by the BuildHashOperator that corresponds
+    // to the parent HashJoin node.
+    P::HashJoinPtr hash_join;
+    if (path.cdr() != nullptr &&
+        P::SomeHashJoin::MatchesWithConditionalCast(path.cdr()->node, &hash_join)) {
+      const P::PhysicalPtr &build_node = hash_join->right();  // Parent's right input, whichever child node is.
+      // TODO(jianqiao): consider probe-side info to allow cascading propagation.
+      double selectivity = cost_model_->estimateSelectivity(build_node);
+      // Only consider attributes that are selective.
+      if (selectivity < 1.0) {
+        std::size_t cardinality = cost_model_->estimateCardinality(build_node);
+        for (const auto &attr : hash_join->right_join_attributes()) {
+          lip_filters.emplace_back(
+              std::make_shared<LIPFilterInfo>(attr,
+                                              path.cdr()->node,  // source: the parent HashJoin
+                                              path.depth,  // depth of this node in the path
+                                              selectivity,
+                                              cardinality));
+        }
+      }
+    }
+    build_side_info_.emplace(node, std::move(lip_filters));
+  }
+  return build_side_info_.at(node);
+}
+
+const std::vector<AttachLIPFilters::LIPFilterInfoPtr>& AttachLIPFilters
+    ::getProbeSideInfo(const NodeList &path) {
+  const P::PhysicalPtr &node = path.node;
+  if (probe_side_info_.find(node) == probe_side_info_.end()) {  // Memoized per node in probe_side_info_.
+    std::vector<LIPFilterInfoPtr> lip_filters;
+    if (path.cdr() != nullptr) {  // A path with no parent yields an empty candidate list.
+      // 1. Gather candidate LIP filters propagated from ancestor nodes.
+      const auto &parent_lip_filters = getProbeSideInfo(*path.cdr());
+      if (!parent_lip_filters.empty()) {
+        std::unordered_set<E::ExprId> output_attribute_ids;
+        for (const auto &attr : node->getOutputAttributes()) {
+          output_attribute_ids.emplace(attr->id());
+        }
+        for (const auto &info : parent_lip_filters) {
+          if (output_attribute_ids.find(info->attribute->id()) != output_attribute_ids.end()) {
+            lip_filters.emplace_back(info);  // Kept only if this node outputs the probe attribute.
+          }
+        }
+      }
+
+      // 2. Consider the parent physical node. If it is an InnerHashJoin or
+      // LeftSemiHashJoin, then we can propagate the build-side LIP filters
+      // to the probe-side.
+      P::HashJoinPtr hash_join;
+      if (P::SomeHashJoin::MatchesWithConditionalCast(path.cdr()->node, &hash_join) &&
+          (hash_join->join_type() == P::HashJoin::JoinType::kInnerJoin ||
+           hash_join->join_type() == P::HashJoin::JoinType::kLeftSemiJoin)) {
+        const P::PhysicalPtr &build_side_child = hash_join->right();
+        std::unordered_map<E::ExprId, E::AttributeReferencePtr> join_attribute_pairs;  // build id -> probe attr
+        for (std::size_t i = 0; i < hash_join->left_join_attributes().size(); ++i) {
+          const E::AttributeReferencePtr probe_side_join_attribute =
+              hash_join->left_join_attributes()[i];
+          const E::AttributeReferencePtr build_side_join_attribute =
+              hash_join->right_join_attributes()[i];
+          join_attribute_pairs.emplace(build_side_join_attribute->id(),
+                                       probe_side_join_attribute);
+        }
+        for (const auto &info : getBuildSideInfo(path.cdr()->cons(build_side_child))) {
+          const auto pair_it = join_attribute_pairs.find(info->attribute->id());
+          if (pair_it != join_attribute_pairs.end()) {  // Candidate is on one of this join's build keys.
+            lip_filters.emplace_back(
+                std::make_shared<LIPFilterInfo>(pair_it->second,  // attribute: the probe-side join key
+                                                info->source,
+                                                info->depth,
+                                                info->estimated_selectivity,
+                                                info->estimated_cardinality,
+                                                info->attribute));  // source_attribute: the build-side key
+          }
+        }
+      }
+    }
+    probe_side_info_.emplace(node, std::move(lip_filters));
+  }
+  return probe_side_info_.at(node);
+}
+
+}  // namespace optimizer
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/rules/AttachLIPFilters.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachLIPFilters.hpp b/query_optimizer/rules/AttachLIPFilters.hpp
new file mode 100644
index 0000000..b8cfc39
--- /dev/null
+++ b/query_optimizer/rules/AttachLIPFilters.hpp
@@ -0,0 +1,151 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_LIP_FILTERS_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_LIP_FILTERS_HPP_
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/physical/LIPFilterConfiguration.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ *  @{
+ */
+
+/**
+ * @brief Rule that takes a TopLevelPlan and returns it with a LIPFilterConfiguration attached, if any.
+ */
+class AttachLIPFilters : public Rule<physical::Physical> {
+ public:
+  /**
+   * @brief Constructor. The cost model and configuration members are created later, inside apply().
+   */
+  AttachLIPFilters() {}
+
+  ~AttachLIPFilters() override {}
+
+  std::string getName() const override {
+    return "AttachLIPFilters";
+  }
+
+  physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;  // Entry point of the rule.
+
+ private:
+  /**
+   * @brief One candidate LIP filter: probe attribute, source node/attribute, and cost estimates.
+   */
+  struct LIPFilterInfo {
+    LIPFilterInfo(const expressions::AttributeReferencePtr &attribute_in,
+                  const physical::PhysicalPtr &source_in,
+                  const int depth_in,
+                  const double estimated_selectivity_in,
+                  const std::size_t estimated_cardinality_in,
+                  const expressions::AttributeReferencePtr &source_attribute_in = nullptr)
+        : attribute(attribute_in),
+          source(source_in),
+          depth(depth_in),
+          estimated_selectivity(estimated_selectivity_in),
+          estimated_cardinality(estimated_cardinality_in),
+          source_attribute(
+              source_attribute_in == nullptr
+                  ? attribute_in
+                  : source_attribute_in) {}  // A null source attribute falls back to attribute_in.
+
+    static bool isBetterThan(const LIPFilterInfo &a, const LIPFilterInfo &b) {
+      if (a.estimated_selectivity == b.estimated_selectivity) {
+        return a.depth > b.depth;  // Tie on selectivity: the larger depth wins.
+      } else {
+        return a.estimated_selectivity < b.estimated_selectivity;  // Lower selectivity value wins.
+      }
+    }
+
+    expressions::AttributeReferencePtr attribute;  // Attribute this candidate is keyed on.
+    physical::PhysicalPtr source;  // Node registered as the filter's builder.
+    int depth;  // NodeList depth recorded when the candidate was created.
+    double estimated_selectivity;  // Cost-model selectivity estimate.
+    std::size_t estimated_cardinality;  // Cost-model cardinality estimate.
+    expressions::AttributeReferencePtr source_attribute;  // Attribute the filter is built with.
+  };
+
+  typedef std::shared_ptr<const LIPFilterInfo> LIPFilterInfoPtr;
+
+  /**
+   * @brief Singly linked list of plan nodes: head is the current node, next leads toward the start node.
+   */
+  struct NodeList {
+    explicit NodeList(const physical::PhysicalPtr &node_in)
+        : node(node_in),
+          next(nullptr),
+          depth(0) {}
+
+    NodeList(const physical::PhysicalPtr &node_in,
+             const NodeList *next_in,
+             const int depth_in)
+        : node(node_in),
+          next(next_in),
+          depth(depth_in) {}
+
+    inline const NodeList *cdr() const {
+      return next;
+    }
+
+    inline const NodeList cons(const physical::PhysicalPtr &new_node) const {
+      return NodeList(new_node, this, depth+1);  // Result points back at *this, which must outlive it.
+    }
+
+    const physical::PhysicalPtr node;  // Node at the head of the list.
+    const NodeList *next;  // Non-owning pointer to the rest of the list; nullptr at the end.
+    const int depth;  // 0 for a single-node list; each cons() adds 1.
+  };
+
+  void attachLIPFilters(const NodeList &path,
+                        std::set<expressions::ExprId> *already_filtered_attributes);
+
+  const std::vector<LIPFilterInfoPtr>& getBuildSideInfo(const NodeList &path);
+
+  const std::vector<LIPFilterInfoPtr>& getProbeSideInfo(const NodeList &path);
+
+  std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
+  std::map<physical::PhysicalPtr, std::vector<LIPFilterInfoPtr>> build_side_info_;  // getBuildSideInfo memo.
+  std::map<physical::PhysicalPtr, std::vector<LIPFilterInfoPtr>> probe_side_info_;  // getProbeSideInfo memo.
+  std::unique_ptr<physical::LIPFilterConfiguration> lip_filter_configuration_;
+
+  DISALLOW_COPY_AND_ASSIGN(AttachLIPFilters);
+};
+
+/** @} */
+
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif /* QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_LIP_FILTERS_HPP_ */

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index d9709ce..29875f6 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -18,6 +18,7 @@
 add_subdirectory(tests)
 
 # Declare micro-libs:
+add_library(quickstep_queryoptimizer_rules_AttachLIPFilters AttachLIPFilters.cpp AttachLIPFilters.hpp)
 add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp)
 add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp)
 add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
@@ -36,6 +37,21 @@ add_library(quickstep_queryoptimizer_rules_UnnestSubqueries UnnestSubqueries.cpp
 
 
 # Link dependencies:
+target_link_libraries(quickstep_queryoptimizer_rules_AttachLIPFilters
+                      quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+                      quickstep_queryoptimizer_expressions_AttributeReference
+                      quickstep_queryoptimizer_expressions_ExprId
+                      quickstep_queryoptimizer_physical_Aggregate
+                      quickstep_queryoptimizer_physical_HashJoin
+                      quickstep_queryoptimizer_physical_LIPFilterConfiguration
+                      quickstep_queryoptimizer_physical_PatternMatcher
+                      quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_physical_PhysicalType
+                      quickstep_queryoptimizer_physical_Selection
+                      quickstep_queryoptimizer_physical_TopLevelPlan
+                      quickstep_queryoptimizer_rules_Rule
+                      quickstep_utility_Macros
+                      quickstep_utility_lipfilter_LIPFilter)
 target_link_libraries(quickstep_queryoptimizer_rules_BottomUpRule
                       glog
                       quickstep_queryoptimizer_rules_Rule
@@ -121,12 +137,14 @@ target_link_libraries(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOpti
                       quickstep_queryoptimizer_expressions_NamedExpression
                       quickstep_queryoptimizer_expressions_PatternMatcher
                       quickstep_queryoptimizer_expressions_Predicate
+                      quickstep_queryoptimizer_physical_Aggregate
                       quickstep_queryoptimizer_physical_HashJoin
                       quickstep_queryoptimizer_physical_PatternMatcher
                       quickstep_queryoptimizer_physical_Physical
                       quickstep_queryoptimizer_physical_PhysicalType
                       quickstep_queryoptimizer_physical_TopLevelPlan
                       quickstep_queryoptimizer_rules_Rule
+                      quickstep_utility_DisjointTreeForest
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_rules_SwapProbeBuild
                       quickstep_queryoptimizer_costmodel_SimpleCostModel
@@ -187,6 +205,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_UpdateExpression
 # Module all-in-one library:
 add_library(quickstep_queryoptimizer_rules ../../empty_src.cpp OptimizerRulesModule.hpp)
 target_link_libraries(quickstep_queryoptimizer_rules
+                      quickstep_queryoptimizer_rules_AttachLIPFilters
                       quickstep_queryoptimizer_rules_BottomUpRule
                       quickstep_queryoptimizer_rules_CollapseProject
                       quickstep_queryoptimizer_rules_GenerateJoins

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
index 946d316..5906b98 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
@@ -19,6 +19,8 @@
 
 #include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
 
+#include <algorithm>
+#include <map>
 #include <memory>
 #include <set>
 #include <unordered_map>
@@ -28,11 +30,13 @@
 #include "query_optimizer/expressions/AttributeReference.hpp"
 #include "query_optimizer/expressions/NamedExpression.hpp"
 #include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
 #include "query_optimizer/physical/HashJoin.hpp"
 #include "query_optimizer/physical/PatternMatcher.hpp"
 #include "query_optimizer/physical/Physical.hpp"
 #include "query_optimizer/physical/PhysicalType.hpp"
 #include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "utility/DisjointTreeForest.hpp"
 
 #include "glog/logging.h"
 
@@ -74,6 +78,9 @@ P::PhysicalPtr StarSchemaHashJoinOrderOptimization::applyInternal(const P::Physi
     JoinGroupInfo *join_group = nullptr;
     if (parent_join_group == nullptr || !is_valid_cascading_hash_join) {
       new_join_group.reset(new JoinGroupInfo());
+      for (const auto &attr : input->getOutputAttributes()) {
+        new_join_group->referenced_attributes.emplace(attr->id());
+      }
       join_group = new_join_group.get();
     } else {
       join_group = parent_join_group;
@@ -146,7 +153,10 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
         i,
         tables[i],
         cost_model_->estimateCardinality(tables[i]),
-        cost_model_->estimateSelectivity(tables[i]));
+        cost_model_->estimateSelectivity(tables[i]),
+        CountSharedAttributes(join_group.referenced_attributes,
+                              tables[i]->getOutputAttributes()),
+        tables[i]->getPhysicalType() == physical::PhysicalType::kAggregate);
   }
 
   // Auxiliary mapping info.
@@ -163,9 +173,25 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
     }
   }
 
-  // Create a join graph where tables are vertices, and add an edge between vertices
-  // t1 and t2 for each join predicate t1.x = t2.y
-  std::vector<std::unordered_set<std::size_t>> join_graph(table_info_storage.size());
+  // The pool of tables.
+  std::set<TableInfo*> remaining_tables;
+  for (auto &table_info : table_info_storage) {
+    remaining_tables.emplace(&table_info);
+  }
+
+  // The equal-join (e.g. =) operator defines an equivalence relation on the
+  // set of all the attributes. The disjoint set data structure is used to keep
+  // track of the equivalence classes that each attribute belongs to.
+  DisjointTreeForest<E::ExprId> join_attribute_forest;
+  for (const auto &attr_id_pair : join_group.join_attribute_pairs) {
+    join_attribute_forest.makeSet(attr_id_pair.first);
+    join_attribute_forest.makeSet(attr_id_pair.second);
+    join_attribute_forest.merge(attr_id_pair.first, attr_id_pair.second);
+  }
+
+  // Map each equivalence class id to the members (e.g. <table id, attribute id>
+  // pairs) in that equivalence class.
+  std::map<std::size_t, std::map<std::size_t, E::ExprId>> join_attribute_groups;
   for (const auto &attr_id_pair : join_group.join_attribute_pairs) {
     DCHECK(attribute_id_to_table_info_index_map.find(attr_id_pair.first)
                != attribute_id_to_table_info_index_map.end());
@@ -178,128 +204,148 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
         attribute_id_to_table_info_index_map[attr_id_pair.second];
     DCHECK_NE(first_table_idx, second_table_idx);
 
-    table_info_storage[first_table_idx].join_attribute_pairs.emplace(
-        attr_id_pair.first, attr_id_pair.second);
-    table_info_storage[second_table_idx].join_attribute_pairs.emplace(
-        attr_id_pair.second, attr_id_pair.first);
-
-    join_graph[first_table_idx].emplace(second_table_idx);
-    join_graph[second_table_idx].emplace(first_table_idx);
-  }
-
-  std::set<TableInfo*, TableInfoPtrLessComparator> table_info_ordered_by_priority;
-  for (std::size_t i = 0; i < table_info_storage.size(); ++i) {
-    table_info_ordered_by_priority.emplace(&table_info_storage[i]);
+    DCHECK_EQ(join_attribute_forest.find(attr_id_pair.first),
+              join_attribute_forest.find(attr_id_pair.second));
+    const std::size_t attr_group_id = join_attribute_forest.find(attr_id_pair.first);
+    auto &attr_group = join_attribute_groups[attr_group_id];
+    attr_group.emplace(first_table_idx, attr_id_pair.first);
+    attr_group.emplace(second_table_idx, attr_id_pair.second);
   }
 
-  // Contruct hash join tree.
   while (true) {
-    TableInfo *first_table_info = *table_info_ordered_by_priority.begin();
-    table_info_ordered_by_priority.erase(
-        table_info_ordered_by_priority.begin());
-    const std::size_t first_table_info_id = first_table_info->table_info_id;
-
-    TableInfo *second_table_info = nullptr;
-    std::set<TableInfo*, TableInfoPtrLessComparator>::iterator second_table_info_it;
-    for (auto candidate_table_info_it = table_info_ordered_by_priority.begin();
-         candidate_table_info_it != table_info_ordered_by_priority.end();
-         ++candidate_table_info_it) {
-      TableInfo *candidate_table_info = *candidate_table_info_it;
-      const std::size_t candidate_table_info_id = candidate_table_info->table_info_id;
-
-      if (join_graph[first_table_info_id].find(candidate_table_info_id)
-              == join_graph[first_table_info_id].end() &&
-          join_graph[candidate_table_info_id].find(first_table_info_id)
-              == join_graph[candidate_table_info_id].end()) {
-        continue;
-      } else if (second_table_info == nullptr) {
-        second_table_info = candidate_table_info;
-        second_table_info_it = candidate_table_info_it;
-      }
-
-      bool is_likely_many_to_many_join = false;
-      for (const auto join_attr_pair : first_table_info->join_attribute_pairs) {
-        if (candidate_table_info->joined_attribute_set.find(join_attr_pair.second)
-                != candidate_table_info->joined_attribute_set.end()) {
-          is_likely_many_to_many_join = true;
-          break;
+    // Find the best probe/build pair out of the remaining tables.
+    // TODO(jianqiao): design better data structure to improve efficiency here.
+    std::unique_ptr<JoinPair> best_join = nullptr;
+    for (TableInfo *probe_table_info : remaining_tables) {
+      for (TableInfo *build_table_info : remaining_tables) {
+        if (probe_table_info != build_table_info) {
+          const std::size_t probe_table_id = probe_table_info->table_info_id;
+          const std::size_t build_table_id = build_table_info->table_info_id;
+          std::size_t num_join_attributes = 0;
+          double build_side_uniqueness = 1.0;
+          for (const auto &attr_group_pair : join_attribute_groups) {
+            const auto &attr_group = attr_group_pair.second;
+            auto probe_it = attr_group.find(probe_table_id);
+            auto build_it = attr_group.find(build_table_id);
+            if (probe_it != attr_group.end() && build_it != attr_group.end()) {
+              ++num_join_attributes;
+              build_side_uniqueness *= std::max(
+                  1uL,
+                  cost_model_->estimateNumDistinctValues(
+                      build_it->second, build_table_info->table));
+            }
+          }
+          build_side_uniqueness /= build_table_info->estimated_cardinality;
+
+          if (num_join_attributes > 0) {
+            std::unique_ptr<JoinPair> new_join(
+                new JoinPair(probe_table_info,
+                             build_table_info,
+                             build_side_uniqueness >= 0.9,
+                             num_join_attributes));
+            if (best_join == nullptr || new_join->isBetterThan(*best_join)) {
+              best_join.reset(new_join.release());
+            }
+          }
         }
       }
-      for (const auto join_attr_pair : candidate_table_info->join_attribute_pairs) {
-        if (first_table_info->joined_attribute_set.find(join_attr_pair.second)
-                != first_table_info->joined_attribute_set.end()) {
-          is_likely_many_to_many_join = true;
-          break;
-        }
-      }
-      if (!is_likely_many_to_many_join) {
-        second_table_info = candidate_table_info;
-        second_table_info_it = candidate_table_info_it;
-        break;
+    }
+
+    CHECK(best_join != nullptr);
+
+    TableInfo *selected_probe_table_info = best_join->probe;
+    TableInfo *selected_build_table_info = best_join->build;
+
+    // Swap probe/build sides if:
+    // (1) Build side is an aggregation with large number of groups, so that
+    //     there is a chance to push LIPFilters down the aggregation.
+    // (2) Build side's join attributes are not unique, and it has larger
+    //     cardinality than the probe side.
+    const std::size_t probe_num_groups_as_agg =
+        getEstimatedNumGroups(selected_probe_table_info->table);
+    const std::size_t build_num_groups_as_agg =
+        getEstimatedNumGroups(selected_build_table_info->table);
+    if (build_num_groups_as_agg > 1000000 || probe_num_groups_as_agg > 1000000) {
+      if (build_num_groups_as_agg > probe_num_groups_as_agg) {
+        std::swap(selected_probe_table_info, selected_build_table_info);
       }
+    } else if ((!best_join->build_side_unique || best_join->num_join_attributes > 1) &&
+        selected_probe_table_info->estimated_cardinality < selected_build_table_info->estimated_cardinality) {
+      std::swap(selected_probe_table_info, selected_build_table_info);
     }
-    DCHECK(second_table_info != nullptr);
-    table_info_ordered_by_priority.erase(second_table_info_it);
 
-    const P::PhysicalPtr &left_child = first_table_info->table;
-    const P::PhysicalPtr &right_child = second_table_info->table;
+    remaining_tables.erase(selected_probe_table_info);
+    remaining_tables.erase(selected_build_table_info);
+
+    // Figure out the output attributes.
+    const P::PhysicalPtr &probe_child = selected_probe_table_info->table;
+    const P::PhysicalPtr &build_child = selected_build_table_info->table;
     std::vector<E::NamedExpressionPtr> output_attributes;
-    for (const E::AttributeReferencePtr &left_attr : left_child->getOutputAttributes()) {
-      output_attributes.emplace_back(left_attr);
+    for (const E::AttributeReferencePtr &probe_attr : probe_child->getOutputAttributes()) {
+      output_attributes.emplace_back(probe_attr);
     }
-    for (const E::AttributeReferencePtr &right_attr : right_child->getOutputAttributes()) {
-      output_attributes.emplace_back(right_attr);
+    for (const E::AttributeReferencePtr &build_attr : build_child->getOutputAttributes()) {
+      output_attributes.emplace_back(build_attr);
     }
 
-    std::vector<E::AttributeReferencePtr> left_join_attributes;
-    std::vector<E::AttributeReferencePtr> right_join_attributes;
-    std::unordered_set<expressions::ExprId> new_joined_attribute_set;
-    for (const auto &join_attr_pair : first_table_info->join_attribute_pairs) {
-      if (second_table_info->join_attribute_pairs.find(join_attr_pair.second)
-              != second_table_info->join_attribute_pairs.end()) {
-        left_join_attributes.emplace_back(
-            attribute_id_to_reference_map[join_attr_pair.first]);
-        right_join_attributes.emplace_back(
-            attribute_id_to_reference_map[join_attr_pair.second]);
-
-        new_joined_attribute_set.emplace(join_attr_pair.first);
-        new_joined_attribute_set.emplace(join_attr_pair.second);
+    // Figure out the join attributes.
+    std::vector<E::AttributeReferencePtr> probe_attributes;
+    std::vector<E::AttributeReferencePtr> build_attributes;
+    const std::size_t probe_table_id = selected_probe_table_info->table_info_id;
+    const std::size_t build_table_id = selected_build_table_info->table_info_id;
+    for (const auto &attr_group_pair : join_attribute_groups) {
+      const auto &attr_group = attr_group_pair.second;
+      auto probe_it = attr_group.find(probe_table_id);
+      auto build_it = attr_group.find(build_table_id);
+      if (probe_it != attr_group.end() && build_it != attr_group.end()) {
+        probe_attributes.emplace_back(
+            attribute_id_to_reference_map.at(probe_it->second));
+        build_attributes.emplace_back(
+            attribute_id_to_reference_map.at(build_it->second));
       }
     }
-    DCHECK_GE(left_join_attributes.size(), static_cast<std::size_t>(1));
 
-    if (table_info_ordered_by_priority.size() > 0) {
+    // Create a hash join from the chosen probe/build pair and put it back to
+    // the table pool. Return the last table in the table pool if there is only
+    // one table left.
+    if (remaining_tables.size() > 0) {
       P::PhysicalPtr output =
-          P::HashJoin::Create(left_child,
-                              right_child,
-                              left_join_attributes,
-                              right_join_attributes,
+          P::HashJoin::Create(probe_child,
+                              build_child,
+                              probe_attributes,
+                              build_attributes,
                               nullptr,
                               output_attributes,
                               P::HashJoin::JoinType::kInnerJoin);
 
-      second_table_info->table = output;
+      selected_probe_table_info->table = output;
 
       // TODO(jianqiao): Cache the estimated cardinality for each plan in cost
       // model to avoid duplicated estimation.
-      second_table_info->estimated_cardinality = cost_model_->estimateCardinality(output);
-
-      second_table_info->join_attribute_pairs.insert(first_table_info->join_attribute_pairs.begin(),
-                                                     first_table_info->join_attribute_pairs.end());
-      second_table_info->joined_attribute_set.insert(first_table_info->joined_attribute_set.begin(),
-                                                     first_table_info->joined_attribute_set.end());
-      second_table_info->joined_attribute_set.insert(new_joined_attribute_set.begin(),
-                                                     new_joined_attribute_set.end());
-      table_info_ordered_by_priority.emplace(second_table_info);
-
-      join_graph[second_table_info->table_info_id].insert(join_graph[first_table_info_id].begin(),
-                                                          join_graph[first_table_info_id].end());
-
+      selected_probe_table_info->estimated_cardinality = cost_model_->estimateCardinality(output);
+      selected_probe_table_info->estimated_selectivity = cost_model_->estimateSelectivity(output);
+
+      selected_probe_table_info->estimated_num_output_attributes =
+          CountSharedAttributes(join_group.referenced_attributes,
+                                output->getOutputAttributes());
+
+      remaining_tables.emplace(selected_probe_table_info);
+
+      // Update join attribute groups.
+      for (auto &attr_group_pair : join_attribute_groups) {
+        auto &attr_group = attr_group_pair.second;
+        auto build_it = attr_group.find(build_table_id);
+        if (build_it != attr_group.end()) {
+          const E::ExprId attr_id = build_it->second;
+          attr_group.erase(build_it);
+          attr_group.emplace(probe_table_id, attr_id);
+        }
+      }
     } else {
-      return P::HashJoin::Create(left_child,
-                                 right_child,
-                                 left_join_attributes,
-                                 right_join_attributes,
+      return P::HashJoin::Create(probe_child,
+                                 build_child,
+                                 probe_attributes,
+                                 build_attributes,
                                  residual_predicate,
                                  project_expressions,
                                  P::HashJoin::JoinType::kInnerJoin);
@@ -307,5 +353,28 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
   }
 }
 
+std::size_t StarSchemaHashJoinOrderOptimization::CountSharedAttributes(
+    const std::unordered_set<expressions::ExprId> &attr_set1,
+    const std::vector<expressions::AttributeReferencePtr> &attr_set2) {
+  // Tally the entries of attr_set2 whose ids are members of attr_set1. A
+  // set lookup yields 0 or 1, so the lookup results can be summed directly.
+  std::size_t num_shared = 0;
+  for (const expressions::AttributeReferencePtr &candidate : attr_set2) {
+    num_shared += attr_set1.count(candidate->id());
+  }
+  return num_shared;
+}
+
+std::size_t StarSchemaHashJoinOrderOptimization::getEstimatedNumGroups(
+    const physical::PhysicalPtr &input) {
+  // Inputs that do not match the Aggregate pattern report zero groups.
+  P::AggregatePtr matched_aggregate;
+  if (!P::SomeAggregate::MatchesWithConditionalCast(input, &matched_aggregate)) {
+    return 0;
+  }
+  return cost_model_->estimateNumGroupsForAggregate(matched_aggregate);
+}
+
+
 }  // namespace optimizer
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7a464434/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
index c1a7bae..64e2478 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
@@ -20,16 +20,15 @@
 #ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_STAR_SCHEMA_HASH_JOIN_ORDER_OPTIMIZATION_HPP_
 #define QUICKSTEP_QUERY_OPTIMIZER_RULES_STAR_SCHEMA_HASH_JOIN_ORDER_OPTIMIZATION_HPP_
 
-#include <algorithm>
 #include <cstddef>
 #include <memory>
 #include <string>
-#include <unordered_map>
 #include <unordered_set>
 #include <utility>
 #include <vector>
 
 #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
 #include "query_optimizer/expressions/ExprId.hpp"
 #include "query_optimizer/expressions/NamedExpression.hpp"
 #include "query_optimizer/expressions/Predicate.hpp"
@@ -45,7 +44,11 @@ namespace optimizer {
  */
 
 /**
- * @brief TODO
+ * @brief Rule that applies to a physical plan to optimize hash join orders.
+ *
+ * This optimization applies a greedy algorithm to favor smaller cardinality
+ * and selective tables to be joined first, which is suitable for queries on
+ * star-schema or snowflake-schema tables.
  */
 class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
  public:
@@ -64,6 +67,7 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
    * @brief A group of tables to form a hash join tree.
    */
   struct JoinGroupInfo {
+    std::unordered_set<expressions::ExprId> referenced_attributes;
     std::vector<physical::PhysicalPtr> tables;
     std::vector<std::pair<expressions::ExprId, expressions::ExprId>> join_attribute_pairs;
   };
@@ -72,49 +76,91 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
    * @brief Auxiliary information of a table for the optimizer.
    */
   struct TableInfo {
-    TableInfo(const std::size_t in_table_info_id,
-              const physical::PhysicalPtr &in_table,
-              const std::size_t in_estimated_cardinality,
-              const double in_estimated_selectivity)
-        : table_info_id(in_table_info_id),
-          table(in_table),
-          estimated_cardinality(in_estimated_cardinality),
-          estimated_selectivity(in_estimated_selectivity) {
+    TableInfo(const std::size_t table_info_id_in,
+              const physical::PhysicalPtr &table_in,
+              const std::size_t estimated_cardinality_in,
+              const double estimated_selectivity_in,
+              const std::size_t estimated_num_output_attributes_in,
+              const bool is_aggregation_in)
+        : table_info_id(table_info_id_in),
+          table(table_in),
+          estimated_cardinality(estimated_cardinality_in),
+          estimated_selectivity(estimated_selectivity_in),
+          estimated_num_output_attributes(estimated_num_output_attributes_in) {
     }
 
     const std::size_t table_info_id;
     physical::PhysicalPtr table;
     std::size_t estimated_cardinality;
     double estimated_selectivity;
-    std::unordered_multimap<expressions::ExprId, expressions::ExprId> join_attribute_pairs;
-    std::unordered_set<expressions::ExprId> joined_attribute_set;
+    std::size_t estimated_num_output_attributes;
   };
 
-  /**
-   * @brief Comparator that compares the join priorities between two tables.
-   */
-  struct TableInfoPtrLessComparator {
-    inline bool operator() (const TableInfo *lhs, const TableInfo *rhs) {
-      bool swapped = false;
-      if (lhs->estimated_cardinality > rhs->estimated_cardinality) {
-        std::swap(lhs, rhs);
-        swapped = true;
+  struct JoinPair {
+    JoinPair(TableInfo *probe_in,
+             TableInfo *build_in,
+             const bool build_side_unique_in,
+             const std::size_t num_join_attributes_in)
+        : probe(probe_in),
+          build(build_in),
+          build_side_unique(build_side_unique_in),
+          num_join_attributes(num_join_attributes_in) {
+    }
+
+    inline bool isBetterThan(const JoinPair &rhs) const {
+      const auto &lhs = *this;
+
+      // Avoid carrying too many output attributes all the way through a long
+      // chain of hash joins.
+      const bool lhs_has_large_output =
+          lhs.build->estimated_num_output_attributes
+              + lhs.probe->estimated_num_output_attributes > 5;
+      const bool rhs_has_large_output =
+          rhs.build->estimated_num_output_attributes
+              + rhs.probe->estimated_num_output_attributes > 5;
+      if (lhs_has_large_output != rhs_has_large_output) {
+        return rhs_has_large_output;
+      }
+
+      // Prefer foreign-key primary-key style hash joins.
+      if (lhs.build_side_unique != rhs.build_side_unique) {
+        return lhs.build_side_unique;
+      }
+
+      // Prefer hash joins where the build side table is small.
+      const bool lhs_has_small_build = lhs.build->estimated_cardinality < 0x100;
+      const bool rhs_has_small_build = rhs.build->estimated_cardinality < 0x100;
+      if (lhs_has_small_build != rhs_has_small_build) {
+        return lhs_has_small_build;
       }
 
-      if (lhs->estimated_selectivity < rhs->estimated_selectivity) {
-        return !swapped;
-      } else if (lhs->estimated_cardinality < 100u &&
-                 rhs->estimated_cardinality > 10000u &&
-                 lhs->estimated_selectivity < rhs->estimated_selectivity * 1.5) {
-        return !swapped;
-      } else if (lhs->estimated_selectivity > rhs->estimated_selectivity) {
-        return swapped;
-      } else if (lhs->estimated_cardinality != rhs->estimated_cardinality) {
-        return !swapped;
+      // Prefer hash joins where the probe side table is small. This is effective
+      // for TPCH style (snowflake schema) queries, with the help of LIPFilters.
+      if (lhs.probe->estimated_cardinality != rhs.probe->estimated_cardinality) {
+        return lhs.probe->estimated_cardinality < rhs.probe->estimated_cardinality;
+      }
+
+      // Prefer build side tables with better selectivity. This is effective
+      // for SSB style queries.
+      if (lhs.build->estimated_selectivity != rhs.build->estimated_selectivity) {
+        return lhs.build->estimated_selectivity < rhs.build->estimated_selectivity;
+      }
+
+      // Residual rules that help provide a total order.
+      if (lhs.build->estimated_cardinality != rhs.build->estimated_cardinality) {
+        return lhs.build->estimated_cardinality < rhs.build->estimated_cardinality;
+      }
+      if (lhs.probe->table != rhs.probe->table) {
+        return lhs.probe->table < rhs.probe->table;
       } else {
-        return swapped ^ (lhs->table < rhs->table);
+        return lhs.build->table < rhs.build->table;
       }
     }
+
+    TableInfo *probe;
+    TableInfo *build;
+    const bool build_side_unique;
+    const std::size_t num_join_attributes;
   };
 
   physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input,
@@ -125,6 +171,12 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
       const expressions::PredicatePtr &residual_predicate,
       const std::vector<expressions::NamedExpressionPtr> &project_expressions);
 
+  std::size_t getEstimatedNumGroups(const physical::PhysicalPtr &input);
+
+  static std::size_t CountSharedAttributes(
+      const std::unordered_set<expressions::ExprId> &attr_set1,
+      const std::vector<expressions::AttributeReferencePtr> &attr_set2);
+
   std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
 
   DISALLOW_COPY_AND_ASSIGN(StarSchemaHashJoinOrderOptimization);



[11/12] incubator-quickstep git commit: Add LIPFilter feature.

Posted by ji...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/storage/AggregationOperationState.hpp
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.hpp b/storage/AggregationOperationState.hpp
index cbbfc22..a80bcb0 100644
--- a/storage/AggregationOperationState.hpp
+++ b/storage/AggregationOperationState.hpp
@@ -41,6 +41,7 @@ class AggregateFunction;
 class CatalogDatabaseLite;
 class CatalogRelationSchema;
 class InsertDestination;
+class LIPFilterAdaptiveProber;
 class StorageManager;
 
 /** \addtogroup Storage
@@ -156,7 +157,8 @@ class AggregationOperationState {
    * @param input_block The block ID of the storage block where the aggreates
    *        are going to be computed.
    **/
-  void aggregateBlock(const block_id input_block);
+  void aggregateBlock(const block_id input_block,
+                      LIPFilterAdaptiveProber *lip_filter_adaptive_prober);
 
   /**
    * @brief Generate the final results for the aggregates managed by this
@@ -179,8 +181,10 @@ class AggregationOperationState {
       const std::vector<std::unique_ptr<AggregationState>> &local_state);
 
   // Aggregate on input block.
-  void aggregateBlockSingleState(const block_id input_block);
-  void aggregateBlockHashTable(const block_id input_block);
+  void aggregateBlockSingleState(const block_id input_block,
+                                 LIPFilterAdaptiveProber *lip_filter_adaptive_prober);
+  void aggregateBlockHashTable(const block_id input_block,
+                               LIPFilterAdaptiveProber *lip_filter_adaptive_prober);
 
   void finalizeSingleState(InsertDestination *output_destination);
   void finalizeHashTable(InsertDestination *output_destination);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/storage/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt
index f05cc46..e85e005 100644
--- a/storage/CMakeLists.txt
+++ b/storage/CMakeLists.txt
@@ -643,7 +643,6 @@ target_link_libraries(quickstep_storage_FastHashTable
                       quickstep_threading_SpinSharedMutex
                       quickstep_types_Type
                       quickstep_types_TypedValue
-                      quickstep_utility_BloomFilter
                       quickstep_utility_HashPair
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_storage_FastHashTableFactory
@@ -659,7 +658,6 @@ target_link_libraries(quickstep_storage_FastHashTableFactory
                       quickstep_storage_SimpleScalarSeparateChainingHashTable
                       quickstep_storage_TupleReference
                       quickstep_types_TypeFactory
-                      quickstep_utility_BloomFilter
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_storage_FastSeparateChainingHashTable
                       quickstep_storage_FastHashTable

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/storage/FastHashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/FastHashTable.hpp b/storage/FastHashTable.hpp
index 4a95cd9..74d9ee3 100644
--- a/storage/FastHashTable.hpp
+++ b/storage/FastHashTable.hpp
@@ -39,7 +39,6 @@
 #include "threading/SpinSharedMutex.hpp"
 #include "types/Type.hpp"
 #include "types/TypedValue.hpp"
-#include "utility/BloomFilter.hpp"
 #include "utility/HashPair.hpp"
 #include "utility/Macros.hpp"
 
@@ -958,62 +957,6 @@ class FastHashTable : public HashTableBase<resizable,
   template <typename FunctorT>
   std::size_t forEachCompositeKeyFast(FunctorT *functor, int index) const;
 
-  /**
-   * @brief A call to this function will cause a bloom filter to be built
-   *        during the build phase of this hash table.
-   **/
-  inline void enableBuildSideBloomFilter() {
-    has_build_side_bloom_filter_ = true;
-  }
-
-  /**
-   * @brief A call to this function will cause a set of bloom filters to be
-   *        probed during the probe phase of this hash table.
-   **/
-  inline void enableProbeSideBloomFilter() {
-    has_probe_side_bloom_filter_ = true;
-  }
-
-  /**
-   * @brief This function sets the pointer to the bloom filter to be
-   *        used during the build phase of this hash table.
-   * @warning Should call enable_build_side_bloom_filter() first to enable
-   *          bloom filter usage during build phase.
-   * @note The ownership of the bloom filter lies with the caller.
-   *
-   * @param bloom_filter The pointer to the bloom filter.
-   **/
-  inline void setBuildSideBloomFilter(BloomFilter *bloom_filter) {
-    build_bloom_filter_ = bloom_filter;
-  }
-
-  /**
-   * @brief This function adds a pointer to the list of bloom filters to be
-   *        used during the probe phase of this hash table.
-   * @warning Should call enable_probe_side_bloom_filter() first to enable
-   *          bloom filter usage during probe phase.
-   * @note The ownership of the bloom filter lies with the caller.
-   *
-   * @param bloom_filter The pointer to the bloom filter.
-   **/
-  inline void addProbeSideBloomFilter(const BloomFilter *bloom_filter) {
-    probe_bloom_filters_.emplace_back(bloom_filter);
-  }
-
-  /**
-   * @brief This function adds a vector of attribute ids corresponding to a
-   *        bloom filter used during the probe phase of this hash table.
-   * @warning Should call enable_probe_side_bloom_filter() first to enable
-   *          bloom filter usage during probe phase.
-   *
-   * @param probe_attribute_ids The vector of attribute ids to use for probing
-   *        the bloom filter.
-   **/
-  inline void addProbeSideAttributeIds(
-      std::vector<attribute_id> &&probe_attribute_ids) {
-    probe_attribute_ids_.push_back(probe_attribute_ids);
-  }
-
  protected:
   /**
    * @brief Constructor for new resizable hash table.
@@ -1318,12 +1261,6 @@ class FastHashTable : public HashTableBase<resizable,
                                    const attribute_id key_attr_id,
                                    FunctorT *functor) const;
 
-  // Data structures used for bloom filter optimized semi-joins.
-  bool has_build_side_bloom_filter_ = false;
-  bool has_probe_side_bloom_filter_ = false;
-  BloomFilter *build_bloom_filter_;
-  std::vector<const BloomFilter *> probe_bloom_filters_;
-  std::vector<std::vector<attribute_id>> probe_attribute_ids_;
   DISALLOW_COPY_AND_ASSIGN(FastHashTable);
 };
 
@@ -1449,13 +1386,6 @@ FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>::
                 total_entries, total_variable_key_size, &prealloc_state);
           }
         }
-        std::unique_ptr<BloomFilter> thread_local_bloom_filter;
-        if (has_build_side_bloom_filter_) {
-          thread_local_bloom_filter.reset(
-              new BloomFilter(build_bloom_filter_->getRandomSeed(),
-                              build_bloom_filter_->getNumberOfHashes(),
-                              build_bloom_filter_->getBitArraySize()));
-        }
         if (resizable) {
           while (result == HashTablePutResult::kOutOfSpace) {
             {
@@ -1474,12 +1404,6 @@ FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>::
                     variable_size,
                     (*functor)(*accessor),
                     using_prealloc ? &prealloc_state : nullptr);
-                // Insert into bloom filter, if enabled.
-                if (has_build_side_bloom_filter_) {
-                  thread_local_bloom_filter->insertUnSafe(
-                      static_cast<const std::uint8_t *>(key.getDataPtr()),
-                      key.getDataSize());
-                }
                 if (result == HashTablePutResult::kDuplicateKey) {
                   DEBUG_ASSERT(!using_prealloc);
                   return result;
@@ -1507,22 +1431,11 @@ FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>::
                                   variable_size,
                                   (*functor)(*accessor),
                                   using_prealloc ? &prealloc_state : nullptr);
-            // Insert into bloom filter, if enabled.
-            if (has_build_side_bloom_filter_) {
-              thread_local_bloom_filter->insertUnSafe(
-                  static_cast<const std::uint8_t *>(key.getDataPtr()),
-                  key.getDataSize());
-            }
             if (result != HashTablePutResult::kOK) {
               return result;
             }
           }
         }
-        // Update the build side bloom filter with thread local copy, if
-        // available.
-        if (has_build_side_bloom_filter_) {
-          build_bloom_filter_->bitwiseOr(thread_local_bloom_filter.get());
-        }
 
         return HashTablePutResult::kOK;
       });
@@ -2462,52 +2375,27 @@ void FastHashTable<resizable,
   InvokeOnAnyValueAccessor(
       accessor,
       [&](auto *accessor) -> void {  // NOLINT(build/c++11)
-        while (accessor->next()) {
-          // Probe any bloom filters, if enabled.
-          if (has_probe_side_bloom_filter_) {
-            DCHECK_EQ(probe_bloom_filters_.size(), probe_attribute_ids_.size());
-            // Check if the key is contained in the BloomFilters or not.
-            bool bloom_miss = false;
-            for (std::size_t i = 0;
-                 i < probe_bloom_filters_.size() && !bloom_miss;
-                 ++i) {
-              const BloomFilter *bloom_filter = probe_bloom_filters_[i];
-              for (const attribute_id &attr_id : probe_attribute_ids_[i]) {
-                TypedValue bloom_key = accessor->getTypedValue(attr_id);
-                if (!bloom_filter->contains(static_cast<const std::uint8_t *>(
-                                                bloom_key.getDataPtr()),
-                                            bloom_key.getDataSize())) {
-                  bloom_miss = true;
-                  break;
-                }
-              }
-            }
-            if (bloom_miss) {
-              continue;  // On a bloom filter miss, probing the hash table can
-                         // be skipped.
-            }
-          }
-
-          TypedValue key = accessor->getTypedValue(key_attr_id);
-          if (check_for_null_keys && key.isNull()) {
-            continue;
-          }
-          const std::size_t true_hash = use_scalar_literal_hash_template
-                                            ? key.getHashScalarLiteral()
-                                            : key.getHash();
-          const std::size_t adjusted_hash =
-              adjust_hashes_template ? this->AdjustHash(true_hash) : true_hash;
-          std::size_t entry_num = 0;
-          const std::uint8_t *value;
-          while (this->getNextEntryForKey(
-              key, adjusted_hash, &value, &entry_num)) {
-            (*functor)(*accessor, *value);
-            if (!allow_duplicate_keys) {
-              break;
-            }
-          }
+    while (accessor->next()) {
+      TypedValue key = accessor->getTypedValue(key_attr_id);
+      if (check_for_null_keys && key.isNull()) {
+        continue;
+      }
+      const std::size_t true_hash = use_scalar_literal_hash_template
+                                        ? key.getHashScalarLiteral()
+                                        : key.getHash();
+      const std::size_t adjusted_hash =
+          adjust_hashes_template ? this->AdjustHash(true_hash) : true_hash;
+      std::size_t entry_num = 0;
+      const std::uint8_t *value;
+      while (this->getNextEntryForKey(
+          key, adjusted_hash, &value, &entry_num)) {
+        (*functor)(*accessor, *value);
+        if (!allow_duplicate_keys) {
+          break;
         }
-      });
+      }
+    }
+  });
 }
 
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/storage/FastHashTableFactory.hpp
----------------------------------------------------------------------
diff --git a/storage/FastHashTableFactory.hpp b/storage/FastHashTableFactory.hpp
index 6d0b693..682cc2a 100644
--- a/storage/FastHashTableFactory.hpp
+++ b/storage/FastHashTableFactory.hpp
@@ -32,7 +32,6 @@
 #include "storage/SimpleScalarSeparateChainingHashTable.hpp"
 #include "storage/TupleReference.hpp"
 #include "types/TypeFactory.hpp"
-#include "utility/BloomFilter.hpp"
 #include "utility/Macros.hpp"
 
 #include "glog/logging.h"
@@ -183,14 +182,11 @@ class FastHashTableFactory {
    * @param proto A protobuf description of a resizable HashTable.
    * @param storage_manager The StorageManager to use (a StorageBlob will be
    *        allocated to hold the HashTable's contents).
-   * @param bloom_filters A vector of pointers to bloom filters that may be used
-   *        during hash table construction in build/probe phase.
    * @return A new resizable HashTable with parameters specified by proto.
    **/
   static FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>*
       CreateResizableFromProto(const serialization::HashTable &proto,
-                               StorageManager *storage_manager,
-                               const std::vector<std::unique_ptr<BloomFilter>> &bloom_filters) {
+                               StorageManager *storage_manager) {
     DCHECK(ProtoIsValid(proto))
         << "Attempted to create HashTable from invalid proto description:\n"
         << proto.DebugString();
@@ -204,35 +200,6 @@ class FastHashTableFactory {
                                       key_types,
                                       proto.estimated_num_entries(),
                                       storage_manager);
-
-    // TODO(ssaurabh): These lazy initializations can be moved from here and pushed to the
-    //                 individual implementations of the hash table constructors.
-
-    // Check if there are any build side bloom filter defined on the hash table.
-    if (proto.build_side_bloom_filter_id_size() > 0) {
-      hash_table->enableBuildSideBloomFilter();
-      hash_table->setBuildSideBloomFilter(bloom_filters[proto.build_side_bloom_filter_id(0)].get());
-    }
-
-    // Check if there are any probe side bloom filters defined on the hash table.
-    if (proto.probe_side_bloom_filters_size() > 0) {
-      hash_table->enableProbeSideBloomFilter();
-      // Add as many probe bloom filters as defined by the proto.
-      for (int j = 0; j < proto.probe_side_bloom_filters_size(); ++j) {
-        // Add the pointer to the probe bloom filter within the list of probe bloom filters to use.
-        const auto probe_side_bloom_filter = proto.probe_side_bloom_filters(j);
-        hash_table->addProbeSideBloomFilter(bloom_filters[probe_side_bloom_filter.probe_side_bloom_filter_id()].get());
-
-        // Add the attribute ids corresponding to this probe bloom filter.
-        std::vector<attribute_id> probe_attribute_ids;
-        for (int k = 0; k < probe_side_bloom_filter.probe_side_attr_ids_size(); ++k) {
-          const attribute_id probe_attribute_id = probe_side_bloom_filter.probe_side_attr_ids(k);
-          probe_attribute_ids.push_back(probe_attribute_id);
-        }
-        hash_table->addProbeSideAttributeIds(std::move(probe_attribute_ids));
-      }
-    }
-
     return hash_table;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/storage/HashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index f2dcb03..786a9bb 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -981,61 +981,6 @@ class HashTable : public HashTableBase<resizable,
   template <typename FunctorT>
   std::size_t forEachCompositeKey(FunctorT *functor) const;
 
-  /**
-   * @brief A call to this function will cause a bloom filter to be built
-   *        during the build phase of this hash table.
-   **/
-  inline void enableBuildSideBloomFilter() {
-    has_build_side_bloom_filter_ = true;
-  }
-
-  /**
-   * @brief A call to this function will cause a set of bloom filters to be
-   *        probed during the probe phase of this hash table.
-   **/
-  inline void enableProbeSideBloomFilter() {
-    has_probe_side_bloom_filter_ = true;
-  }
-
-  /**
-   * @brief This function sets the pointer to the bloom filter to be
-   *        used during the build phase of this hash table.
-   * @warning Should call enable_build_side_bloom_filter() first to enable
-   *          bloom filter usage during build phase.
-   * @note The ownership of the bloom filter lies with the caller.
-   *
-   * @param bloom_filter The pointer to the bloom filter.
-   **/
-  inline void setBuildSideBloomFilter(BloomFilter *bloom_filter) {
-    build_bloom_filter_ = bloom_filter;
-  }
-
-  /**
-   * @brief This function adds a pointer to the list of bloom filters to be
-   *        used during the probe phase of this hash table.
-   * @warning Should call enable_probe_side_bloom_filter() first to enable
-   *          bloom filter usage during probe phase.
-   * @note The ownership of the bloom filter lies with the caller.
-   *
-   * @param bloom_filter The pointer to the bloom filter.
-   **/
-  inline void addProbeSideBloomFilter(const BloomFilter *bloom_filter) {
-    probe_bloom_filters_.emplace_back(bloom_filter);
-  }
-
-  /**
-   * @brief This function adds a vector of attribute ids corresponding to a
-   *        bloom filter used during the probe phase of this hash table.
-   * @warning Should call enable_probe_side_bloom_filter() first to enable
-   *          bloom filter usage during probe phase.
-   *
-   * @param probe_attribute_ids The vector of attribute ids to use for probing
-   *        the bloom filter.
-   **/
-  inline void addProbeSideAttributeIds(std::vector<attribute_id> &&probe_attribute_ids) {
-    probe_attribute_ids_.push_back(probe_attribute_ids);
-  }
-
  protected:
   /**
    * @brief Constructor for new resizable hash table.
@@ -1316,13 +1261,6 @@ class HashTable : public HashTableBase<resizable,
                                    const attribute_id key_attr_id,
                                    FunctorT *functor) const;
 
-  // Data structures used for bloom filter optimized semi-joins.
-  bool has_build_side_bloom_filter_ = false;
-  bool has_probe_side_bloom_filter_ = false;
-  BloomFilter *build_bloom_filter_;
-  std::vector<const BloomFilter*> probe_bloom_filters_;
-  std::vector<std::vector<attribute_id>> probe_attribute_ids_;
-
   DISALLOW_COPY_AND_ASSIGN(HashTable);
 };
 
@@ -1467,12 +1405,6 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al
                                                         &prealloc_state);
       }
     }
-    std::unique_ptr<BloomFilter> thread_local_bloom_filter;
-    if (has_build_side_bloom_filter_) {
-      thread_local_bloom_filter.reset(new BloomFilter(build_bloom_filter_->getRandomSeed(),
-                                                      build_bloom_filter_->getNumberOfHashes(),
-                                                      build_bloom_filter_->getBitArraySize()));
-    }
     if (resizable) {
       while (result == HashTablePutResult::kOutOfSpace) {
         {
@@ -1488,11 +1420,6 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al
                                        variable_size,
                                        (*functor)(*accessor),
                                        using_prealloc ? &prealloc_state : nullptr);
-            // Insert into bloom filter, if enabled.
-            if (has_build_side_bloom_filter_) {
-              thread_local_bloom_filter->insertUnSafe(static_cast<const std::uint8_t *>(key.getDataPtr()),
-                                                      key.getDataSize());
-            }
             if (result == HashTablePutResult::kDuplicateKey) {
               DEBUG_ASSERT(!using_prealloc);
               return result;
@@ -1518,20 +1445,11 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al
                                    variable_size,
                                    (*functor)(*accessor),
                                    using_prealloc ? &prealloc_state : nullptr);
-        // Insert into bloom filter, if enabled.
-        if (has_build_side_bloom_filter_) {
-          thread_local_bloom_filter->insertUnSafe(static_cast<const std::uint8_t *>(key.getDataPtr()),
-                                                  key.getDataSize());
-        }
         if (result != HashTablePutResult::kOK) {
           return result;
         }
       }
     }
-    // Update the build side bloom filter with thread local copy, if available.
-    if (has_build_side_bloom_filter_) {
-      build_bloom_filter_->bitwiseOr(thread_local_bloom_filter.get());
-    }
 
     return HashTablePutResult::kOK;
   });
@@ -2237,27 +2155,6 @@ void HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_
       accessor,
       [&](auto *accessor) -> void {  // NOLINT(build/c++11)
     while (accessor->next()) {
-      // Probe any bloom filters, if enabled.
-      if (has_probe_side_bloom_filter_) {
-        DCHECK_EQ(probe_bloom_filters_.size(), probe_attribute_ids_.size());
-        // Check if the key is contained in the BloomFilters or not.
-        bool bloom_miss = false;
-        for (std::size_t i = 0; i < probe_bloom_filters_.size() && !bloom_miss; ++i) {
-          const BloomFilter *bloom_filter = probe_bloom_filters_[i];
-          for (const attribute_id &attr_id : probe_attribute_ids_[i]) {
-            TypedValue bloom_key = accessor->getTypedValue(attr_id);
-            if (!bloom_filter->contains(static_cast<const std::uint8_t*>(bloom_key.getDataPtr()),
-                                        bloom_key.getDataSize())) {
-              bloom_miss = true;
-              break;
-            }
-          }
-        }
-        if (bloom_miss) {
-          continue;  // On a bloom filter miss, probing the hash table can be skipped.
-        }
-      }
-
       TypedValue key = accessor->getTypedValue(key_attr_id);
       if (check_for_null_keys && key.isNull()) {
         continue;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/storage/HashTable.proto
----------------------------------------------------------------------
diff --git a/storage/HashTable.proto b/storage/HashTable.proto
index ade30d8..1d4ccb0 100644
--- a/storage/HashTable.proto
+++ b/storage/HashTable.proto
@@ -34,10 +34,4 @@ message HashTable {
   required HashTableImplType hash_table_impl_type = 1;
   repeated Type key_types = 2;
   required uint64 estimated_num_entries = 3;
-  repeated uint32 build_side_bloom_filter_id = 4;
-  message ProbeSideBloomFilter {
-    required uint32 probe_side_bloom_filter_id = 1;
-    repeated uint32 probe_side_attr_ids = 2;
-  }
-  repeated ProbeSideBloomFilter probe_side_bloom_filters = 6;
 }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/storage/HashTableFactory.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTableFactory.hpp b/storage/HashTableFactory.hpp
index 40b39de..d690557 100644
--- a/storage/HashTableFactory.hpp
+++ b/storage/HashTableFactory.hpp
@@ -295,14 +295,11 @@ class HashTableFactory {
    * @param proto A protobuf description of a resizable HashTable.
    * @param storage_manager The StorageManager to use (a StorageBlob will be
    *        allocated to hold the HashTable's contents).
-   * @param bloom_filters A vector of pointers to bloom filters that may be used
-   *        during hash table construction in build/probe phase.
    * @return A new resizable HashTable with parameters specified by proto.
    **/
   static HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_keys>*
       CreateResizableFromProto(const serialization::HashTable &proto,
-                               StorageManager *storage_manager,
-                               const std::vector<std::unique_ptr<BloomFilter>> &bloom_filters) {
+                               StorageManager *storage_manager) {
     DCHECK(ProtoIsValid(proto))
         << "Attempted to create HashTable from invalid proto description:\n"
         << proto.DebugString();
@@ -316,35 +313,6 @@ class HashTableFactory {
                                       key_types,
                                       proto.estimated_num_entries(),
                                       storage_manager);
-
-    // TODO(ssaurabh): These lazy initializations can be moved from here and pushed to the
-    //                 individual implementations of the hash table constructors.
-
-    // Check if there are any build side bloom filter defined on the hash table.
-    if (proto.build_side_bloom_filter_id_size() > 0) {
-      hash_table->enableBuildSideBloomFilter();
-      hash_table->setBuildSideBloomFilter(bloom_filters[proto.build_side_bloom_filter_id(0)].get());
-    }
-
-    // Check if there are any probe side bloom filters defined on the hash table.
-    if (proto.probe_side_bloom_filters_size() > 0) {
-      hash_table->enableProbeSideBloomFilter();
-      // Add as many probe bloom filters as defined by the proto.
-      for (int j = 0; j < proto.probe_side_bloom_filters_size(); ++j) {
-        // Add the pointer to the probe bloom filter within the list of probe bloom filters to use.
-        const auto probe_side_bloom_filter = proto.probe_side_bloom_filters(j);
-        hash_table->addProbeSideBloomFilter(bloom_filters[probe_side_bloom_filter.probe_side_bloom_filter_id()].get());
-
-        // Add the attribute ids corresponding to this probe bloom filter.
-        std::vector<attribute_id> probe_attribute_ids;
-        for (int k = 0; k < probe_side_bloom_filter.probe_side_attr_ids_size(); ++k) {
-          const attribute_id probe_attribute_id = probe_side_bloom_filter.probe_side_attr_ids(k);
-          probe_attribute_ids.push_back(probe_attribute_id);
-        }
-        hash_table->addProbeSideAttributeIds(std::move(probe_attribute_ids));
-      }
-    }
-
     return hash_table;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/storage/StorageBlock.cpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.cpp b/storage/StorageBlock.cpp
index ec5990f..7c16c34 100644
--- a/storage/StorageBlock.cpp
+++ b/storage/StorageBlock.cpp
@@ -60,6 +60,7 @@
 #include "types/containers/Tuple.hpp"
 #include "types/operations/comparisons/ComparisonUtil.hpp"
 #include "utility/Macros.hpp"
+#include "utility/lip_filter/LIPFilterAdaptiveProber.hpp"
 
 #include "glog/logging.h"
 
@@ -341,20 +342,30 @@ void StorageBlock::sample(const bool is_block_sample,
 
 void StorageBlock::select(const vector<unique_ptr<const Scalar>> &selection,
                           const Predicate *predicate,
-                          InsertDestinationInterface *destination) const {
+                          InsertDestinationInterface *destination,
+                          LIPFilterAdaptiveProber *lip_filter_adaptive_prober) const {
   ColumnVectorsValueAccessor temp_result;
   {
     SubBlocksReference sub_blocks_ref(*tuple_store_,
                                       indices_,
                                       indices_consistent_);
 
+    std::unique_ptr<ValueAccessor> base_accessor(tuple_store_->createValueAccessor());
     std::unique_ptr<TupleIdSequence> matches;
+
+    if (lip_filter_adaptive_prober != nullptr) {
+      matches.reset(lip_filter_adaptive_prober->filterValueAccessor(base_accessor.get()));
+    }
+
+    if (predicate != nullptr) {
+      matches.reset(getMatchesForPredicate(predicate));
+    }
+
     std::unique_ptr<ValueAccessor> accessor;
-    if (predicate == nullptr) {
-      accessor.reset(tuple_store_->createValueAccessor());
+    if (matches == nullptr) {
+      accessor.reset(base_accessor.release());
     } else {
-      matches.reset(getMatchesForPredicate(predicate));
-      accessor.reset(tuple_store_->createValueAccessor(matches.get()));
+      accessor.reset(base_accessor->createSharedTupleIdSequenceAdapterVirtual(*matches));
     }
 
     for (vector<unique_ptr<const Scalar>>::const_iterator selection_cit = selection.begin();
@@ -371,14 +382,24 @@ void StorageBlock::select(const vector<unique_ptr<const Scalar>> &selection,
 
 void StorageBlock::selectSimple(const std::vector<attribute_id> &selection,
                                 const Predicate *predicate,
-                                InsertDestinationInterface *destination) const {
-  std::unique_ptr<ValueAccessor> accessor;
+                                InsertDestinationInterface *destination,
+                                LIPFilterAdaptiveProber *lip_filter_adaptive_prober) const {
+  std::unique_ptr<ValueAccessor> base_accessor(tuple_store_->createValueAccessor());
   std::unique_ptr<TupleIdSequence> matches;
-  if (predicate == nullptr) {
-    accessor.reset(tuple_store_->createValueAccessor());
-  } else {
+
+  if (lip_filter_adaptive_prober != nullptr) {
+    matches.reset(lip_filter_adaptive_prober->filterValueAccessor(base_accessor.get()));
+  }
+
+  if (predicate != nullptr) {
     matches.reset(getMatchesForPredicate(predicate));
-    accessor.reset(tuple_store_->createValueAccessor(matches.get()));
+  }
+
+  std::unique_ptr<ValueAccessor> accessor;
+  if (matches == nullptr) {
+    accessor.reset(base_accessor.release());
+  } else {
+    accessor.reset(base_accessor->createSharedTupleIdSequenceAdapterVirtual(*matches));
   }
 
   destination->bulkInsertTuplesWithRemappedAttributes(selection,
@@ -389,37 +410,28 @@ AggregationState* StorageBlock::aggregate(
     const AggregationHandle &handle,
     const std::vector<std::unique_ptr<const Scalar>> &arguments,
     const std::vector<attribute_id> *arguments_as_attributes,
-    const Predicate *predicate,
-    std::unique_ptr<TupleIdSequence> *reuse_matches) const {
-  // If there is a filter predicate that hasn't already been evaluated,
-  // evaluate it now and save the results for other aggregates on this same
-  // block.
-  if (predicate && !*reuse_matches) {
-    reuse_matches->reset(getMatchesForPredicate(predicate));
-  }
-
+    const TupleIdSequence *filter) const {
 #ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
   // If all the arguments to this aggregate are plain relation attributes,
   // aggregate directly on a ValueAccessor from this block to avoid a copy.
   if ((arguments_as_attributes != nullptr) && (!arguments_as_attributes->empty())) {
     DCHECK_EQ(arguments.size(), arguments_as_attributes->size())
         << "Mismatch between number of arguments and number of attribute_ids";
-    return aggregateHelperValueAccessor(handle, *arguments_as_attributes, reuse_matches->get());
+    return aggregateHelperValueAccessor(handle, *arguments_as_attributes, filter);
   }
   // TODO(shoban): We may want to optimize for ScalarLiteral here.
 #endif
 
   // Call aggregateHelperColumnVector() to materialize each argument as a
   // ColumnVector, then aggregate over those.
-  return aggregateHelperColumnVector(handle, arguments, reuse_matches->get());
+  return aggregateHelperColumnVector(handle, arguments, filter);
 }
 
 void StorageBlock::aggregateGroupBy(
     const std::vector<std::vector<std::unique_ptr<const Scalar>>> &arguments,
     const std::vector<std::unique_ptr<const Scalar>> &group_by,
-    const Predicate *predicate,
+    const TupleIdSequence *filter,
     AggregationStateHashTableBase *hash_table,
-    std::unique_ptr<TupleIdSequence> *reuse_matches,
     std::vector<std::unique_ptr<ColumnVector>> *reuse_group_by_vectors) const {
   DCHECK_GT(group_by.size(), 0u)
       << "Called aggregateGroupBy() with zero GROUP BY expressions";
@@ -438,23 +450,7 @@ void StorageBlock::aggregateGroupBy(
   // this aggregate, as well as the GROUP BY expression values.
   ColumnVectorsValueAccessor temp_result;
   {
-    std::unique_ptr<ValueAccessor> accessor;
-    if (predicate) {
-      if (!*reuse_matches) {
-        // If there is a filter predicate that hasn't already been evaluated,
-        // evaluate it now and save the results for other aggregates on this
-        // same block.
-        reuse_matches->reset(getMatchesForPredicate(predicate));
-      }
-
-      // Create a filtered ValueAccessor that only iterates over predicate
-      // matches.
-      accessor.reset(tuple_store_->createValueAccessor(reuse_matches->get()));
-    } else {
-      // Create a ValueAccessor that iterates over all tuples in this block
-      accessor.reset(tuple_store_->createValueAccessor());
-    }
-
+    std::unique_ptr<ValueAccessor> accessor(tuple_store_->createValueAccessor(filter));
     attribute_id attr_id = 0;
 
     // First, put GROUP BY keys into 'temp_result'.
@@ -503,9 +499,8 @@ void StorageBlock::aggregateDistinct(
     const std::vector<std::unique_ptr<const Scalar>> &arguments,
     const std::vector<attribute_id> *arguments_as_attributes,
     const std::vector<std::unique_ptr<const Scalar>> &group_by,
-    const Predicate *predicate,
+    const TupleIdSequence *filter,
     AggregationStateHashTableBase *distinctify_hash_table,
-    std::unique_ptr<TupleIdSequence> *reuse_matches,
     std::vector<std::unique_ptr<ColumnVector>> *reuse_group_by_vectors) const {
   DCHECK_GT(arguments.size(), 0u)
       << "Called aggregateDistinct() with zero argument expressions";
@@ -517,22 +512,7 @@ void StorageBlock::aggregateDistinct(
   // this aggregate, as well as the GROUP BY expression values.
   ColumnVectorsValueAccessor temp_result;
   {
-    std::unique_ptr<ValueAccessor> accessor;
-    if (predicate) {
-      if (!*reuse_matches) {
-        // If there is a filter predicate that hasn't already been evaluated,
-        // evaluate it now and save the results for other aggregates on this
-        // same block.
-        reuse_matches->reset(getMatchesForPredicate(predicate));
-      }
-
-      // Create a filtered ValueAccessor that only iterates over predicate
-      // matches.
-      accessor.reset(tuple_store_->createValueAccessor(reuse_matches->get()));
-    } else {
-      // Create a ValueAccessor that iterates over all tuples in this block
-      accessor.reset(tuple_store_->createValueAccessor());
-    }
+    std::unique_ptr<ValueAccessor> accessor(tuple_store_->createValueAccessor(filter));
 
 #ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
     // If all the arguments to this aggregate are plain relation attributes,
@@ -1246,23 +1226,36 @@ bool StorageBlock::rebuildIndexes(bool short_circuit) {
   return all_indices_consistent_;
 }
 
-TupleIdSequence* StorageBlock::getMatchesForPredicate(const Predicate *predicate) const {
+TupleIdSequence* StorageBlock::getMatchesForPredicate(const Predicate *predicate,
+                                                      const TupleIdSequence *filter) const {
   if (predicate == nullptr) {
-    return tuple_store_->getExistenceMap();
+    TupleIdSequence *matched = tuple_store_->getExistenceMap();
+    if (filter != nullptr) {
+      matched->intersectWith(*filter);
+    }
+    return matched;
   }
 
   std::unique_ptr<ValueAccessor> value_accessor(tuple_store_->createValueAccessor());
-  std::unique_ptr<TupleIdSequence> existence_map;
-  if (!tuple_store_->isPacked()) {
-    existence_map.reset(tuple_store_->getExistenceMap());
-  }
   SubBlocksReference sub_blocks_ref(*tuple_store_,
                                     indices_,
                                     indices_consistent_);
-  return predicate->getAllMatches(value_accessor.get(),
-                                  &sub_blocks_ref,
-                                  nullptr,
-                                  existence_map.get());
+
+  if (!tuple_store_->isPacked()) {
+    std::unique_ptr<TupleIdSequence> existence_map(tuple_store_->getExistenceMap());
+    if (filter != nullptr) {
+      existence_map->intersectWith(*filter);
+    }
+    return predicate->getAllMatches(value_accessor.get(),
+                                    &sub_blocks_ref,
+                                    nullptr,
+                                    existence_map.get());
+  } else {
+    return predicate->getAllMatches(value_accessor.get(),
+                                    &sub_blocks_ref,
+                                    nullptr,
+                                    filter);
+  }
 }
 
 std::unordered_map<attribute_id, TypedValue>* StorageBlock::generateUpdatedValues(

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/storage/StorageBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.hpp b/storage/StorageBlock.hpp
index bab5bab..77fb137 100644
--- a/storage/StorageBlock.hpp
+++ b/storage/StorageBlock.hpp
@@ -44,6 +44,7 @@ class AggregationState;
 class CatalogRelationSchema;
 class ColumnVector;
 class InsertDestinationInterface;
+class LIPFilterAdaptiveProber;
 class Predicate;
 class Scalar;
 class StorageBlockLayout;
@@ -312,6 +313,9 @@ class StorageBlock : public StorageBlockBase {
       const std::vector<attribute_id> &attribute_map,
       ValueAccessor *accessor);
 
+  TupleIdSequence* getMatchesForPredicate(const Predicate *predicate,
+                                          const TupleIdSequence *filter = nullptr) const;
+
   /**
    * @brief Perform a random sampling of data on  the StorageBlock. The number
    *       of records sampled is determined by the sample percentage in case of
@@ -349,7 +353,8 @@ class StorageBlock : public StorageBlockBase {
    **/
   void select(const std::vector<std::unique_ptr<const Scalar>> &selection,
               const Predicate *predicate,
-              InsertDestinationInterface *destination) const;
+              InsertDestinationInterface *destination,
+              LIPFilterAdaptiveProber *lip_filter_adaptive_prober) const;
 
   /**
    * @brief Perform a simple SELECT query on this StorageBlock which only
@@ -372,7 +377,8 @@ class StorageBlock : public StorageBlockBase {
    **/
   void selectSimple(const std::vector<attribute_id> &selection,
                     const Predicate *predicate,
-                    InsertDestinationInterface *destination) const;
+                    InsertDestinationInterface *destination,
+                    LIPFilterAdaptiveProber *lip_filter_adaptive_prober) const;
 
   /**
    * @brief Perform non GROUP BY aggregation on the tuples in the this storage
@@ -412,8 +418,7 @@ class StorageBlock : public StorageBlockBase {
       const AggregationHandle &handle,
       const std::vector<std::unique_ptr<const Scalar>> &arguments,
       const std::vector<attribute_id> *arguments_as_attributes,
-      const Predicate *predicate,
-      std::unique_ptr<TupleIdSequence> *reuse_matches) const;
+      const TupleIdSequence *filter) const;
 
   /**
    * @brief Perform GROUP BY aggregation on the tuples in the this storage
@@ -461,9 +466,8 @@ class StorageBlock : public StorageBlockBase {
   void aggregateGroupBy(
       const std::vector<std::vector<std::unique_ptr<const Scalar>>> &arguments,
       const std::vector<std::unique_ptr<const Scalar>> &group_by,
-      const Predicate *predicate,
+      const TupleIdSequence *filter,
       AggregationStateHashTableBase *hash_table,
-      std::unique_ptr<TupleIdSequence> *reuse_matches,
       std::vector<std::unique_ptr<ColumnVector>> *reuse_group_by_vectors) const;
 
   /**
@@ -505,9 +509,8 @@ class StorageBlock : public StorageBlockBase {
                          const std::vector<std::unique_ptr<const Scalar>> &arguments,
                          const std::vector<attribute_id> *arguments_as_attributes,
                          const std::vector<std::unique_ptr<const Scalar>> &group_by,
-                         const Predicate *predicate,
+                         const TupleIdSequence *filter,
                          AggregationStateHashTableBase *distinctify_hash_table,
-                         std::unique_ptr<TupleIdSequence> *reuse_matches,
                          std::vector<std::unique_ptr<ColumnVector>> *reuse_group_by_vectors) const;
 
   /**
@@ -627,8 +630,6 @@ class StorageBlock : public StorageBlockBase {
   // StorageBlock's header.
   bool rebuildIndexes(bool short_circuit);
 
-  TupleIdSequence* getMatchesForPredicate(const Predicate *predicate) const;
-
   std::unordered_map<attribute_id, TypedValue>* generateUpdatedValues(
       const ValueAccessor &accessor,
       const tuple_id tuple,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/utility/DAG.hpp
----------------------------------------------------------------------
diff --git a/utility/DAG.hpp b/utility/DAG.hpp
index a1f2619..b35f2b5 100644
--- a/utility/DAG.hpp
+++ b/utility/DAG.hpp
@@ -293,8 +293,10 @@ class DAG {
      *                      node at node_index.
      **/
      inline void addDependent(const size_type_nodes node_index, const LinkMetadataT &link_metadata) {
-       DCHECK(dependents_with_metadata_.find(node_index) == dependents_with_metadata_.end());
-       dependents_with_metadata_.emplace(node_index, link_metadata);
+//       DCHECK(dependents_with_metadata_.find(node_index) == dependents_with_metadata_.end());
+//       dependents_with_metadata_.emplace(node_index, link_metadata);
+       // TODO(jianqiao): implement upsert
+       dependents_with_metadata_[node_index] = link_metadata;
      }
 
     /**

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/utility/lip_filter/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/lip_filter/CMakeLists.txt b/utility/lip_filter/CMakeLists.txt
index 2232abe..d78f5cd 100644
--- a/utility/lip_filter/CMakeLists.txt
+++ b/utility/lip_filter/CMakeLists.txt
@@ -15,5 +15,44 @@
 # specific language governing permissions and limitations
 # under the License.
 
+QS_PROTOBUF_GENERATE_CPP(utility_lipfilter_LIPFilter_proto_srcs
+                         utility_lipfilter_LIPFilter_proto_hdrs
+                         LIPFilter.proto)
+
 # Declare micro-libs:
-add_library(quickstep_utility_lipfilter_LIPFilter ../../empty_src.cpp LIPFilter.hpp)
\ No newline at end of file
+add_library(quickstep_utility_lipfilter_LIPFilter LIPFilter.cpp LIPFilter.hpp)
+add_library(quickstep_utility_lipfilter_LIPFilterAdaptiveProber ../../empty_src.cpp LIPFilterAdaptiveProber.hpp)
+add_library(quickstep_utility_lipfilter_LIPFilterBuilder ../../empty_src.cpp LIPFilterBuilder.hpp)
+add_library(quickstep_utility_lipfilter_LIPFilterDeployment LIPFilterDeployment.cpp LIPFilterDeployment.hpp)
+add_library(quickstep_utility_lipfilter_LIPFilterFactory LIPFilterFactory.cpp LIPFilterFactory.hpp)
+add_library(quickstep_utility_lipfilter_LIPFilter_proto
+            ${utility_lipfilter_LIPFilter_proto_srcs})
+add_library(quickstep_utility_lipfilter_SingleIdentityHashFilter ../../empty_src.cpp SingleIdentityHashFilter.hpp)
+
+# Link dependencies:
+target_link_libraries(quickstep_utility_lipfilter_LIPFilter
+                      quickstep_utility_Macros)
+target_link_libraries(quickstep_utility_lipfilter_LIPFilterAdaptiveProber
+                      quickstep_catalog_CatalogTypedefs
+                      quickstep_utility_Macros)
+target_link_libraries(quickstep_utility_lipfilter_LIPFilterBuilder
+                      quickstep_catalog_CatalogTypedefs
+                      quickstep_utility_Macros)
+target_link_libraries(quickstep_utility_lipfilter_LIPFilterDeployment
+                      quickstep_catalog_CatalogTypedefs
+                      quickstep_types_TypeFactory
+                      quickstep_utility_Macros
+                      quickstep_utility_lipfilter_LIPFilter
+                      quickstep_utility_lipfilter_LIPFilter_proto)
+target_link_libraries(quickstep_utility_lipfilter_LIPFilterFactory
+                      quickstep_utility_lipfilter_LIPFilter
+                      quickstep_utility_lipfilter_LIPFilter_proto
+                      quickstep_utility_lipfilter_SingleIdentityHashFilter
+                      quickstep_utility_Macros)
+target_link_libraries(quickstep_utility_lipfilter_LIPFilter_proto
+                      ${PROTOBUF_LIBRARY}
+                      quickstep_types_Type_proto)
+target_link_libraries(quickstep_utility_lipfilter_SingleIdentityHashFilter
+                      quickstep_storage_StorageConstants
+                      quickstep_utility_lipfilter_LIPFilter
+                      quickstep_utility_Macros)

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/utility/lip_filter/LIPFilter.cpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilter.cpp b/utility/lip_filter/LIPFilter.cpp
new file mode 100644
index 0000000..92bfab1
--- /dev/null
+++ b/utility/lip_filter/LIPFilter.cpp
@@ -0,0 +1,24 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "utility/lip_filter/LIPFilter.hpp"
+
+namespace quickstep {
+
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/utility/lip_filter/LIPFilter.hpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilter.hpp b/utility/lip_filter/LIPFilter.hpp
index 33165ed..0df3c18 100644
--- a/utility/lip_filter/LIPFilter.hpp
+++ b/utility/lip_filter/LIPFilter.hpp
@@ -20,8 +20,20 @@
 #ifndef QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_HPP_
 #define QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_HPP_
 
+#include <cstddef>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "storage/StorageBlockInfo.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
 namespace quickstep {
 
+class Type;
+class ValueAccessor;
+
 /** \addtogroup Utility
  *  @{
  */
@@ -32,6 +44,35 @@ enum class LIPFilterType {
   kSingleIdentityHashFilter
 };
 
+/**
+ * @brief Abstract interface implemented by every LIP filter.
+ *
+ * @note Filters are held and destroyed through base-class pointers (for
+ *       example std::unique_ptr<LIPFilter>), so the destructor must be
+ *       virtual to make deleting a derived filter well defined.
+ **/
+class LIPFilter {
+ public:
+  /**
+   * @brief Virtual destructor.
+   **/
+  virtual ~LIPFilter() {}
+
+  /**
+   * @brief Get the type tag of this filter.
+   *
+   * @return The LIPFilterType that was passed to the constructor.
+   **/
+  LIPFilterType getType() const {
+    return type_;
+  }
+
+  /**
+   * @brief Insert values read from a ValueAccessor into this filter.
+   *
+   * @param accessor The ValueAccessor to read from.
+   * @param attr_id The attribute of accessor whose values are inserted.
+   * @param attr_type The type of that attribute.
+   **/
+  virtual void insertValueAccessor(ValueAccessor *accessor,
+                                   const attribute_id attr_id,
+                                   const Type *attr_type) = 0;
+
+  /**
+   * @brief Filter a batch of tuple ids against this filter.
+   *
+   * @param accessor The ValueAccessor that the tuple ids refer to.
+   * @param attr_id The attribute of accessor to probe with.
+   * @param is_attr_nullable Whether the probed attribute is nullable.
+   * @param batch In/out vector of tuple ids. LIPFilterAdaptiveProber reads
+   *        the surviving ids back from the first (return value) slots.
+   * @param batch_size The number of leading entries of batch to examine.
+   * @return The number of tuple ids that passed the filter.
+   **/
+  virtual std::size_t filterBatch(ValueAccessor *accessor,
+                                  const attribute_id attr_id,
+                                  const bool is_attr_nullable,
+                                  std::vector<tuple_id> *batch,
+                                  const std::size_t batch_size) const = 0;
+
+  /**
+   * @brief Implementation-defined population count of the filter.
+   *
+   * @note Presumably the number of set bits in the underlying bit vector;
+   *       confirm against the concrete implementations.
+   **/
+  virtual std::size_t onesCount() const = 0;
+
+ protected:
+  /**
+   * @brief Constructor, only callable by concrete filter implementations.
+   *
+   * @param type The type tag reported by getType().
+   **/
+  explicit LIPFilter(const LIPFilterType type)
+      : type_(type) {
+  }
+
+ private:
+  LIPFilterType type_;
+
+  DISALLOW_COPY_AND_ASSIGN(LIPFilter);
+};
+
 /** @} */
 
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/utility/lip_filter/LIPFilter.proto
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilter.proto b/utility/lip_filter/LIPFilter.proto
new file mode 100644
index 0000000..def13dd
--- /dev/null
+++ b/utility/lip_filter/LIPFilter.proto
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+syntax = "proto2";
+
+package quickstep.serialization;
+
+import "types/Type.proto";
+
+enum LIPFilterType {
+  BLOOM_FILTER = 1;
+  EXACT_FILTER = 2;
+  SINGLE_IDENTITY_HASH_FILTER = 3;
+}
+
+message LIPFilter {
+  required LIPFilterType lip_filter_type = 1;
+
+  extensions 16 to max;
+}
+
+message SingleIdentityHashFilter {
+  extend LIPFilter {
+    // All required
+    optional uint64 filter_cardinality = 16;
+    optional uint64 attribute_size = 17;
+  }
+}
+
+enum LIPFilterActionType {
+  BUILD = 1;
+  PROBE = 2;
+}
+
+message LIPFilterDeployment {
+  message Entry {
+    required uint32 lip_filter_id = 1;
+    required int32 attribute_id = 2;
+    required Type attribute_type = 3;
+  }
+
+  required LIPFilterActionType action_type = 1;
+  repeated Entry entries = 2;
+}

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/utility/lip_filter/LIPFilterAdaptiveProber.hpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilterAdaptiveProber.hpp b/utility/lip_filter/LIPFilterAdaptiveProber.hpp
new file mode 100644
index 0000000..af42446
--- /dev/null
+++ b/utility/lip_filter/LIPFilterAdaptiveProber.hpp
@@ -0,0 +1,188 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_ADAPTIVE_PROBER_HPP_
+#define QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_ADAPTIVE_PROBER_HPP_
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "storage/StorageBlockInfo.hpp"
+#include "storage/TupleIdSequence.hpp"
+#include "storage/ValueAccessor.hpp"
+#include "storage/ValueAccessorUtil.hpp"
+#include "types/Type.hpp"
+#include "utility/Macros.hpp"
+#include "utility/lip_filter/SingleIdentityHashFilter.hpp"
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ *  @{
+ */
+
+/**
+ * @brief Probes a set of LIP filters against the tuples of a ValueAccessor.
+ *        After every batch the filters are re-sorted so that the filter with
+ *        the highest observed miss rate is applied first.
+ **/
+class LIPFilterAdaptiveProber {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param lip_filters The filters to probe. They are not owned by the
+   *        prober.
+   * @param attr_ids For each filter, the id of the attribute to probe with.
+   * @param attr_types For each filter, the type of that attribute.
+   **/
+  LIPFilterAdaptiveProber(const std::vector<LIPFilter *> &lip_filters,
+                          const std::vector<attribute_id> &attr_ids,
+                          const std::vector<const Type *> &attr_types) {
+    DCHECK_EQ(lip_filters.size(), attr_ids.size());
+    DCHECK_EQ(lip_filters.size(), attr_types.size());
+
+    probe_entries_.reserve(lip_filters.size());
+    for (std::size_t i = 0; i < lip_filters.size(); ++i) {
+      probe_entries_.emplace_back(
+          new ProbeEntry(lip_filters[i], attr_ids[i], attr_types[i]));
+    }
+  }
+
+  /**
+   * @brief Run all filters over the tuples visible through a ValueAccessor.
+   *
+   * @param accessor The ValueAccessor to filter.
+   * @return A newly allocated TupleIdSequence with a bit set for every tuple
+   *         that passed all filters. The caller takes ownership.
+   **/
+  TupleIdSequence* filterValueAccessor(ValueAccessor *accessor) {
+    const TupleIdSequence *existence_map = accessor->getTupleIdSequenceVirtual();
+    if (existence_map == nullptr) {
+      return filterValueAccessorNoExistenceMap(accessor);
+    } else {
+      return filterValueAccessorWithExistenceMap(accessor, existence_map);
+    }
+  }
+
+ private:
+  // One filter together with the attribute it probes and its running
+  // hit/miss statistics.
+  struct ProbeEntry {
+    ProbeEntry(const LIPFilter *lip_filter_in,
+               const attribute_id attr_id_in,
+               const Type *attr_type_in)
+        : lip_filter(lip_filter_in),
+          attr_id(attr_id_in),
+          attr_type(attr_type_in),
+          miss(0),
+          cnt(0),
+          miss_rate(0.0f) {
+    }
+    // Orders entries by descending miss rate.
+    static bool isBetterThan(const std::unique_ptr<ProbeEntry> &a,
+                             const std::unique_ptr<ProbeEntry> &b) {
+      return a->miss_rate > b->miss_rate;
+    }
+    const LIPFilter *lip_filter;
+    const attribute_id attr_id;
+    const Type *attr_type;
+    std::uint32_t miss;  // Number of tuples rejected by this filter so far.
+    std::uint32_t cnt;   // Number of tuples this filter has been probed with.
+    float miss_rate;     // miss / cnt, refreshed by adaptEntryOrder().
+  };
+
+  // Filters tuple ids [0, num_tuples) in batches whose size doubles after
+  // every batch, starting at 64.
+  inline TupleIdSequence* filterValueAccessorNoExistenceMap(ValueAccessor *accessor) {
+    const std::uint32_t num_tuples = accessor->getNumTuplesVirtual();
+    std::unique_ptr<TupleIdSequence> matches(new TupleIdSequence(num_tuples));
+    std::uint32_t next_batch_size = 64u;
+    std::vector<tuple_id> batch(num_tuples);
+
+    std::uint32_t batch_start = 0;
+    // A plain while loop: an empty accessor yields an empty result without
+    // probing any filter.
+    while (batch_start < num_tuples) {
+      const std::uint32_t batch_size =
+          std::min(next_batch_size, num_tuples - batch_start);
+      for (std::uint32_t i = 0; i < batch_size; ++i) {
+        batch[i] = batch_start + i;
+      }
+
+      const std::uint32_t num_hits = filterBatch(accessor, &batch, batch_size);
+      for (std::uint32_t i = 0; i < num_hits; ++i) {
+        matches->set(batch[i], true);
+      }
+
+      batch_start += batch_size;
+      next_batch_size *= 2;
+    }
+
+    return matches.release();
+  }
+
+  // Same as above, but only visits the tuple ids present in existence_map.
+  inline TupleIdSequence* filterValueAccessorWithExistenceMap(ValueAccessor *accessor,
+                                                              const TupleIdSequence *existence_map) {
+    std::unique_ptr<TupleIdSequence> matches(
+        new TupleIdSequence(existence_map->length()));
+    std::uint32_t next_batch_size = 64u;
+    std::uint32_t num_tuples_left = existence_map->numTuples();
+    std::vector<tuple_id> batch(num_tuples_left);
+
+    TupleIdSequence::const_iterator tuple_it = existence_map->before_begin();
+    // A plain while loop: an empty existence map never advances the iterator
+    // and never probes a filter.
+    while (num_tuples_left > 0) {
+      const std::uint32_t batch_size =
+          std::min(next_batch_size, num_tuples_left);
+      for (std::uint32_t i = 0; i < batch_size; ++i) {
+        ++tuple_it;
+        batch[i] = *tuple_it;
+      }
+
+      const std::uint32_t num_hits = filterBatch(accessor, &batch, batch_size);
+      for (std::uint32_t i = 0; i < num_hits; ++i) {
+        matches->set(batch[i], true);
+      }
+
+      num_tuples_left -= batch_size;
+      next_batch_size *= 2;
+    }
+
+    return matches.release();
+  }
+
+  // Applies every filter in the current order to the first batch_size ids of
+  // *batch, updates the per-filter statistics, then re-sorts the filters.
+  // Returns the number of ids that passed all filters; those ids are read
+  // back from the front of *batch by the callers.
+  inline std::size_t filterBatch(ValueAccessor *accessor,
+                                 std::vector<tuple_id> *batch,
+                                 std::uint32_t batch_size) {
+    for (const auto &entry : probe_entries_) {
+      const std::uint32_t out_size =
+          entry->lip_filter->filterBatch(accessor,
+                                         entry->attr_id,
+                                         entry->attr_type->isNullable(),
+                                         batch,
+                                         batch_size);
+      entry->cnt += batch_size;
+      entry->miss += batch_size - out_size;
+      batch_size = out_size;
+      if (batch_size == 0) {
+        // Nothing is left to probe; the remaining filters would only see
+        // empty batches, which leave their statistics unchanged.
+        break;
+      }
+    }
+    adaptEntryOrder();
+    return batch_size;
+  }
+
+  // Recomputes every entry's miss rate and sorts the entries by descending
+  // miss rate.
+  inline void adaptEntryOrder() {
+    for (const auto &entry : probe_entries_) {
+      // An entry that has not seen any tuple yet has no meaningful rate.
+      // Using 0 avoids the 0/0 = NaN that would break the strict weak
+      // ordering required by std::sort.
+      entry->miss_rate = (entry->cnt == 0)
+          ? 0.0f
+          : static_cast<float>(entry->miss) / static_cast<float>(entry->cnt);
+    }
+    std::sort(probe_entries_.begin(),
+              probe_entries_.end(),
+              ProbeEntry::isBetterThan);
+  }
+
+  // Owned entries. They are kept behind pointers because ProbeEntry has a
+  // const member and therefore cannot be move-assigned by std::sort.
+  std::vector<std::unique_ptr<ProbeEntry>> probe_entries_;
+
+  DISALLOW_COPY_AND_ASSIGN(LIPFilterAdaptiveProber);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_ADAPTIVE_PROBER_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/utility/lip_filter/LIPFilterBuilder.hpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilterBuilder.hpp b/utility/lip_filter/LIPFilterBuilder.hpp
new file mode 100644
index 0000000..0a2d465
--- /dev/null
+++ b/utility/lip_filter/LIPFilterBuilder.hpp
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_BUILDER_HPP_
+#define QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_BUILDER_HPP_
+
+#include <memory>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "types/Type.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+
+class ValueAccessor;
+
+/** \addtogroup Utility
+ *  @{
+ */
+
+class LIPFilterBuilder;
+typedef std::shared_ptr<LIPFilterBuilder> LIPFilterBuilderPtr;
+
+/**
+ * @brief Feeds the tuples of a ValueAccessor into a group of LIP filters,
+ *        each paired with the attribute it is built on.
+ **/
+class LIPFilterBuilder {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param lip_filters The filters to build. Only the pointers are stored.
+   * @param attr_ids For each filter, the id of the attribute to insert.
+   * @param attr_types For each filter, the type of that attribute.
+   **/
+  LIPFilterBuilder(const std::vector<LIPFilter *> &lip_filters,
+                   const std::vector<attribute_id> &attr_ids,
+                   const std::vector<const Type *> &attr_types) {
+    const std::size_t num_filters = lip_filters.size();
+    DCHECK_EQ(num_filters, attr_ids.size());
+    DCHECK_EQ(num_filters, attr_types.size());
+
+    build_entries_.reserve(num_filters);
+    for (std::size_t idx = 0; idx != num_filters; ++idx) {
+      build_entries_.emplace_back(lip_filters[idx], attr_ids[idx], attr_types[idx]);
+    }
+  }
+
+  /**
+   * @brief Forward a ValueAccessor to every filter, in construction order.
+   *
+   * @param accessor The ValueAccessor handed to each filter's
+   *        insertValueAccessor().
+   **/
+  void insertValueAccessor(ValueAccessor *accessor) {
+    for (const BuildEntry &build_entry : build_entries_) {
+      LIPFilter *target = build_entry.lip_filter;
+      target->insertValueAccessor(accessor, build_entry.attr_id, build_entry.attr_type);
+    }
+  }
+
+ private:
+  // A filter together with the attribute (id and type) it is built on.
+  struct BuildEntry {
+    BuildEntry(LIPFilter *lip_filter_in,
+               const attribute_id attr_id_in,
+               const Type *attr_type_in)
+        : lip_filter(lip_filter_in),
+          attr_id(attr_id_in),
+          attr_type(attr_type_in) {
+    }
+    LIPFilter *lip_filter;
+    const attribute_id attr_id;
+    const Type *attr_type;
+  };
+
+  std::vector<BuildEntry> build_entries_;
+
+  DISALLOW_COPY_AND_ASSIGN(LIPFilterBuilder);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_BUILDER_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/utility/lip_filter/LIPFilterDeployment.cpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilterDeployment.cpp b/utility/lip_filter/LIPFilterDeployment.cpp
new file mode 100644
index 0000000..0ac396b
--- /dev/null
+++ b/utility/lip_filter/LIPFilterDeployment.cpp
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "utility/lip_filter/LIPFilterDeployment.hpp"
+
+#include "types/TypeFactory.hpp"
+#include "utility/lip_filter/LIPFilter.pb.h"
+#include "utility/lip_filter/LIPFilterBuilder.hpp"
+#include "utility/lip_filter/LIPFilterAdaptiveProber.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+// Reconstructs a deployment from its serialized form. Every entry's
+// lip_filter_id is resolved to a (non-owning) pointer into lip_filters.
+LIPFilterDeployment::LIPFilterDeployment(
+    const serialization::LIPFilterDeployment &proto,
+    const std::vector<std::unique_ptr<LIPFilter>> &lip_filters) {
+  // Translate the serialized action type into the in-memory enum.
+  const auto serialized_action = proto.action_type();
+  switch (serialized_action) {
+    case serialization::LIPFilterActionType::BUILD: {
+      action_type_ = LIPFilterActionType::kBuild;
+      break;
+    }
+    case serialization::LIPFilterActionType::PROBE: {
+      action_type_ = LIPFilterActionType::kProbe;
+      break;
+    }
+    default: {
+      LOG(FATAL) << "Unsupported LIPFilterActionType: "
+                 << serialization::LIPFilterActionType_Name(proto.action_type());
+    }
+  }
+
+  // The three vectors are parallel: one slot per deployment entry.
+  for (const auto &entry : proto.entries()) {
+    LIPFilter *filter = lip_filters.at(entry.lip_filter_id()).get();
+    const Type &attribute_type =
+        TypeFactory::ReconstructFromProto(entry.attribute_type());
+
+    lip_filters_.emplace_back(filter);
+    attr_ids_.emplace_back(entry.attribute_id());
+    attr_types_.emplace_back(&attribute_type);
+  }
+}
+
+// Checks that a serialized deployment has all of its required fields set and
+// that every entry carries a valid attribute type.
+//
+// NOTE: lip_filter_id cannot be range-checked here because the number of
+// available filters is only known to the constructor, which uses at().
+bool LIPFilterDeployment::ProtoIsValid(
+    const serialization::LIPFilterDeployment &proto) {
+  // Covers the required fields of the message and of every nested Entry.
+  if (!proto.IsInitialized()) {
+    return false;
+  }
+
+  for (int i = 0; i < proto.entries_size(); ++i) {
+    if (!TypeFactory::ProtoIsValid(proto.entries(i).attribute_type())) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Creates a builder over this deployment's filters, attribute ids and
+// attribute types. Only meaningful for a kBuild deployment (checked in debug
+// builds). The builder is allocated with new and is not retained here.
+LIPFilterBuilder* LIPFilterDeployment::createLIPFilterBuilder() const {
+  DCHECK(action_type_ == LIPFilterActionType::kBuild);
+  return new LIPFilterBuilder(lip_filters_, attr_ids_, attr_types_);
+}
+
+// Creates an adaptive prober over this deployment's filters, attribute ids
+// and attribute types. Only meaningful for a kProbe deployment (checked in
+// debug builds). The prober is allocated with new and is not retained here.
+LIPFilterAdaptiveProber* LIPFilterDeployment::createLIPFilterAdaptiveProber() const {
+  DCHECK(action_type_ == LIPFilterActionType::kProbe);
+  return new LIPFilterAdaptiveProber(lip_filters_, attr_ids_, attr_types_);
+}
+
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/utility/lip_filter/LIPFilterDeployment.hpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilterDeployment.hpp b/utility/lip_filter/LIPFilterDeployment.hpp
new file mode 100644
index 0000000..d939e85
--- /dev/null
+++ b/utility/lip_filter/LIPFilterDeployment.hpp
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_DEPLOYMENT_HPP_
+#define QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_DEPLOYMENT_HPP_
+
+#include <memory>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "utility/Macros.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+#include "utility/lip_filter/LIPFilter.pb.h"
+
+namespace quickstep {
+
+class LIPFilterBuilder;
+class LIPFilterAdaptiveProber;
+class Type;
+
+/** \addtogroup Utility
+ *  @{
+ */
+
+/**
+ * @brief What a LIPFilterDeployment does with its LIP filters.
+ **/
+enum class LIPFilterActionType {
+  kBuild = 0,
+  kProbe
+};
+
+/**
+ * @brief Describes which LIP filters are built or probed, and on which
+ *        attributes. Reconstructed from a serialized LIPFilterDeployment.
+ **/
+class LIPFilterDeployment {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param proto The serialized deployment.
+   * @param lip_filters The filters that the entries' lip_filter_id fields
+   *        index into. Only raw pointers to them are stored.
+   **/
+  LIPFilterDeployment(const serialization::LIPFilterDeployment &proto,
+                      const std::vector<std::unique_ptr<LIPFilter>> &lip_filters);
+
+  /**
+   * @brief Check whether a serialized deployment is valid.
+   *
+   * @param proto The serialized deployment to check.
+   * @return Whether proto is valid.
+   **/
+  static bool ProtoIsValid(const serialization::LIPFilterDeployment &proto);
+
+  /**
+   * @brief Get the action type of this deployment.
+   *
+   * @return The action type taken from the serialized deployment.
+   **/
+  LIPFilterActionType getActionType() const {
+    return action_type_;
+  }
+
+  /**
+   * @brief Create a builder for this deployment's filters.
+   *
+   * @return A newly allocated LIPFilterBuilder.
+   **/
+  LIPFilterBuilder* createLIPFilterBuilder() const;
+
+  /**
+   * @brief Create an adaptive prober for this deployment's filters.
+   *
+   * @return A newly allocated LIPFilterAdaptiveProber.
+   **/
+  LIPFilterAdaptiveProber* createLIPFilterAdaptiveProber() const;
+
+ private:
+  LIPFilterActionType action_type_;
+  // Parallel vectors with one slot per deployment entry.
+  std::vector<LIPFilter *> lip_filters_;
+  std::vector<attribute_id> attr_ids_;
+  std::vector<const Type *> attr_types_;
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_DEPLOYMENT_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/utility/lip_filter/LIPFilterFactory.cpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilterFactory.cpp b/utility/lip_filter/LIPFilterFactory.cpp
new file mode 100644
index 0000000..f0e7725
--- /dev/null
+++ b/utility/lip_filter/LIPFilterFactory.cpp
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "utility/lip_filter/LIPFilterFactory.hpp"
+
+#include "utility/lip_filter/LIPFilter.hpp"
+#include "utility/lip_filter/SingleIdentityHashFilter.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+// Rebuilds a LIPFilter from its serialized form. The filter is allocated with
+// 'new' and returned as a raw pointer. Aborts through LOG(FATAL) when the
+// serialized filter type is not one of the cases handled below.
+LIPFilter* LIPFilterFactory::ReconstructFromProto(const serialization::LIPFilter &proto) {
+  const auto filter_type = proto.lip_filter_type();
+  switch (filter_type) {
+    case serialization::LIPFilterType::SINGLE_IDENTITY_HASH_FILTER: {
+      const std::size_t cardinality =
+          proto.GetExtension(serialization::SingleIdentityHashFilter::filter_cardinality);
+      const std::size_t value_width =
+          proto.GetExtension(serialization::SingleIdentityHashFilter::attribute_size);
+
+      // Choose the widest of the 1/2/4/8-byte unsigned integer types that is
+      // no wider than the attribute; anything below 2 bytes uses 1 byte.
+      if (value_width < 2) {
+        return new SingleIdentityHashFilter<std::uint8_t>(cardinality);
+      }
+      if (value_width < 4) {
+        return new SingleIdentityHashFilter<std::uint16_t>(cardinality);
+      }
+      if (value_width < 8) {
+        return new SingleIdentityHashFilter<std::uint32_t>(cardinality);
+      }
+      return new SingleIdentityHashFilter<std::uint64_t>(cardinality);
+    }
+    default:
+      LOG(FATAL) << "Unsupported LIP filter type: "
+                 << serialization::LIPFilterType_Name(filter_type);
+  }
+}
+
+// Checks whether 'proto' describes a filter that ReconstructFromProto() can
+// build: all required fields must be set, the filter type must be one that is
+// handled there, and the size parameters must be nonzero. Returns false
+// (rather than aborting) for anything else.
+bool LIPFilterFactory::ProtoIsValid(const serialization::LIPFilter &proto) {
+  if (!proto.IsInitialized()) {
+    return false;
+  }
+
+  switch (proto.lip_filter_type()) {
+    case serialization::LIPFilterType::SINGLE_IDENTITY_HASH_FILTER: {
+      const std::size_t attr_size =
+          proto.GetExtension(serialization::SingleIdentityHashFilter::attribute_size);
+      const std::size_t filter_cardinality =
+          proto.GetExtension(serialization::SingleIdentityHashFilter::filter_cardinality);
+      // SingleIdentityHashFilter hashes values modulo the cardinality, so a
+      // zero cardinality would divide by zero; a zero-width attribute has no
+      // bytes to hash.
+      return attr_size != 0 && filter_cardinality != 0;
+    }
+    default:
+      return false;
+  }
+}
+
+}  // namespace quickstep
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/utility/lip_filter/LIPFilterFactory.hpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilterFactory.hpp b/utility/lip_filter/LIPFilterFactory.hpp
new file mode 100644
index 0000000..6a94ae4
--- /dev/null
+++ b/utility/lip_filter/LIPFilterFactory.hpp
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_FACTORY_HPP_
+#define QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_FACTORY_HPP_
+
+#include <vector>
+
+#include "utility/Macros.hpp"
+#include "utility/lip_filter/LIPFilter.pb.h"
+
+namespace quickstep {
+
+class LIPFilter;
+
+/** \addtogroup Utility
+ *  @{
+ */
+
+/**
+ * @brief All-static factory for rebuilding LIPFilter objects from their
+ *        serialized (protobuf) representation.
+ **/
+class LIPFilterFactory {
+ public:
+  /**
+   * @brief Reconstruct a LIPFilter from its serialized representation.
+   *
+   * @param proto The serialized LIPFilter.
+   * @return A raw pointer to the reconstructed LIPFilter.
+   *         NOTE(review): presumably the caller takes ownership -- confirm
+   *         against the call sites.
+   **/
+  static LIPFilter* ReconstructFromProto(const serialization::LIPFilter &proto);
+
+  /**
+   * @brief Check a serialized LIPFilter for validity.
+   *
+   * @param proto The serialized LIPFilter to check.
+   * @return Whether proto is considered valid.
+   **/
+  static bool ProtoIsValid(const serialization::LIPFilter &proto);
+
+ private:
+
+  DISALLOW_COPY_AND_ASSIGN(LIPFilterFactory);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_FACTORY_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ca9c1790/utility/lip_filter/SingleIdentityHashFilter.hpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/SingleIdentityHashFilter.hpp b/utility/lip_filter/SingleIdentityHashFilter.hpp
new file mode 100644
index 0000000..0258c24
--- /dev/null
+++ b/utility/lip_filter/SingleIdentityHashFilter.hpp
@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_LIP_FILTER_SINGLE_IDENTITY_HASH_FILTER_HPP_
+#define QUICKSTEP_UTILITY_LIP_FILTER_SINGLE_IDENTITY_HASH_FILTER_HPP_
+
+#include <vector>
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <utility>
+#include <vector>
+
+#include "storage/StorageConstants.hpp"
+#include "storage/ValueAccessor.hpp"
+#include "storage/ValueAccessorUtil.hpp"
+#include "types/Type.hpp"
+#include "utility/BitManipulation.hpp"
+#include "utility/Macros.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ *  @{
+ */
+
+/**
+ * @brief A LIP filter backed by a bit array. The "hash" of a value is the
+ *        value itself, read as a CppType, modulo the filter cardinality.
+ *        Bits are only ever set, so a membership test can report a false
+ *        positive but never a false negative.
+ *
+ * @tparam CppType The integer type as which attribute values are read.
+ **/
+template <typename CppType>
+class SingleIdentityHashFilter : public LIPFilter {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param filter_cardinality The number of bits in the filter. Must be at
+   *        least 1, because values are hashed modulo this number.
+   **/
+  explicit SingleIdentityHashFilter(const std::size_t filter_cardinality)
+      : LIPFilter(LIPFilterType::kSingleIdentityHashFilter),
+        filter_cardinality_(filter_cardinality),
+        bit_array_(GetByteSize(filter_cardinality)) {
+    DCHECK_GE(filter_cardinality, 1u);
+    // Clear the bits through the atomic interface instead of memset():
+    // std::atomic is not a type that may be overwritten byte-wise.
+    for (std::atomic<std::uint8_t> &byte : bit_array_) {
+      byte.store(0, std::memory_order_relaxed);
+    }
+  }
+
+  /**
+   * @brief Insert the values of one attribute, for every tuple iterated by
+   *        the given accessor, into this filter. NULL values are skipped.
+   *
+   * @param accessor The ValueAccessor to read tuples from.
+   * @param attr_id The id of the attribute whose values are inserted.
+   * @param attr_type The type of the attribute; only its nullability is
+   *        consulted here.
+   **/
+  void insertValueAccessor(ValueAccessor *accessor,
+                           const attribute_id attr_id,
+                           const Type *attr_type) override {
+    InvokeOnAnyValueAccessor(
+        accessor,
+        [&](auto *accessor) -> void {  // NOLINT(build/c++11)
+      if (attr_type->isNullable()) {
+        insertValueAccessorInternal<true>(accessor, attr_id);
+      } else {
+        insertValueAccessorInternal<false>(accessor, attr_id);
+      }
+    });
+  }
+
+  /**
+   * @brief Filter a batch of tuple ids in place, keeping only the tuples
+   *        whose attribute value may be present in this filter.
+   *
+   * @param accessor The ValueAccessor to read attribute values from.
+   * @param attr_id The id of the attribute to test.
+   * @param is_attr_nullable Whether the attribute may be NULL. If so, tuples
+   *        with a NULL value are dropped.
+   * @param batch The tuple ids to filter. On return, its leading entries
+   *        hold the surviving ids in their original order; the vector is
+   *        not resized.
+   * @param batch_size The number of leading entries of batch to examine.
+   * @return The number of surviving tuple ids.
+   **/
+  std::size_t filterBatch(ValueAccessor *accessor,
+                          const attribute_id attr_id,
+                          const bool is_attr_nullable,
+                          std::vector<tuple_id> *batch,
+                          const std::size_t batch_size) const override {
+    return InvokeOnAnyValueAccessor(
+        accessor,
+        [&](auto *accessor) -> std::size_t {  // NOLINT(build/c++11)
+      if (is_attr_nullable) {
+        return filterBatchInternal<true>(accessor, attr_id, batch, batch_size);
+      } else {
+        return filterBatchInternal<false>(accessor, attr_id, batch, batch_size);
+      }
+    });
+  }
+
+  /**
+   * @brief Count the bits that are currently set in the filter.
+   *
+   * @return The number of set bits.
+   **/
+  std::size_t onesCount() const override {
+    std::size_t count = 0;
+    for (const std::atomic<std::uint8_t> &byte : bit_array_) {
+      count += population_count<std::uint8_t>(byte.load(std::memory_order_relaxed));
+    }
+    return count;
+  }
+
+  /**
+   * @brief Inserts a given value into the hash filter.
+   *
+   * @param key_begin A pointer to the value being inserted.
+   */
+  inline void insert(const void *key_begin) {
+    const std::size_t bit = getBitPosition(key_begin);
+    bit_array_[bit >> 3].fetch_or(static_cast<std::uint8_t>(1u << (bit & 0x7)),
+                                  std::memory_order_relaxed);
+  }
+
+  /**
+   * @brief Test membership of a given value in the hash filter.
+   *        If true is returned, then a value may or may not be present in the hash filter.
+   *        If false is returned, a value is certainly not present in the hash filter.
+   *
+   * @param key_begin A pointer to the value being tested for membership.
+   */
+  inline bool contains(const void *key_begin) const {
+    const std::size_t bit = getBitPosition(key_begin);
+    const std::uint8_t mask = static_cast<std::uint8_t>(1u << (bit & 0x7));
+    return (bit_array_[bit >> 3].load(std::memory_order_relaxed) & mask) != 0;
+  }
+
+ private:
+  // Number of bytes needed to hold bit_size bits (rounded up).
+  inline static std::size_t GetByteSize(const std::size_t bit_size) {
+    return (bit_size + 7) / 8;
+  }
+
+  // Maps the value stored at key_begin to its bit index, which lies in
+  // [0, filter_cardinality_).
+  inline std::size_t getBitPosition(const void *key_begin) const {
+    // Copy the bytes out instead of dereferencing a casted pointer, so that
+    // a value stored at an address not aligned for CppType is read safely.
+    CppType key;
+    std::memcpy(&key, key_begin, sizeof(key));
+    return key % filter_cardinality_;
+  }
+
+  // Iterates the accessor from the beginning and inserts every non-NULL
+  // value of attr_id.
+  template <bool is_attr_nullable, typename ValueAccessorT>
+  inline void insertValueAccessorInternal(ValueAccessorT *accessor,
+                                          const attribute_id attr_id) {
+    accessor->beginIteration();
+    while (accessor->next()) {
+      const void *value = accessor->template getUntypedValue<is_attr_nullable>(attr_id);
+      if (!is_attr_nullable || value != nullptr) {
+        insert(value);
+      }
+    }
+  }
+
+  // Compacts the first batch_size entries of *batch in place so that only
+  // the tuple ids passing contains() remain at the front; returns how many
+  // remain.
+  template <bool is_attr_nullable, typename ValueAccessorT>
+  inline std::size_t filterBatchInternal(const ValueAccessorT *accessor,
+                                         const attribute_id attr_id,
+                                         std::vector<tuple_id> *batch,
+                                         const std::size_t batch_size) const {
+    std::size_t out_size = 0;
+    for (std::size_t i = 0; i < batch_size; ++i) {
+      const tuple_id tid = batch->at(i);
+      // No 'template' disambiguator here: it may only precede a name that is
+      // followed by an explicit template argument list.
+      // NOTE(review): unlike getUntypedValue<is_attr_nullable>() above, no
+      // nullability argument is passed -- confirm the default is intended.
+      const void *value =
+          accessor->getUntypedValueAtAbsolutePosition(attr_id, tid);
+      if (is_attr_nullable && value == nullptr) {
+        continue;
+      }
+      if (contains(value)) {
+        // out_size never exceeds i, so this only overwrites entries that
+        // have already been examined.
+        batch->at(out_size) = tid;
+        ++out_size;
+      }
+    }
+    return out_size;
+  }
+
+  // Number of bits in the filter; values are hashed modulo this number.
+  std::size_t filter_cardinality_;
+  // One atomic byte per 8 bits. Note that alignas applies to the vector
+  // object itself, not to the heap buffer it manages.
+  alignas(kCacheLineBytes) std::vector<std::atomic<std::uint8_t>> bit_array_;
+
+  DISALLOW_COPY_AND_ASSIGN(SingleIdentityHashFilter);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_UTILITY_LIP_FILTER_SINGLE_IDENTITY_HASH_FILTER_HPP_


[06/12] incubator-quickstep git commit: Try fix for clang memory problems

Posted by ji...@apache.org.
Try fix for clang memory problems

Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/17ffbb05
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/17ffbb05
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/17ffbb05

Branch: refs/heads/lip-refactor
Commit: 17ffbb05d53e33ff846a1a6d774accadbd8e1b20
Parents: e4de241
Author: Saket Saurabh <sa...@users.noreply.github.com>
Authored: Tue Oct 11 10:06:42 2016 -0500
Committer: Saket Saurabh <ss...@cs.wisc.edu>
Committed: Tue Oct 11 11:36:57 2016 -0500

----------------------------------------------------------------------
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/17ffbb05/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 54a0c8a..6895c0d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -21,7 +21,7 @@ env:
   - BUILD_TYPE=Release VECTOR_COPY_ELISION_LEVEL=selection
 
 install:
-  - if [ "$CC" = "gcc" ]; then
+  - if [ "$CC" = "gcc" ] || [[ "$BUILD_TYPE" = "Release" &&  "$VECTOR_COPY_ELISION_LEVEL" = "selection" ]]; then
       export MAKE_JOBS=1;
     else
       export MAKE_JOBS=2;


[03/12] incubator-quickstep git commit: Refactor ScalarAttribute to take benefit of ColumnAccessors

Posted by ji...@apache.org.
Refactor ScalarAttribute to take benefit of ColumnAccessors


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/e8452468
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/e8452468
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/e8452468

Branch: refs/heads/lip-refactor
Commit: e84524686d77397912052772224d7cfe8dec824a
Parents: 262ad5a
Author: Saket Saurabh <ss...@cs.wisc.edu>
Authored: Fri Sep 23 12:42:40 2016 -0500
Committer: Saket Saurabh <ss...@cs.wisc.edu>
Committed: Tue Oct 11 11:36:57 2016 -0500

----------------------------------------------------------------------
 expressions/scalar/ScalarAttribute.cpp          | 43 ++++++++++++++++----
 storage/BasicColumnStoreValueAccessor.hpp       |  2 +-
 storage/PackedRowStoreValueAccessor.hpp         |  2 +-
 storage/ValueAccessor.hpp                       |  4 +-
 ...kedRowStoreTupleStorageSubBlock_unittest.cpp | 10 ++---
 .../comparisons/AsciiStringComparators-inl.hpp  |  8 ++--
 types/operations/comparisons/Comparison-inl.hpp |  4 +-
 .../comparisons/LiteralComparators-inl.hpp      | 10 ++---
 .../PatternMatchingComparators-inl.hpp          |  2 +-
 9 files changed, 55 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/e8452468/expressions/scalar/ScalarAttribute.cpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarAttribute.cpp b/expressions/scalar/ScalarAttribute.cpp
index 08dc9dd..b29286b 100644
--- a/expressions/scalar/ScalarAttribute.cpp
+++ b/expressions/scalar/ScalarAttribute.cpp
@@ -100,18 +100,43 @@ ColumnVector* ScalarAttribute::getAllValues(ValueAccessor *accessor,
                                                           accessor->getNumTuples());
       accessor->beginIteration();
       if (result_type.isNullable()) {
-        while (accessor->next()) {
-          const void *value = accessor->template getUntypedValue<true>(attr_id);
-          if (value == nullptr) {
-            result->appendNullValue();
-          } else {
-            result->appendUntypedValue(value);
+        if (accessor->isColumnAccessorSupported()) {
+          // If ColumnAccessor is supported on the underlying accessor, we have a fast strided
+          // column accessor available for the iteration on the underlying block.
+          // Since the attributes can be null, ColumnAccessor template takes a 'true' argument.
+          std::unique_ptr<const ColumnAccessor<true>>
+              column_accessor(accessor->template getColumnAccessor<true>(attr_id));
+          while (accessor->next()) {
+            const void *value = column_accessor->getUntypedValue();  // Fast strided access.
+            if (value == nullptr) {
+              result->appendNullValue();
+            } else {
+              result->appendUntypedValue(value);
+            }
+          }
+        } else {
+          while (accessor->next()) {
+            const void *value = accessor->template getUntypedValue<true>(attr_id);
+            if (value == nullptr) {
+              result->appendNullValue();
+            } else {
+              result->appendUntypedValue(value);
+            }
           }
         }
       } else {
-        while (accessor->next()) {
-          result->appendUntypedValue(
-              accessor->template getUntypedValue<false>(attr_id));
+        if (accessor->isColumnAccessorSupported()) {
+          // Since the attributes cannot be null, ColumnAccessor template takes a 'false' argument.
+          std::unique_ptr<const ColumnAccessor<false>>
+              column_accessor(accessor->template getColumnAccessor<false>(attr_id));
+          while (accessor->next()) {
+            result->appendUntypedValue(column_accessor->getUntypedValue());  // Fast strided access.
+          }
+        } else {
+          while (accessor->next()) {
+            result->appendUntypedValue(
+                accessor->template getUntypedValue<false>(attr_id));
+          }
         }
       }
       return result;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/e8452468/storage/BasicColumnStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/BasicColumnStoreValueAccessor.hpp b/storage/BasicColumnStoreValueAccessor.hpp
index 22d3c0b..7516dc9 100644
--- a/storage/BasicColumnStoreValueAccessor.hpp
+++ b/storage/BasicColumnStoreValueAccessor.hpp
@@ -81,7 +81,7 @@ class BasicColumnStoreValueAccessorHelper {
   template <bool check_null = true>
   inline const ColumnAccessor<check_null>* getColumnAccessor(const tuple_id &current_tuple_position,
                                                              const attribute_id attr_id) const {
-    DEBUG_ASSERT(relation_.hasAttributeWithId(attr_id));
+    DCHECK(relation_.hasAttributeWithId(attr_id));
     const void* base_location = static_cast<const char*>(column_stripes_[attr_id]);
     const std::size_t stride = relation_.getAttributeById(attr_id)->getType().maximumByteLength();
     std::unique_ptr<ColumnAccessor<check_null>> column_accessor;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/e8452468/storage/PackedRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/PackedRowStoreValueAccessor.hpp b/storage/PackedRowStoreValueAccessor.hpp
index 7eb2d41..9d43955 100644
--- a/storage/PackedRowStoreValueAccessor.hpp
+++ b/storage/PackedRowStoreValueAccessor.hpp
@@ -78,7 +78,7 @@ class PackedRowStoreValueAccessorHelper {
   template <bool check_null = true>
   inline const ColumnAccessor<check_null>* getColumnAccessor(const tuple_id &current_tuple_position,
                                                              const attribute_id attr_id) const {
-    DEBUG_ASSERT(relation_.hasAttributeWithId(attr_id));
+    DCHECK(relation_.hasAttributeWithId(attr_id));
     const void* base_location = static_cast<const char*>(tuple_storage_)
         + relation_.getFixedLengthAttributeOffset(attr_id);
     const std::size_t stride = relation_.getFixedByteLength();

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/e8452468/storage/ValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/ValueAccessor.hpp b/storage/ValueAccessor.hpp
index 3b58a7c..e4a2906 100644
--- a/storage/ValueAccessor.hpp
+++ b/storage/ValueAccessor.hpp
@@ -1016,9 +1016,9 @@ class ColumnAccessor {
    * @return An untyped pointer to the attribute value for the current tuple.
    **/
   inline const void* getUntypedValue() const {
-    DEBUG_ASSERT(current_tuple_position_ < num_tuples_);
+    DCHECK(current_tuple_position_ < num_tuples_);
     if (check_null) {
-      DEBUG_ASSERT(null_bitmap_ != nullptr);
+      DCHECK(null_bitmap_ != nullptr);
       if ((nullable_base_ != -1)
           && null_bitmap_->getBit(current_tuple_position_ * nullable_stride_ + nullable_base_)) {
         return nullptr;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/e8452468/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp
----------------------------------------------------------------------
diff --git a/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp b/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp
index a6f6606..924f9b1 100644
--- a/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp
+++ b/storage/tests/PackedRowStoreTupleStorageSubBlock_unittest.cpp
@@ -250,7 +250,7 @@ class PackedRowStoreTupleStorageSubBlockTest : public ::testing::TestWithParam<b
                                                     tuple_store_->getAttributeValueTyped(tid, 2)));
     }
   }
-  
+
   template<bool check_null>
   void checkColumnAccessor() {
     initializeNewBlock(kSubBlockSize);
@@ -269,7 +269,7 @@ class PackedRowStoreTupleStorageSubBlockTest : public ::testing::TestWithParam<b
       while (accessor->next()) {
         const void *va_value = column_accessor->getUntypedValue();
         std::unique_ptr<Tuple> expected_tuple(createSampleTuple(tid));
-         
+
         if (expected_tuple->getAttributeValue(value_accessor_id).isNull()) {
           ASSERT_TRUE(va_value == nullptr);
         } else {
@@ -406,11 +406,11 @@ TEST_P(PackedRowStoreTupleStorageSubBlockTest, InsertInBatchTest) {
   EXPECT_EQ(row_capacity - 1, tuple_store_->getMaxTupleID());
   EXPECT_EQ(row_capacity, tuple_store_->numTuples());
 }
-  
+
 TEST_P(PackedRowStoreTupleStorageSubBlockTest, ColumnAccessorTest) {
-  if (GetParam()) { // when true, the attributes can be nullable.
+  if (GetParam()) {   // when true, the attributes can be nullable.
     checkColumnAccessor<true>();
-  } else { // when false, the attributes are non-null.
+  } else {   // when false, the attributes are non-null.
     checkColumnAccessor<false>();
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/e8452468/types/operations/comparisons/AsciiStringComparators-inl.hpp
----------------------------------------------------------------------
diff --git a/types/operations/comparisons/AsciiStringComparators-inl.hpp b/types/operations/comparisons/AsciiStringComparators-inl.hpp
index b048c60..87d7168 100644
--- a/types/operations/comparisons/AsciiStringComparators-inl.hpp
+++ b/types/operations/comparisons/AsciiStringComparators-inl.hpp
@@ -280,8 +280,8 @@ TupleIdSequence* AsciiStringUncheckedComparator<ComparisonFunctor,
             left_column_accessor(accessor->template getColumnAccessor<left_nullable>(left_id));
         std::unique_ptr<const ColumnAccessor<right_nullable>>
             right_column_accessor(accessor->template getColumnAccessor<right_nullable>(right_id));
-        DEBUG_ASSERT(left_column_accessor != nullptr);
-        DEBUG_ASSERT(right_column_accessor != nullptr);
+        DCHECK(left_column_accessor != nullptr);
+        DCHECK(right_column_accessor != nullptr);
         while (accessor->next()) {
           const void *left_value = left_column_accessor->getUntypedValue();
           const void *right_value = right_column_accessor->getUntypedValue();
@@ -357,7 +357,7 @@ TupleIdSequence* AsciiStringUncheckedComparator<ComparisonFunctor,
         // column accessor available for the iteration on the underlying block.
         std::unique_ptr<const ColumnAccessor<va_nullable>>
             column_accessor(accessor->template getColumnAccessor<va_nullable>(value_accessor_attr_id));
-        DEBUG_ASSERT(column_accessor != nullptr);
+        DCHECK(column_accessor != nullptr);
         while (accessor->next()) {
           const void *va_value = column_accessor->getUntypedValue();
           result->set(accessor->getCurrentPosition(),
@@ -488,7 +488,7 @@ TupleIdSequence* AsciiStringUncheckedComparator<ComparisonFunctor,
             // column accessor available for the iteration on the underlying block.
             std::unique_ptr<const ColumnAccessor<va_nullable>>
                 column_accessor(accessor->template getColumnAccessor<va_nullable>(value_accessor_attr_id));
-            DEBUG_ASSERT(column_accessor != nullptr);
+            DCHECK(column_accessor != nullptr);
             while (accessor->next()) {
               const void *cv_value
                   = column_vector.template getUntypedValue<cv_nullable>(cv_pos);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/e8452468/types/operations/comparisons/Comparison-inl.hpp
----------------------------------------------------------------------
diff --git a/types/operations/comparisons/Comparison-inl.hpp b/types/operations/comparisons/Comparison-inl.hpp
index 96771bf..c892a16 100644
--- a/types/operations/comparisons/Comparison-inl.hpp
+++ b/types/operations/comparisons/Comparison-inl.hpp
@@ -316,8 +316,8 @@ TupleIdSequence* UncheckedComparator::compareSingleValueAccessorDefaultImpl(
             left_column_accessor(accessor->template getColumnAccessor<left_nullable>(left_id));
         std::unique_ptr<const ColumnAccessor<right_nullable>>
             right_column_accessor(accessor->template getColumnAccessor<right_nullable>(right_id));
-        DEBUG_ASSERT(left_column_accessor != nullptr);
-        DEBUG_ASSERT(right_column_accessor != nullptr);
+        DCHECK(left_column_accessor != nullptr);
+        DCHECK(right_column_accessor != nullptr);
         while (accessor->next()) {
           result->set(accessor->getCurrentPosition(),
                       this->compareDataPtrs(

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/e8452468/types/operations/comparisons/LiteralComparators-inl.hpp
----------------------------------------------------------------------
diff --git a/types/operations/comparisons/LiteralComparators-inl.hpp b/types/operations/comparisons/LiteralComparators-inl.hpp
index 31eec13..fd59e2e 100644
--- a/types/operations/comparisons/LiteralComparators-inl.hpp
+++ b/types/operations/comparisons/LiteralComparators-inl.hpp
@@ -280,8 +280,8 @@ TupleIdSequence* LiteralUncheckedComparator<ComparisonFunctor,
             left_column_accessor(accessor->template getColumnAccessor<left_nullable>(left_id));
         std::unique_ptr<const ColumnAccessor<right_nullable>>
             right_column_accessor(accessor->template getColumnAccessor<right_nullable>(right_id));
-        DEBUG_ASSERT(left_column_accessor != nullptr);
-        DEBUG_ASSERT(right_column_accessor != nullptr);
+        DCHECK(left_column_accessor != nullptr);
+        DCHECK(right_column_accessor != nullptr);
         while (accessor->next()) {
           const void *left_value = left_column_accessor->getUntypedValue();
           const void *right_value = right_column_accessor->getUntypedValue();
@@ -363,7 +363,7 @@ TupleIdSequence* LiteralUncheckedComparator<ComparisonFunctor,
         // column accessor available for the iteration on the underlying block.
         std::unique_ptr<const ColumnAccessor<va_nullable>>
             column_accessor(accessor->template getColumnAccessor<va_nullable>(value_accessor_attr_id));
-        DEBUG_ASSERT(column_accessor != nullptr);
+        DCHECK(column_accessor != nullptr);
         while (accessor->next()) {
           const void *va_value = column_accessor->getUntypedValue();
           result->set(accessor->getCurrentPosition(),
@@ -497,7 +497,7 @@ TupleIdSequence* LiteralUncheckedComparator<ComparisonFunctor,
           // column accessor available for the iteration on the underlying block.
           std::unique_ptr<const ColumnAccessor<va_nullable>>
               column_accessor(accessor->template getColumnAccessor<va_nullable>(value_accessor_attr_id));
-          DEBUG_ASSERT(column_accessor != nullptr);
+          DCHECK(column_accessor != nullptr);
           while (accessor->next()) {
             const void *cv_value
                 = native_column_vector.getUntypedValue<cv_nullable>(cv_pos);
@@ -554,7 +554,7 @@ TypedValue LiteralUncheckedComparator<ComparisonFunctor,
       // column accessor available for the iteration on the underlying block.
       std::unique_ptr<const ColumnAccessor<left_nullable>>
           column_accessor(accessor->template getColumnAccessor<left_nullable>(value_accessor_id));
-      DEBUG_ASSERT(column_accessor != nullptr);
+      DCHECK(column_accessor != nullptr);
       while (accessor->next()) {
         const void *va_value = column_accessor->getUntypedValue();
         if (left_nullable && !va_value) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/e8452468/types/operations/comparisons/PatternMatchingComparators-inl.hpp
----------------------------------------------------------------------
diff --git a/types/operations/comparisons/PatternMatchingComparators-inl.hpp b/types/operations/comparisons/PatternMatchingComparators-inl.hpp
index ca0f45e..a7f0777 100644
--- a/types/operations/comparisons/PatternMatchingComparators-inl.hpp
+++ b/types/operations/comparisons/PatternMatchingComparators-inl.hpp
@@ -247,7 +247,7 @@ TupleIdSequence* PatternMatchingUncheckedComparator<is_like_pattern, is_negation
         std::unique_ptr<const ColumnAccessor<left_nullable>>
             column_accessor
             (accessor->template getColumnAccessor<left_nullable>(value_accessor_attr_id));
-        DEBUG_ASSERT(column_accessor != nullptr);
+        DCHECK(column_accessor != nullptr);
         while (accessor->next()) {
           const void *va_value = column_accessor->getUntypedValue();
           result->set(accessor->getCurrentPosition(),