You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2017/04/21 21:11:46 UTC

[2/5] incubator-quickstep git commit: Add common-subexpression support.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/query_optimizer/tests/physical_generator/Select.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/physical_generator/Select.test b/query_optimizer/tests/physical_generator/Select.test
index f7de922..de640f6 100644
--- a/query_optimizer/tests/physical_generator/Select.test
+++ b/query_optimizer/tests/physical_generator/Select.test
@@ -1022,34 +1022,51 @@ TopLevelPlan
 [Physical Plan]
 TopLevelPlan
 +-plan=Selection
-| +-input=Aggregate
-| | +-input=TableReference[relation=Test,alias=test]
-| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
-| | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
-| | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
-| | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
-| | | +-AttributeReference[id=5,name=vchar_col,relation=test,
-| | |   type=VarChar(20) NULL]
-| | +-grouping_expressions=
-| | | +-[]
-| | +-aggregate_expressions=
-| |   +-Alias[id=6,name=,alias=$aggregate0,relation=$aggregate,type=Long]
-| |   | +-AggregateFunction[function=COUNT]
-| |   |   +-[]
-| |   +-Alias[id=7,name=,alias=$aggregate1,relation=$aggregate,type=Long]
-| |   | +-AggregateFunction[function=COUNT]
-| |   |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| |   +-Alias[id=8,name=,alias=$aggregate2,relation=$aggregate,type=Long NULL]
-| |   | +-AggregateFunction[function=SUM]
-| |   |   +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
-| |   +-Alias[id=9,name=,alias=$aggregate3,relation=$aggregate,type=Double NULL]
-| |   | +-AggregateFunction[function=AVG]
-| |   |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| |   +-Alias[id=11,name=,alias=$aggregate4,relation=$aggregate,type=Double NULL]
-| |     +-AggregateFunction[function=MAX]
-| |       +-AttributeReference[id=3,name=double_col,relation=test,
-| |         type=Double NULL]
+| +-input=Selection
+| | +-input=Aggregate
+| | | +-input=TableReference[relation=Test,alias=test]
+| | | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | | |   type=VarChar(20) NULL]
+| | | +-grouping_expressions=
+| | | | +-[]
+| | | +-aggregate_expressions=
+| | |   +-Alias[id=6,name=,alias=$aggregate0,relation=$aggregate,type=Long]
+| | |   | +-AggregateFunction[function=COUNT]
+| | |   |   +-[]
+| | |   +-Alias[id=7,name=,alias=$aggregate1,relation=$aggregate,type=Long]
+| | |   | +-AggregateFunction[function=COUNT]
+| | |   |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | |   +-Alias[id=8,name=,alias=$aggregate2,relation=$aggregate,type=Long NULL]
+| | |   | +-AggregateFunction[function=SUM]
+| | |   |   +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | |   +-Alias[id=12,name=,alias=$aggregate3,relation=,type=Long NULL]
+| | |   | +-AggregateFunction[function=SUM]
+| | |   |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | |   +-Alias[id=11,name=,alias=$aggregate4,relation=$aggregate,
+| | |     type=Double NULL]
+| | |     +-AggregateFunction[function=MAX]
+| | |       +-AttributeReference[id=3,name=double_col,relation=test,
+| | |         type=Double NULL]
+| | +-project_expressions=
+| |   +-AttributeReference[id=6,name=,alias=$aggregate0,relation=$aggregate,
+| |   | type=Long]
+| |   +-AttributeReference[id=7,name=,alias=$aggregate1,relation=$aggregate,
+| |   | type=Long]
+| |   +-AttributeReference[id=8,name=,alias=$aggregate2,relation=$aggregate,
+| |   | type=Long NULL]
+| |   +-Alias[id=9,name=,alias=$aggregate3,relation=,type=Long NULL]
+| |   | +-Divide
+| |   |   +-AttributeReference[id=12,name=,alias=$aggregate3,relation=,
+| |   |   | type=Long NULL]
+| |   |   +-AttributeReference[id=7,name=,alias=$aggregate1,relation=$aggregate,
+| |   |     type=Long]
+| |   +-AttributeReference[id=11,name=,alias=$aggregate4,relation=$aggregate,
+| |     type=Double NULL]
 | +-filter_predicate=Greater
 | | +-Add
 | | | +-AttributeReference[id=11,name=,alias=$aggregate4,relation=$aggregate,
@@ -1308,34 +1325,43 @@ TopLevelPlan
 | | | +-Alias[id=4,name=subquery_col3,relation=subquery,type=Char(20)]
 | | |   +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
 | | +-aggregate_expressions=
-| |   +-Alias[id=8,name=,alias=$aggregate0,relation=$aggregate,type=Long]
+| |   +-Alias[id=8,name=,alias=$aggregate0,relation=,type=Long]
 | |   | +-AggregateFunction[function=COUNT]
 | |   |   +-Add
-| |   |     +-Add
-| |   |     | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| |   |     | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
-| |   |     +-Add
-| |   |       +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
-| |   |       +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
-| |   +-Alias[id=9,name=,alias=$aggregate1,relation=$aggregate,type=Long NULL]
+| |   |     +-CommonSubexpression[common_subexpression_id=13]
+| |   |     | +-Operand=Add
+| |   |     |   +-AttributeReference[id=0,name=int_col,relation=test,
+| |   |     |   | type=Int NULL]
+| |   |     |   +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |   |     +-CommonSubexpression[common_subexpression_id=14]
+| |   |       +-Operand=Add
+| |   |         +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |   |         +-AttributeReference[id=2,name=float_col,relation=test,
+| |   |           type=Float]
+| |   +-Alias[id=9,name=,alias=$aggregate1,relation=,type=Long NULL]
 | |   | +-AggregateFunction[function=MAX]
 | |   |   +-Divide
-| |   |     +-Add
-| |   |     | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| |   |     | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |   |     +-CommonSubexpression[common_subexpression_id=13]
+| |   |     | +-Operand=Add
+| |   |     |   +-AttributeReference[id=0,name=int_col,relation=test,
+| |   |     |   | type=Int NULL]
+| |   |     |   +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
 | |   |     +-Literal[value=2,type=Int]
-| |   +-Alias[id=11,name=,alias=$aggregate2,relation=$aggregate,type=Long NULL]
+| |   +-Alias[id=11,name=,alias=$aggregate2,relation=,type=Long NULL]
 | |   | +-AggregateFunction[function=MAX]
 | |   |   +-Add
 | |   |     +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| |   |     +-Add
-| |   |       +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| |   |       +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
-| |   +-Alias[id=12,name=,alias=$aggregate3,relation=$aggregate,type=Double NULL]
+| |   |     +-CommonSubexpression[common_subexpression_id=13]
+| |   |       +-Operand=Add
+| |   |         +-AttributeReference[id=0,name=int_col,relation=test,
+| |   |         | type=Int NULL]
+| |   |         +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |   +-Alias[id=12,name=,alias=$aggregate3,relation=,type=Double NULL]
 | |     +-AggregateFunction[function=SUM]
-| |       +-Add
-| |         +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
-| |         +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| |       +-CommonSubexpression[common_subexpression_id=14]
+| |         +-Operand=Add
+| |           +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |           +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
 | +-filter_predicate=Greater
 | | +-AttributeReference[id=11,name=,alias=$aggregate2,relation=$aggregate,
 | | | type=Long NULL]

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index 39538ea..a6a3cd0 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -282,6 +282,7 @@ target_link_libraries(quickstep_relationaloperators_HashJoinOperator
                       quickstep_types_TypedValue
                       quickstep_types_containers_ColumnVector
                       quickstep_types_containers_ColumnVectorsValueAccessor
+                      quickstep_utility_ColumnVectorCache
                       quickstep_utility_Macros
                       quickstep_utility_lipfilter_LIPFilterAdaptiveProber
                       quickstep_utility_lipfilter_LIPFilterUtil
@@ -331,6 +332,7 @@ target_link_libraries(quickstep_relationaloperators_NestedLoopsJoinOperator
                       quickstep_storage_TupleStorageSubBlock
                       quickstep_storage_ValueAccessor
                       quickstep_types_containers_ColumnVectorsValueAccessor
+                      quickstep_utility_ColumnVectorCache
                       quickstep_utility_Macros
                       tmb)
 target_link_libraries(quickstep_relationaloperators_RebuildWorkOrder

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index 0e75411..ea90828 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -50,6 +50,7 @@
 #include "types/TypedValue.hpp"
 #include "types/containers/ColumnVector.hpp"
 #include "types/containers/ColumnVectorsValueAccessor.hpp"
+#include "utility/ColumnVectorCache.hpp"
 #include "utility/lip_filter/LIPFilterAdaptiveProber.hpp"
 #include "utility/lip_filter/LIPFilterUtil.hpp"
 
@@ -532,6 +533,7 @@ void HashInnerJoinWorkOrder::executeWithoutCopyElision(ValueAccessor *probe_acce
     }
 
     ColumnVectorsValueAccessor temp_result;
+    std::unique_ptr<ColumnVectorCache> cv_cache = std::make_unique<ColumnVectorCache>();
     for (auto selection_cit = selection_.begin();
          selection_cit != selection_.end();
          ++selection_cit) {
@@ -539,8 +541,10 @@ void HashInnerJoinWorkOrder::executeWithoutCopyElision(ValueAccessor *probe_acce
                                                                   build_accessor.get(),
                                                                   probe_relation_id,
                                                                   probe_accessor,
-                                                                  build_block_entry.second));
+                                                                  build_block_entry.second,
+                                                                  cv_cache.get()));
     }
+    cv_cache.reset();
 
     output_destination_->bulkInsertTuples(&temp_result);
   }
@@ -649,12 +653,14 @@ void HashInnerJoinWorkOrder::executeWithCopyElision(ValueAccessor *probe_accesso
         zipped_joined_tuple_ids.emplace_back(build_tids[i], probe_tids[i]);
       }
 
+      ColumnVectorCache cv_cache;
       for (const Scalar *scalar : non_trivial_expressions) {
         temp_result.addColumn(scalar->getAllValuesForJoin(build_relation_id,
                                                           build_accessor.get(),
                                                           probe_relation_id,
                                                           probe_accessor,
-                                                          zipped_joined_tuple_ids));
+                                                          zipped_joined_tuple_ids,
+                                                          &cv_cache));
       }
     }
 
@@ -765,13 +771,16 @@ void HashSemiJoinWorkOrder::executeWithResidualPredicate() {
 
   std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
       probe_store.createValueAccessor(&filter));
+
   ColumnVectorsValueAccessor temp_result;
+  std::unique_ptr<ColumnVectorCache> cv_cache = std::make_unique<ColumnVectorCache>();
   for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
        selection_it != selection_.end();
        ++selection_it) {
     temp_result.addColumn((*selection_it)->getAllValues(
-        probe_accessor_with_filter.get(), &sub_blocks_ref));
+        probe_accessor_with_filter.get(), &sub_blocks_ref, cv_cache.get()));
   }
+  cv_cache.reset();
 
   output_destination_->bulkInsertTuples(&temp_result);
 }
@@ -828,12 +837,15 @@ void HashSemiJoinWorkOrder::executeWithoutResidualPredicate() {
 
   std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
       probe_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
+
   ColumnVectorsValueAccessor temp_result;
+  std::unique_ptr<ColumnVectorCache> cv_cache = std::make_unique<ColumnVectorCache>();
   for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
        selection_it != selection_.end(); ++selection_it) {
     temp_result.addColumn((*selection_it)->getAllValues(
-        probe_accessor_with_filter.get(), &sub_blocks_ref));
+        probe_accessor_with_filter.get(), &sub_blocks_ref, cv_cache.get()));
   }
+  cv_cache.reset();
 
   output_destination_->bulkInsertTuples(&temp_result);
 }
@@ -886,12 +898,15 @@ void HashAntiJoinWorkOrder::executeWithoutResidualPredicate() {
 
   std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
       probe_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
+
   ColumnVectorsValueAccessor temp_result;
+  std::unique_ptr<ColumnVectorCache> cv_cache = std::make_unique<ColumnVectorCache>();
   for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
        selection_it != selection_.end(); ++selection_it) {
     temp_result.addColumn((*selection_it)->getAllValues(
-        probe_accessor_with_filter.get(), &sub_blocks_ref));
+        probe_accessor_with_filter.get(), &sub_blocks_ref, cv_cache.get()));
   }
+  cv_cache.reset();
 
   output_destination_->bulkInsertTuples(&temp_result);
 }
@@ -976,14 +991,18 @@ void HashAntiJoinWorkOrder::executeWithResidualPredicate() {
 
   std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
       probe_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
+
   ColumnVectorsValueAccessor temp_result;
+  std::unique_ptr<ColumnVectorCache> cv_cache = std::make_unique<ColumnVectorCache>();
   for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
        selection_it != selection_.end();
        ++selection_it) {
     temp_result.addColumn(
         (*selection_it)->getAllValues(probe_accessor_with_filter.get(),
-                                      &sub_blocks_ref));
+                                      &sub_blocks_ref,
+                                      cv_cache.get()));
   }
+  cv_cache.reset();
 
   output_destination_->bulkInsertTuples(&temp_result);
 }
@@ -1032,12 +1051,11 @@ void HashOuterJoinWorkOrder::execute() {
            &build_block_entry : *collector.getJoinedTupleMap()) {
     const BlockReference build_block =
         storage_manager_->getBlock(build_block_entry.first, build_relation_);
-    const TupleStorageSubBlock &build_store =
-        build_block->getTupleStorageSubBlock();
+    const TupleStorageSubBlock &build_store = build_block->getTupleStorageSubBlock();
+    std::unique_ptr<ValueAccessor> build_accessor(build_store.createValueAccessor());
 
-    std::unique_ptr<ValueAccessor> build_accessor(
-        build_store.createValueAccessor());
     ColumnVectorsValueAccessor temp_result;
+    std::unique_ptr<ColumnVectorCache> cv_cache = std::make_unique<ColumnVectorCache>();
     for (auto selection_it = selection_.begin();
          selection_it != selection_.end();
          ++selection_it) {
@@ -1047,8 +1065,11 @@ void HashOuterJoinWorkOrder::execute() {
               build_accessor.get(),
               probe_relation_id,
               probe_accessor.get(),
-              build_block_entry.second));
+              build_block_entry.second,
+              cv_cache.get()));
     }
+    cv_cache.reset();
+
     output_destination_->bulkInsertTuples(&temp_result);
   }
 
@@ -1061,8 +1082,9 @@ void HashOuterJoinWorkOrder::execute() {
   if (num_tuples_without_matches > 0) {
     std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
         probe_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
-    ColumnVectorsValueAccessor temp_result;
 
+    ColumnVectorsValueAccessor temp_result;
+    std::unique_ptr<ColumnVectorCache> cv_cache = std::make_unique<ColumnVectorCache>();
     for (std::size_t i = 0; i < selection_.size(); ++i) {
       if (is_selection_on_build_[i]) {
         // NOTE(harshad, jianqiao): The assumption here is that any operation
@@ -1090,9 +1112,12 @@ void HashOuterJoinWorkOrder::execute() {
       } else {
         temp_result.addColumn(
             selection_[i]->getAllValues(probe_accessor_with_filter.get(),
-                                        &sub_blocks_ref));
+                                        &sub_blocks_ref,
+                                        cv_cache.get()));
       }
     }
+    cv_cache.reset();
+
     output_destination_->bulkInsertTuples(&temp_result);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/relational_operators/NestedLoopsJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/NestedLoopsJoinOperator.cpp b/relational_operators/NestedLoopsJoinOperator.cpp
index f17402f..4ef2a70 100644
--- a/relational_operators/NestedLoopsJoinOperator.cpp
+++ b/relational_operators/NestedLoopsJoinOperator.cpp
@@ -38,6 +38,7 @@
 #include "storage/TupleStorageSubBlock.hpp"
 #include "storage/ValueAccessor.hpp"
 #include "types/containers/ColumnVectorsValueAccessor.hpp"
+#include "utility/ColumnVectorCache.hpp"
 
 #include "glog/logging.h"
 
@@ -417,6 +418,7 @@ void NestedLoopsJoinWorkOrder::executeHelper(const TupleStorageSubBlock &left_st
     // evaluation and data movement, but low enough that temporary memory
     // requirements don't get out of hand).
     ColumnVectorsValueAccessor temp_result;
+    std::unique_ptr<ColumnVectorCache> cv_cache = std::make_unique<ColumnVectorCache>();
     for (vector<unique_ptr<const Scalar>>::const_iterator selection_cit = selection_.begin();
          selection_cit != selection_.end();
          ++selection_cit) {
@@ -424,8 +426,10 @@ void NestedLoopsJoinWorkOrder::executeHelper(const TupleStorageSubBlock &left_st
                                                                   left_accessor.get(),
                                                                   right_input_relation_id,
                                                                   right_accessor.get(),
-                                                                  joined_tuple_ids));
+                                                                  joined_tuple_ids,
+                                                                  cv_cache.get()));
     }
+    cv_cache.reset();
 
     output_destination_->bulkInsertTuples(&temp_result);
   }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/storage/AggregationOperationState.cpp
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.cpp b/storage/AggregationOperationState.cpp
index 90543c4..e5dc93e 100644
--- a/storage/AggregationOperationState.cpp
+++ b/storage/AggregationOperationState.cpp
@@ -57,6 +57,7 @@
 #include "types/containers/ColumnVector.hpp"
 #include "types/containers/ColumnVectorsValueAccessor.hpp"
 #include "types/containers/Tuple.hpp"
+#include "utility/ColumnVectorCache.hpp"
 #include "utility/lip_filter/LIPFilterAdaptiveProber.hpp"
 
 #include "gflags/gflags.h"
@@ -491,9 +492,10 @@ void AggregationOperationState::aggregateBlock(const block_id input_block,
     SubBlocksReference sub_blocks_ref(tuple_store,
                                       block->getIndices(),
                                       block->getIndicesConsistent());
+    ColumnVectorCache cv_cache;
     for (const auto &expression : non_trivial_expressions_) {
       non_trivial_results->addColumn(
-          expression->getAllValues(accessor, &sub_blocks_ref));
+          expression->getAllValues(accessor, &sub_blocks_ref, &cv_cache));
     }
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/storage/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt
index cb1f098..c3db584 100644
--- a/storage/CMakeLists.txt
+++ b/storage/CMakeLists.txt
@@ -297,6 +297,7 @@ target_link_libraries(quickstep_storage_AggregationOperationState
                       quickstep_types_containers_ColumnVector
                       quickstep_types_containers_ColumnVectorsValueAccessor
                       quickstep_types_containers_Tuple
+                      quickstep_utility_ColumnVectorCache
                       quickstep_utility_Macros
                       quickstep_utility_lipfilter_LIPFilterAdaptiveProber)
 target_link_libraries(quickstep_storage_AggregationOperationState_proto
@@ -961,6 +962,7 @@ target_link_libraries(quickstep_storage_StorageBlock
                       quickstep_types_containers_ColumnVectorsValueAccessor
                       quickstep_types_containers_Tuple
                       quickstep_types_operations_comparisons_ComparisonUtil
+                      quickstep_utility_ColumnVectorCache
                       quickstep_utility_Macros
                       quickstep_utility_PtrVector)
 # CMAKE_VALIDATE_IGNORE_BEGIN
@@ -1100,6 +1102,7 @@ target_link_libraries(quickstep_storage_WindowAggregationOperationState
                       quickstep_types_containers_ColumnVector
                       quickstep_types_containers_ColumnVectorUtil
                       quickstep_types_containers_ColumnVectorsValueAccessor
+                      quickstep_utility_ColumnVectorCache
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_storage_WindowAggregationOperationState_proto
                       quickstep_expressions_Expressions_proto

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/storage/StorageBlock.cpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.cpp b/storage/StorageBlock.cpp
index e91c1ac..31f1db2 100644
--- a/storage/StorageBlock.cpp
+++ b/storage/StorageBlock.cpp
@@ -56,6 +56,7 @@
 #include "types/containers/ColumnVectorsValueAccessor.hpp"
 #include "types/containers/Tuple.hpp"
 #include "types/operations/comparisons/ComparisonUtil.hpp"
+#include "utility/ColumnVectorCache.hpp"
 #include "utility/Macros.hpp"
 
 #include "glog/logging.h"
@@ -369,15 +370,18 @@ void StorageBlock::select(const vector<unique_ptr<const Scalar>> &selection,
                                       indices_,
                                       indices_consistent_);
 
-    std::unique_ptr<ValueAccessor> accessor(
-        tuple_store_->createValueAccessor(filter));
+    std::unique_ptr<ValueAccessor> accessor(tuple_store_->createValueAccessor(filter));
+    ColumnVectorCache cv_cache;
 
     for (vector<unique_ptr<const Scalar>>::const_iterator selection_cit = selection.begin();
          selection_cit != selection.end();
          ++selection_cit) {
       // TODO(chasseur): Can probably elide some copies for parts of the
       // selection that are ScalarAttribute or ScalarLiteral.
-      temp_result.addColumn((*selection_cit)->getAllValues(accessor.get(), &sub_blocks_ref));
+      temp_result.addColumn(
+          (*selection_cit)->getAllValues(accessor.get(),
+                                         &sub_blocks_ref,
+                                         &cv_cache));
     }
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/storage/WindowAggregationOperationState.cpp
----------------------------------------------------------------------
diff --git a/storage/WindowAggregationOperationState.cpp b/storage/WindowAggregationOperationState.cpp
index 58bdf18..30d91db 100644
--- a/storage/WindowAggregationOperationState.cpp
+++ b/storage/WindowAggregationOperationState.cpp
@@ -46,6 +46,7 @@
 #include "types/containers/ColumnVector.hpp"
 #include "types/containers/ColumnVectorsValueAccessor.hpp"
 #include "types/containers/ColumnVectorUtil.hpp"
+#include "utility/ColumnVectorCache.hpp"
 
 #include "glog/logging.h"
 
@@ -236,11 +237,16 @@ void WindowAggregationOperationState::windowAggregateBlocks(
       argument_accessor = new ColumnVectorsValueAccessor();
     }
 
+    std::unique_ptr<ColumnVectorCache> cv_cache = std::make_unique<ColumnVectorCache>();
     for (const std::unique_ptr<const Scalar> &argument : arguments_) {
       argument_accessor->addColumn(argument->getAllValues(tuple_accessor,
-                                                          &sub_block_ref));
+                                                          &sub_block_ref,
+                                                          cv_cache.get()));
     }
 
+    // Release common subexpression cache as early as possible.
+    cv_cache.reset();
+
     InvokeOnAnyValueAccessor(tuple_accessor,
                              [&] (auto *tuple_accessor) -> void {  // NOLINT(build/c++11)
       tuple_accessor->beginIteration();

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/types/containers/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/types/containers/CMakeLists.txt b/types/containers/CMakeLists.txt
index c2a6623..97841c2 100644
--- a/types/containers/CMakeLists.txt
+++ b/types/containers/CMakeLists.txt
@@ -42,8 +42,7 @@ target_link_libraries(quickstep_types_containers_ColumnVectorsValueAccessor
                       quickstep_types_TypedValue
                       quickstep_types_containers_ColumnVector
                       quickstep_types_containers_Tuple
-                      quickstep_utility_Macros
-                      quickstep_utility_ScopedDeleter)
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_types_containers_Tuple
                       glog
                       quickstep_catalog_CatalogTypedefs

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/types/containers/ColumnVector.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVector.hpp b/types/containers/ColumnVector.hpp
index fc65656..5ef9871 100644
--- a/types/containers/ColumnVector.hpp
+++ b/types/containers/ColumnVector.hpp
@@ -43,6 +43,9 @@ namespace quickstep {
 // TODO(chasseur): Look into ways to allocate ColumnVector memory from the
 // StorageManager.
 
+class ColumnVector;
+typedef std::shared_ptr<const ColumnVector> ColumnVectorPtr;
+
 /**
  * @brief A vector of values of the same type. Two implementations exist:
  *        NativeColumnVector (an array of fixed-size data elements) and
@@ -107,6 +110,13 @@ class ColumnVector {
    **/
   virtual bool isNative() const = 0;
 
+  /**
+   * @brief Get the number of values in this ColumnVector.
+   *
+   * @return The number of values in this ColumnVector.
+   **/
+  virtual std::size_t size() const = 0;
+
  protected:
   const Type &type_;
 
@@ -176,7 +186,7 @@ class NativeColumnVector : public ColumnVector {
    *
    * @return The number of values in this NativeColumnVector.
    **/
-  inline std::size_t size() const {
+  inline std::size_t size() const override {
     return actual_length_;
   }
 
@@ -436,7 +446,7 @@ class IndirectColumnVector : public ColumnVector {
    *
    * @return The number of values in this IndirectColumnVector.
    **/
-  inline std::size_t size() const {
+  inline std::size_t size() const override {
     return values_.size();
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/types/containers/ColumnVectorsValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVectorsValueAccessor.hpp b/types/containers/ColumnVectorsValueAccessor.hpp
index 6dc1124..ebd46d4 100644
--- a/types/containers/ColumnVectorsValueAccessor.hpp
+++ b/types/containers/ColumnVectorsValueAccessor.hpp
@@ -32,7 +32,6 @@
 #include "types/containers/ColumnVector.hpp"
 #include "types/containers/Tuple.hpp"
 #include "utility/Macros.hpp"
-#include "utility/ScopedDeleter.hpp"
 
 #include "glog/logging.h"
 
@@ -74,22 +73,17 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
    *             this value-accessor is responsible for freeing this column
    *             vector.
    **/
-  void addColumn(ColumnVector *column, const bool owns = true) {
+  void addColumn(ColumnVectorPtr column) {
     // If this is not the first column to be added, make sure it is the same
     // length as the others.
-    DCHECK(columns_.empty()
-           || (column->isNative()
-               ? (static_cast<const NativeColumnVector*>(column)->size() == column_length_)
-               : (static_cast<const IndirectColumnVector*>(column)->size() == column_length_)));
+    DCHECK(columns_.empty() || column->size() == column_length_);
     columns_.push_back(column);
     column_native_.push_back(column->isNative());
-    if (owns) {
-      deleter_.addObject(column);
-    }
-    column_length_
-        = column->isNative()
-          ? static_cast<const NativeColumnVector*>(column)->size()
-          : static_cast<const IndirectColumnVector*>(column)->size();
+    column_length_ = column->size();
+  }
+
+  void addColumn(ColumnVector *column) {
+    addColumn(ColumnVectorPtr(column));
   }
 
   inline void beginIteration() {
@@ -309,11 +303,10 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
            && (static_cast<std::vector<ColumnVector*>::size_type>(attr_id) < columns_.size());
   }
 
-  std::vector<ColumnVector*> columns_;
+  std::vector<ColumnVectorPtr> columns_;
   std::vector<bool> column_native_;
   std::size_t column_length_;
   std::size_t current_position_;
-  ScopedDeleter deleter_;
 
   DISALLOW_COPY_AND_ASSIGN(ColumnVectorsValueAccessor);
 };

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/types/operations/binary_operations/AddBinaryOperation.hpp
----------------------------------------------------------------------
diff --git a/types/operations/binary_operations/AddBinaryOperation.hpp b/types/operations/binary_operations/AddBinaryOperation.hpp
index bc862bf..2309563 100644
--- a/types/operations/binary_operations/AddBinaryOperation.hpp
+++ b/types/operations/binary_operations/AddBinaryOperation.hpp
@@ -51,6 +51,10 @@ class AddBinaryOperation : public ArithmeticBinaryOperation {
     return instance;
   }
 
+  bool isCommutative() const override {
+    return true;
+  }
+
   bool canApplyToTypes(const Type &left,
                        const Type &right) const override;
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/types/operations/binary_operations/BinaryOperation.hpp
----------------------------------------------------------------------
diff --git a/types/operations/binary_operations/BinaryOperation.hpp b/types/operations/binary_operations/BinaryOperation.hpp
index 585a1c6..bc8a083 100644
--- a/types/operations/binary_operations/BinaryOperation.hpp
+++ b/types/operations/binary_operations/BinaryOperation.hpp
@@ -335,6 +335,19 @@ class BinaryOperation : public Operation {
   }
 
   /**
+   * @brief Whether this binary operation is commutative.
+   *
+   * @note The commutative property provides more optimization opportunities,
+   *       e.g. common subexpression elimination. Meanwhile it is always safe
+   *       to assume that a binary operation is not commutative.
+   *
+   * @return True if this binary operation is commutative; false otherwise.
+   */
+  virtual bool isCommutative() const {
+    return false;
+  }
+
+  /**
    * @brief Determine whether this BinaryOperation can apply to the specified
    *        Types.
    * @note When the Types that an operator can apply to are changed,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/types/operations/binary_operations/MultiplyBinaryOperation.hpp
----------------------------------------------------------------------
diff --git a/types/operations/binary_operations/MultiplyBinaryOperation.hpp b/types/operations/binary_operations/MultiplyBinaryOperation.hpp
index 6edc999..cc005e2 100644
--- a/types/operations/binary_operations/MultiplyBinaryOperation.hpp
+++ b/types/operations/binary_operations/MultiplyBinaryOperation.hpp
@@ -51,6 +51,10 @@ class MultiplyBinaryOperation : public ArithmeticBinaryOperation {
     return instance;
   }
 
+  bool isCommutative() const override {
+    return true;
+  }
+
   bool canApplyToTypes(const Type &left,
                        const Type &right) const override;
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index ca04462..e1fb770 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -171,6 +171,7 @@ add_library(quickstep_utility_BloomFilter_proto
 add_library(quickstep_utility_CalculateInstalledMemory CalculateInstalledMemory.cpp CalculateInstalledMemory.hpp)
 add_library(quickstep_utility_Cast ../empty_src.cpp Cast.hpp)
 add_library(quickstep_utility_CheckSnprintf ../empty_src.cpp CheckSnprintf.hpp)
+add_library(quickstep_utility_ColumnVectorCache ../empty_src.cpp ColumnVectorCache.hpp)
 add_library(quickstep_utility_CompositeHash ../empty_src.cpp CompositeHash.hpp)
 add_library(quickstep_utility_BarrieredReadWriteConcurrentBitVector
             ../empty_src.cpp
@@ -182,6 +183,7 @@ add_library(quickstep_utility_ExecutionDAGVisualizer
             ExecutionDAGVisualizer.cpp
             ExecutionDAGVisualizer.hpp)
 add_library(quickstep_utility_Glob Glob.cpp Glob.hpp)
+add_library(quickstep_utility_HashError ../empty_src.cpp HashError.hpp)
 add_library(quickstep_utility_HashPair ../empty_src.cpp HashPair.hpp)
 add_library(quickstep_utility_Macros ../empty_src.cpp Macros.hpp)
 add_library(quickstep_utility_MemStream ../empty_src.cpp MemStream.hpp)
@@ -237,6 +239,9 @@ target_link_libraries(quickstep_utility_CalculateInstalledMemory
                       glog)
 target_link_libraries(quickstep_utility_CheckSnprintf
                       glog)
+target_link_libraries(quickstep_utility_ColumnVectorCache
+                      quickstep_types_containers_ColumnVector
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_utility_CompositeHash
                       quickstep_types_TypedValue
                       quickstep_utility_HashPair
@@ -344,12 +349,14 @@ target_link_libraries(quickstep_utility
                       quickstep_utility_CalculateInstalledMemory
                       quickstep_utility_Cast
                       quickstep_utility_CheckSnprintf
+                      quickstep_utility_ColumnVectorCache
                       quickstep_utility_CompositeHash
                       quickstep_utility_DAG
                       quickstep_utility_DisjointTreeForest
                       quickstep_utility_EqualsAnyConstant
                       quickstep_utility_ExecutionDAGVisualizer
                       quickstep_utility_Glob
+                      quickstep_utility_HashError
                       quickstep_utility_HashPair
                       quickstep_utility_Macros
                       quickstep_utility_MemStream

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/utility/ColumnVectorCache.hpp
----------------------------------------------------------------------
diff --git a/utility/ColumnVectorCache.hpp b/utility/ColumnVectorCache.hpp
new file mode 100644
index 0000000..46e742c
--- /dev/null
+++ b/utility/ColumnVectorCache.hpp
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_COLUMN_VECTOR_CACHE_HPP_
+#define QUICKSTEP_UTILITY_COLUMN_VECTOR_CACHE_HPP_
+
+#include <unordered_map>
+
+#include "types/containers/ColumnVector.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Expressions
+ *  @{
+ */
+
+/**
+ * @brief A memoization table for column vectors parameterized on integer IDs.
+ **/
+class ColumnVectorCache {
+ public:
+  /**
+   * @brief Constructor.
+   */
+  ColumnVectorCache() {}
+
+  /**
+   * @brief Check whether this cache contains the column vector parameterized on
+   *        the given ID.
+   *
+   * @param share_id The integer ID for the column vector.
+   * @return True if this cache contains the column vector; false otherwise.
+   */
+  inline bool contains(const int share_id) const {
+    return cv_cache_.find(share_id) != cv_cache_.end();
+  }
+
+  /**
+   * @brief Get the cached column vector parameterized on the given ID.
+   *
+   * @param share_id The integer ID for the column vector.
+   * @return The cached column vector.
+   */
+  inline ColumnVectorPtr get(const int share_id) const {
+    DCHECK(contains(share_id));
+    return cv_cache_.at(share_id);
+  }
+
+  /**
+   * @brief Cache the column vector with the given ID as parameter.
+   *
+   * @param share_id The integer ID for the column vector.
+   * @param cv The column vector to be cached.
+   */
+  inline void set(const int share_id, const ColumnVectorPtr &cv) {
+    DCHECK(!contains(share_id));
+    cv_cache_.emplace(share_id, cv);
+  }
+
+ private:
+  std::unordered_map<int, ColumnVectorPtr> cv_cache_;
+
+  DISALLOW_COPY_AND_ASSIGN(ColumnVectorCache);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_UTILITY_COLUMN_VECTOR_CACHE_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9f48a4a7/utility/HashError.hpp
----------------------------------------------------------------------
diff --git a/utility/HashError.hpp b/utility/HashError.hpp
new file mode 100644
index 0000000..fda54d7
--- /dev/null
+++ b/utility/HashError.hpp
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_HASH_ERROR_HPP_
+#define QUICKSTEP_UTILITY_HASH_ERROR_HPP_
+
+#include <exception>
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ *  @{
+ */
+
+/**
+ * @brief Exception thrown for non-supported hash().
+ **/
+class HashNotSupported : public std::exception {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param message The error message.
+   **/
+  HashNotSupported(const std::string &message)
+      : message_(message) {}
+
+  /**
+   * @brief Destructor.
+   */
+  ~HashNotSupported() throw() {}
+
+  virtual const char* what() const throw() {
+    return message_.c_str();
+  }
+
+ private:
+  const std::string message_;
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_UTILITY_HASH_ERROR_HPP_