You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2017/04/12 19:36:04 UTC

[1/5] incubator-quickstep git commit: Implement optimizer and execution layers for UNION and INTERSECT. [Forced Update!]

Repository: incubator-quickstep
Updated Branches:
  refs/heads/common-subexpression cc1a86fc4 -> b0acc9ce9 (forced update)


Implement optimizer and execution layers for UNION and INTERSECT.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/563abe04
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/563abe04
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/563abe04

Branch: refs/heads/common-subexpression
Commit: 563abe0430af0e26f571cf07ba03079232c59fd3
Parents: 5b7b5cb
Author: Tianrun <Ti...@node-0.tianrun-qv23700.quickstep-pg0.wisc.cloudlab.us>
Authored: Mon Apr 10 10:21:11 2017 -0600
Committer: Tianrun <Ti...@node-0.tianrun-qv23700.quickstep-pg0.wisc.cloudlab.us>
Committed: Tue Apr 11 23:52:18 2017 -0600

----------------------------------------------------------------------
 query_optimizer/CMakeLists.txt                  |   2 +
 query_optimizer/ExecutionGenerator.cpp          |  68 +++++
 query_optimizer/ExecutionGenerator.hpp          |   8 +
 query_optimizer/cost_model/CMakeLists.txt       |   2 +
 query_optimizer/cost_model/SimpleCostModel.cpp  |  13 +
 query_optimizer/cost_model/SimpleCostModel.hpp  |   6 +
 .../cost_model/StarSchemaSimpleCostModel.cpp    |  12 +
 .../cost_model/StarSchemaSimpleCostModel.hpp    |   6 +-
 query_optimizer/physical/CMakeLists.txt         |  10 +
 query_optimizer/physical/PhysicalType.hpp       |   1 +
 query_optimizer/physical/UnionAll.hpp           | 188 +++++++++++++
 query_optimizer/resolver/Resolver.cpp           |   6 +-
 query_optimizer/strategy/CMakeLists.txt         |   5 +
 query_optimizer/strategy/Join.cpp               |  42 +++
 query_optimizer/strategy/OneToOne.cpp           |  30 ++
 .../tests/execution_generator/Select.test       |  86 ++++++
 .../tests/physical_generator/Select.test        | 280 +++++++++++++++++++
 query_optimizer/tests/resolver/CMakeLists.txt   |   4 +
 .../tests/resolver/SetOperation.test            | 185 ++++++++++++
 relational_operators/CMakeLists.txt             |  16 ++
 relational_operators/RelationalOperator.hpp     |   1 +
 relational_operators/UnionAllOperator.cpp       | 144 ++++++++++
 relational_operators/UnionAllOperator.hpp       | 228 +++++++++++++++
 23 files changed, 1339 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 9bdb753..08b6467 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -113,6 +113,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
                       quickstep_queryoptimizer_physical_TableGenerator
                       quickstep_queryoptimizer_physical_TableReference
                       quickstep_queryoptimizer_physical_TopLevelPlan
+                      quickstep_queryoptimizer_physical_UnionAll
                       quickstep_queryoptimizer_physical_UpdateTable
                       quickstep_queryoptimizer_physical_WindowAggregate
                       quickstep_relationaloperators_AggregationOperator
@@ -138,6 +139,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
                       quickstep_relationaloperators_SortRunGenerationOperator
                       quickstep_relationaloperators_TableGeneratorOperator
                       quickstep_relationaloperators_TextScanOperator
+                      quickstep_relationaloperators_UnionAllOperator
                       quickstep_relationaloperators_UpdateOperator
                       quickstep_relationaloperators_WindowAggregationOperator
                       quickstep_storage_AggregationOperationState_proto

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 6fec85b..3e0f647 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -94,6 +94,7 @@
 #include "query_optimizer/physical/TableGenerator.hpp"
 #include "query_optimizer/physical/TableReference.hpp"
 #include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/physical/UnionAll.hpp"
 #include "query_optimizer/physical/UpdateTable.hpp"
 #include "query_optimizer/physical/WindowAggregate.hpp"
 #include "relational_operators/AggregationOperator.hpp"
@@ -119,6 +120,7 @@
 #include "relational_operators/SortRunGenerationOperator.hpp"
 #include "relational_operators/TableGeneratorOperator.hpp"
 #include "relational_operators/TextScanOperator.hpp"
+#include "relational_operators/UnionAllOperator.hpp"
 #include "relational_operators/UpdateOperator.hpp"
 #include "relational_operators/WindowAggregationOperator.hpp"
 #include "storage/AggregationOperationState.pb.h"
@@ -313,6 +315,9 @@ void ExecutionGenerator::generatePlanInternal(
     case P::PhysicalType::kTableReference:
       return convertTableReference(
           std::static_pointer_cast<const P::TableReference>(physical_plan));
+    case P::PhysicalType::kUnionAll:
+      return convertUnionAll(
+          std::static_pointer_cast<const P::UnionAll>(physical_plan));
     case P::PhysicalType::kUpdateTable:
       return convertUpdateTable(
           std::static_pointer_cast<const P::UpdateTable>(physical_plan));
@@ -1386,6 +1391,69 @@ void ExecutionGenerator::convertInsertSelection(
                                        false /* is_pipeline_breaker */);
 }
 
+void ExecutionGenerator::convertUnionAll(
+    const P::UnionAllPtr &physical_unionall) {
+  const CatalogRelation *output_relation = nullptr;
+  const QueryContext::insert_destination_id insert_destination_index =
+      query_context_proto_->insert_destinations_size();
+  S::InsertDestination *insert_destination_proto =
+      query_context_proto_->add_insert_destinations();
+  createTemporaryCatalogRelation(physical_unionall,
+                                 &output_relation,
+                                 insert_destination_proto);
+
+  const std::vector<P::PhysicalPtr> &operands = physical_unionall->operands();
+  std::vector<const CatalogRelation*> input_relations;
+  std::vector<bool> is_stored_relation;
+  std::vector<std::vector<attribute_id>> select_attribute_ids;
+  std::vector<QueryPlan::DAGNodeIndex> dependency_operator_index;
+
+  for (const auto &operand : operands) {
+    const CatalogRelationInfo *input_relation_info =
+        findRelationInfoOutputByPhysical(operand);
+    DCHECK(input_relation_info != nullptr);
+    input_relations.push_back(input_relation_info->relation);
+    is_stored_relation.push_back(input_relation_info->isStoredRelation());
+    dependency_operator_index.push_back(input_relation_info->producer_operator_index);
+
+    const QueryContext::scalar_group_id project_expressions_group_index =
+        query_context_proto_->scalar_groups_size();
+    convertNamedExpressions(
+        E::ToNamedExpressions(operand->getOutputAttributes()),
+        query_context_proto_->add_scalar_groups());
+    std::vector<attribute_id> select_attribute_id;
+    convertSimpleProjection(project_expressions_group_index, &select_attribute_id);
+    select_attribute_ids.push_back(std::move(select_attribute_id));
+  }
+
+  UnionAllOperator *union_all =
+      new UnionAllOperator(query_handle_->query_id(),
+                           input_relations,
+                           *output_relation,
+                           insert_destination_index,
+                           is_stored_relation,
+                           select_attribute_ids);
+
+  const QueryPlan::DAGNodeIndex union_all_index =
+      execution_plan_->addRelationalOperator(union_all);
+  insert_destination_proto->set_relational_op_index(union_all_index);
+
+  for (std::size_t relation_id = 0; relation_id < is_stored_relation.size(); ++relation_id) {
+    if (!is_stored_relation[relation_id]) {
+      execution_plan_->addDirectDependency(union_all_index,
+                                           dependency_operator_index[relation_id],
+                                           false /* is_pipeline_breaker */);
+    }
+  }
+
+  physical_to_output_relation_map_.emplace(
+      std::piecewise_construct,
+      std::forward_as_tuple(physical_unionall),
+      std::forward_as_tuple(union_all_index,
+                            output_relation));
+  temporary_relation_info_vec_.emplace_back(union_all_index, output_relation);
+}
+
 void ExecutionGenerator::convertUpdateTable(
     const P::UpdateTablePtr &physical_plan) {
   // UpdateTable is converted to an Update and a SaveBlocks.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/ExecutionGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp
index f4e614a..19e75c1 100644
--- a/query_optimizer/ExecutionGenerator.hpp
+++ b/query_optimizer/ExecutionGenerator.hpp
@@ -62,6 +62,7 @@
 #include "query_optimizer/physical/TableGenerator.hpp"
 #include "query_optimizer/physical/TableReference.hpp"
 #include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/physical/UnionAll.hpp"
 #include "query_optimizer/physical/UpdateTable.hpp"
 #include "query_optimizer/physical/WindowAggregate.hpp"
 #include "utility/Macros.hpp"
@@ -328,6 +329,13 @@ class ExecutionGenerator {
   void convertInsertTuple(const physical::InsertTuplePtr &physical_plan);
 
   /**
+   * @brief Converts a physical UnionAll to a UnionAllOperator.
+   *
+   * @param physical_plan The UnionAll to be converted.
+   */
+  void convertUnionAll(const physical::UnionAllPtr &physical_plan);
+
+  /**
    * @brief Converts an UpdateTable to an Update and a SaveBlocks.
    *
    * @param physical_plan The UpdateTable to be converted.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/cost_model/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/CMakeLists.txt b/query_optimizer/cost_model/CMakeLists.txt
index 4042915..3d4ee93 100644
--- a/query_optimizer/cost_model/CMakeLists.txt
+++ b/query_optimizer/cost_model/CMakeLists.txt
@@ -45,6 +45,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_SimpleCostModel
                       quickstep_queryoptimizer_physical_TableGenerator
                       quickstep_queryoptimizer_physical_TableReference
                       quickstep_queryoptimizer_physical_TopLevelPlan
+                      quickstep_queryoptimizer_physical_UnionAll
                       quickstep_queryoptimizer_physical_WindowAggregate
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
@@ -79,6 +80,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostMod
                       quickstep_queryoptimizer_physical_TableGenerator
                       quickstep_queryoptimizer_physical_TableReference
                       quickstep_queryoptimizer_physical_TopLevelPlan
+                      quickstep_queryoptimizer_physical_UnionAll
                       quickstep_queryoptimizer_physical_WindowAggregate
                       quickstep_types_NullType
                       quickstep_types_Type

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/cost_model/SimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/SimpleCostModel.cpp b/query_optimizer/cost_model/SimpleCostModel.cpp
index cfd8a75..cc46c00 100644
--- a/query_optimizer/cost_model/SimpleCostModel.cpp
+++ b/query_optimizer/cost_model/SimpleCostModel.cpp
@@ -38,6 +38,7 @@
 #include "query_optimizer/physical/TableGenerator.hpp"
 #include "query_optimizer/physical/TableReference.hpp"
 #include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/physical/UnionAll.hpp"
 #include "query_optimizer/physical/WindowAggregate.hpp"
 
 #include "glog/logging.h"
@@ -90,6 +91,9 @@ std::size_t SimpleCostModel::estimateCardinality(
     case P::PhysicalType::kWindowAggregate:
       return estimateCardinalityForWindowAggregate(
           std::static_pointer_cast<const P::WindowAggregate>(physical_plan));
+    case P::PhysicalType::kUnionAll:
+      return estimateCardinalityForUnionAll(
+          std::static_pointer_cast<const P::UnionAll>(physical_plan));
     default:
       throw UnsupportedPhysicalPlan(physical_plan);
   }
@@ -163,6 +167,15 @@ std::size_t SimpleCostModel::estimateCardinalityForWindowAggregate(
   return estimateCardinality(physical_plan->input());
 }
 
+std::size_t SimpleCostModel::estimateCardinalityForUnionAll(
+    const physical::UnionAllPtr &physical_plan) {
+  std::size_t cardinality = 0;
+  for (const P::PhysicalPtr &operand : physical_plan->operands()) {
+    cardinality += estimateCardinality(operand);
+  }
+  return cardinality;
+}
+
 }  // namespace cost
 }  // namespace optimizer
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/cost_model/SimpleCostModel.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/SimpleCostModel.hpp b/query_optimizer/cost_model/SimpleCostModel.hpp
index 0660c37..653e115 100644
--- a/query_optimizer/cost_model/SimpleCostModel.hpp
+++ b/query_optimizer/cost_model/SimpleCostModel.hpp
@@ -35,6 +35,7 @@
 #include "query_optimizer/physical/TableGenerator.hpp"
 #include "query_optimizer/physical/TableReference.hpp"
 #include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/physical/UnionAll.hpp"
 #include "query_optimizer/physical/WindowAggregate.hpp"
 #include "utility/Macros.hpp"
 
@@ -109,6 +110,11 @@ class SimpleCostModel : public CostModel {
   std::size_t estimateCardinalityForWindowAggregate(
       const physical::WindowAggregatePtr &physical_plan);
 
+  // Returns the estimated cardinality of a UnionAll operation,
+  // which is the sum of the cardinalities of all of its operands.
+  std::size_t estimateCardinalityForUnionAll(
+      const physical::UnionAllPtr &physical_plan);
+
   const std::vector<physical::PhysicalPtr> &shared_subplans_;
 
   DISALLOW_COPY_AND_ASSIGN(SimpleCostModel);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index fc775c7..b17fac0 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -55,6 +55,7 @@
 #include "query_optimizer/physical/TableGenerator.hpp"
 #include "query_optimizer/physical/TableReference.hpp"
 #include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/physical/UnionAll.hpp"
 #include "types/Type.hpp"
 #include "types/TypeID.hpp"
 #include "types/TypedValue.hpp"
@@ -118,6 +119,9 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinality(
     case P::PhysicalType::kWindowAggregate:
       return estimateCardinalityForWindowAggregate(
           std::static_pointer_cast<const P::WindowAggregate>(physical_plan));
+    case P::PhysicalType::kUnionAll:
+      return estimateCardinalityForUnionAll(
+          std::static_pointer_cast<const P::UnionAll>(physical_plan));
     default:
       throw UnsupportedPhysicalPlan(physical_plan);
   }
@@ -203,6 +207,14 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForWindowAggregate(
   return estimateCardinality(physical_plan->input());
 }
 
+std::size_t StarSchemaSimpleCostModel::estimateCardinalityForUnionAll(
+    const P::UnionAllPtr &physical_plan) {
+  std::size_t cardinality = 0;
+  for (const P::PhysicalPtr &operand : physical_plan->operands()) {
+    cardinality += estimateCardinality(operand);
+  }
+  return cardinality;
+}
 
 std::size_t StarSchemaSimpleCostModel::estimateNumGroupsForAggregate(
     const physical::AggregatePtr &aggregate) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
index afb2ef9..0461077 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
@@ -39,6 +39,7 @@
 #include "query_optimizer/physical/TableGenerator.hpp"
 #include "query_optimizer/physical/TableReference.hpp"
 #include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/physical/UnionAll.hpp"
 #include "query_optimizer/physical/WindowAggregate.hpp"
 #include "types/TypedValue.hpp"
 #include "utility/Macros.hpp"
@@ -82,7 +83,7 @@ class StarSchemaSimpleCostModel : public CostModel {
 
   /**
    * @brief Estimate the number of distinct values of an attribute in a relation.
-   * 
+   *
    * @param attribute_id The expression id of the target attribute.
    * @param physical_plan The physical plan of the attribute's relation.
    * @return The estimated number of distinct values for the attribute.
@@ -217,6 +218,9 @@ class StarSchemaSimpleCostModel : public CostModel {
   std::size_t estimateCardinalityForWindowAggregate(
       const physical::WindowAggregatePtr &physical_plan);
 
+  std::size_t estimateCardinalityForUnionAll(
+      const physical::UnionAllPtr &physical_plan);
+
   double estimateSelectivityForPredicate(
       const expressions::PredicatePtr &filter_predicate,
       const physical::PhysicalPtr &physical_plan);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/physical/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/CMakeLists.txt b/query_optimizer/physical/CMakeLists.txt
index 77ae75e..2751c6e 100644
--- a/query_optimizer/physical/CMakeLists.txt
+++ b/query_optimizer/physical/CMakeLists.txt
@@ -45,6 +45,7 @@ add_library(quickstep_queryoptimizer_physical_Sort Sort.cpp Sort.hpp)
 add_library(quickstep_queryoptimizer_physical_TableGenerator ../../empty_src.cpp TableGenerator.hpp)
 add_library(quickstep_queryoptimizer_physical_TableReference TableReference.cpp TableReference.hpp)
 add_library(quickstep_queryoptimizer_physical_TopLevelPlan TopLevelPlan.cpp TopLevelPlan.hpp)
+add_library(quickstep_queryoptimizer_physical_UnionAll ../../empty_src.cpp UnionAll.hpp)
 add_library(quickstep_queryoptimizer_physical_UpdateTable UpdateTable.cpp UpdateTable.hpp)
 add_library(quickstep_queryoptimizer_physical_WindowAggregate WindowAggregate.cpp WindowAggregate.hpp)
 
@@ -274,6 +275,14 @@ target_link_libraries(quickstep_queryoptimizer_physical_TopLevelPlan
                       quickstep_queryoptimizer_physical_PhysicalType
                       quickstep_utility_Cast
                       quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_physical_UnionAll
+                      quickstep_queryoptimizer_OptimizerTree
+                      quickstep_queryoptimizer_expressions_AttributeReference
+                      quickstep_queryoptimizer_expressions_ExpressionUtil
+                      quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_physical_PhysicalType
+                      quickstep_utility_Cast
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_physical_UpdateTable
                       glog
                       quickstep_queryoptimizer_OptimizerTree
@@ -326,5 +335,6 @@ target_link_libraries(quickstep_queryoptimizer_physical
                       quickstep_queryoptimizer_physical_TableGenerator
                       quickstep_queryoptimizer_physical_TableReference
                       quickstep_queryoptimizer_physical_TopLevelPlan
+                      quickstep_queryoptimizer_physical_UnionAll
                       quickstep_queryoptimizer_physical_UpdateTable
                       quickstep_queryoptimizer_physical_WindowAggregate)

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/physical/PhysicalType.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/PhysicalType.hpp b/query_optimizer/physical/PhysicalType.hpp
index 077bd54..47db7ec 100644
--- a/query_optimizer/physical/PhysicalType.hpp
+++ b/query_optimizer/physical/PhysicalType.hpp
@@ -51,6 +51,7 @@ enum class PhysicalType {
   kTableGenerator,
   kTableReference,
   kTopLevelPlan,
+  kUnionAll,
   kUpdateTable,
   kWindowAggregate
 };

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/physical/UnionAll.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/UnionAll.hpp b/query_optimizer/physical/UnionAll.hpp
new file mode 100644
index 0000000..939249f
--- /dev/null
+++ b/query_optimizer/physical/UnionAll.hpp
@@ -0,0 +1,188 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_UNION_ALL_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_UNION_ALL_HPP_
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "query_optimizer/OptimizerTree.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "utility/Cast.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+namespace optimizer {
+namespace physical {
+
+/** \addtogroup OptimizerPhysical
+ *  @{
+ */
+
+class UnionAll;
+typedef std::shared_ptr<const UnionAll> UnionAllPtr;
+
+/**
+ * @brief Perform Union All operation on query results.
+ */
+class UnionAll : public Physical {
+ public:
+  ~UnionAll() override {}
+
+  PhysicalType getPhysicalType() const override {
+    return PhysicalType::kUnionAll;
+  }
+
+  std::string getName() const override {
+    return "UnionAll";
+  }
+
+  /**
+   * @return The operands for the UnionAll operator.
+   */
+  const std::vector<PhysicalPtr>& operands() const {
+    return operands_;
+  }
+
+  PhysicalPtr copyWithNewChildren(
+      const std::vector<PhysicalPtr> &new_children) const override {
+    return Create(new_children, project_attributes_);
+  }
+
+  std::vector<expressions::AttributeReferencePtr> getOutputAttributes() const override {
+    return project_attributes_;
+  }
+
+  std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() const override {
+    std::vector<expressions::AttributeReferencePtr> referenced_attributes;
+    for (const PhysicalPtr &operand : operands_) {
+      const std::vector<expressions::AttributeReferencePtr> reference =
+          operand->getOutputAttributes();
+      referenced_attributes.insert(referenced_attributes.end(),
+                                   reference.begin(),
+                                   reference.end());
+    }
+    return referenced_attributes;
+  }
+
+  bool maybeCopyWithPrunedExpressions(
+      const expressions::UnorderedNamedExpressionSet &referenced_expressions,
+      PhysicalPtr *output) const override {
+    std::vector<std::size_t> expression_index;
+    for (std::size_t i = 0; i < project_attributes_.size(); ++i) {
+      if (referenced_expressions.find(project_attributes_[i]) != referenced_expressions.end()) {
+        expression_index.push_back(i);
+      }
+    }
+
+    if (expression_index.size() == project_attributes_.size()) {
+      return false;
+    }
+
+    std::vector<PhysicalPtr> new_operands;
+    for (std::size_t i = 0; i < operands_.size(); ++i) {
+      // Currently only prune when all children are Selection nodes.
+      if (operands_[i]->getPhysicalType() != PhysicalType::kSelection) {
+        return false;
+      }
+
+      const auto child_output_attrs = operands_[i]->getOutputAttributes();
+      expressions::UnorderedNamedExpressionSet child_referenced_attrs;
+      for (const std::size_t idx : expression_index) {
+        child_referenced_attrs.emplace(child_output_attrs[idx]);
+      }
+      PhysicalPtr new_operand;
+      if (!operands_[i]->maybeCopyWithPrunedExpressions(
+              child_referenced_attrs, &new_operand)) {
+          return false;
+      }
+      DCHECK_EQ(expression_index.size(), new_operand->getOutputAttributes().size());
+      new_operands.push_back(new_operand);
+    }
+    std::vector<expressions::AttributeReferencePtr> new_project_attributes;
+    for (const std::size_t idx : expression_index) {
+      new_project_attributes.emplace_back(project_attributes_[idx]);
+    }
+    *output = Create(new_operands, new_project_attributes);
+    return true;
+  }
+
+  /**
+   * @brief Creates the physical node of UnionAll.
+   *
+   * @param operands The children physical nodes of UnionAll.
+   * @param project_attributes The project attributes of this UnionAll.
+   * @return An immutable UnionAll node.
+   */
+  static UnionAllPtr Create(
+      const std::vector<PhysicalPtr> &operands,
+      const std::vector<expressions::AttributeReferencePtr> &project_attributes) {
+    return UnionAllPtr(
+        new UnionAll(operands, project_attributes));
+  }
+
+ protected:
+  void getFieldStringItems(
+      std::vector<std::string> *inline_field_names,
+      std::vector<std::string> *inline_field_values,
+      std::vector<std::string> *non_container_child_field_names,
+      std::vector<OptimizerTreeBaseNodePtr> *non_container_child_fields,
+      std::vector<std::string> *container_child_field_names,
+      std::vector<std::vector<OptimizerTreeBaseNodePtr>> *container_child_fields) const override {
+    container_child_field_names->emplace_back("operands");
+    container_child_fields->emplace_back(
+        CastSharedPtrVector<OptimizerTreeBase>(operands_));
+
+    container_child_field_names->emplace_back("project_attributes");
+    container_child_fields->emplace_back(
+        CastSharedPtrVector<OptimizerTreeBase>(project_attributes_));
+  }
+
+ private:
+  UnionAll(const std::vector<PhysicalPtr> &operands,
+           const std::vector<expressions::AttributeReferencePtr> &project_attributes)
+      : operands_(operands),
+        project_attributes_(project_attributes) {
+    for (const PhysicalPtr &operand : operands) {
+      addChild(operand);
+    }
+  }
+
+  const std::vector<PhysicalPtr> operands_;
+  const std::vector<expressions::AttributeReferencePtr> project_attributes_;
+
+  DISALLOW_COPY_AND_ASSIGN(UnionAll);
+};
+
+/** @} */
+
+}  // namespace physical
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_UNION_ALL_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/resolver/Resolver.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/resolver/Resolver.cpp b/query_optimizer/resolver/Resolver.cpp
index 2d3a06b..0f65255 100644
--- a/query_optimizer/resolver/Resolver.cpp
+++ b/query_optimizer/resolver/Resolver.cpp
@@ -1416,13 +1416,13 @@ L::LogicalPtr Resolver::resolveSetOperations(
             THROW_SQL_ERROR_AT(&parse_set_operations)
                 << "There is not a safely coerce between "
                 << current_type.getName()
-                << "and " << possible_type.getName();
+                << " and " << possible_type.getName();
           }
         } else {
           THROW_SQL_ERROR_AT(&parse_set_operations)
-              << "Does not support cast operation between non-numeric types"
+              << "Does not support cast operation with non-numeric types "
               << current_type.getName()
-              << "and " << possible_type.getName();
+              << " and " << possible_type.getName();
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/strategy/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/strategy/CMakeLists.txt b/query_optimizer/strategy/CMakeLists.txt
index 002fa9b..e3e6d76 100644
--- a/query_optimizer/strategy/CMakeLists.txt
+++ b/query_optimizer/strategy/CMakeLists.txt
@@ -59,6 +59,8 @@ target_link_libraries(quickstep_queryoptimizer_strategy_Join
                       quickstep_queryoptimizer_logical_NestedLoopsJoin
                       quickstep_queryoptimizer_logical_PatternMatcher
                       quickstep_queryoptimizer_logical_Project
+                      quickstep_queryoptimizer_logical_SetOperation
+                      quickstep_queryoptimizer_physical_Aggregate
                       quickstep_queryoptimizer_physical_HashJoin
                       quickstep_queryoptimizer_physical_NestedLoopsJoin
                       quickstep_queryoptimizer_physical_PatternMatcher
@@ -83,6 +85,7 @@ target_link_libraries(quickstep_queryoptimizer_strategy_OneToOne
                       quickstep_queryoptimizer_logical_Logical
                       quickstep_queryoptimizer_logical_LogicalType
                       quickstep_queryoptimizer_logical_Sample
+                      quickstep_queryoptimizer_logical_SetOperation
                       quickstep_queryoptimizer_logical_SharedSubplanReference
                       quickstep_queryoptimizer_logical_Sort
                       quickstep_queryoptimizer_logical_TableGenerator
@@ -90,6 +93,7 @@ target_link_libraries(quickstep_queryoptimizer_strategy_OneToOne
                       quickstep_queryoptimizer_logical_TopLevelPlan
                       quickstep_queryoptimizer_logical_UpdateTable
                       quickstep_queryoptimizer_logical_WindowAggregate
+                      quickstep_queryoptimizer_physical_Aggregate
                       quickstep_queryoptimizer_physical_CopyFrom
                       quickstep_queryoptimizer_physical_CreateIndex
                       quickstep_queryoptimizer_physical_CreateTable
@@ -104,6 +108,7 @@ target_link_libraries(quickstep_queryoptimizer_strategy_OneToOne
                       quickstep_queryoptimizer_physical_TableGenerator
                       quickstep_queryoptimizer_physical_TableReference
                       quickstep_queryoptimizer_physical_TopLevelPlan
+                      quickstep_queryoptimizer_physical_UnionAll
                       quickstep_queryoptimizer_physical_UpdateTable
                       quickstep_queryoptimizer_physical_WindowAggregate
                       quickstep_queryoptimizer_strategy_Strategy

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/strategy/Join.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/strategy/Join.cpp b/query_optimizer/strategy/Join.cpp
index cd01bd1..f7d6d24 100644
--- a/query_optimizer/strategy/Join.cpp
+++ b/query_optimizer/strategy/Join.cpp
@@ -37,6 +37,8 @@
 #include "query_optimizer/logical/NestedLoopsJoin.hpp"
 #include "query_optimizer/logical/PatternMatcher.hpp"
 #include "query_optimizer/logical/Project.hpp"
+#include "query_optimizer/logical/SetOperation.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
 #include "query_optimizer/physical/HashJoin.hpp"
 #include "query_optimizer/physical/NestedLoopsJoin.hpp"
 #include "query_optimizer/physical/PatternMatcher.hpp"
@@ -61,6 +63,7 @@ bool Join::generatePlan(const L::LogicalPtr &logical_input,
   L::FilterPtr logical_filter;
   L::HashJoinPtr logical_hash_join;
   L::NestedLoopsJoinPtr logical_nested_loops_join;
+  L::SetOperationPtr logical_set_operation;
 
   // Collapse project-join.
   if (L::SomeProject::MatchesWithConditionalCast(logical_input, &logical_project)) {
@@ -137,6 +140,45 @@ bool Join::generatePlan(const L::LogicalPtr &logical_input,
     }
   }
 
+  // Convert set operations.
+  if (L::SomeSetOperation::MatchesWithConditionalCast(logical_input, &logical_set_operation)) {
+    if (logical_set_operation->getSetOperationType() !=  L::SetOperation::kIntersect) {
+      // Union and UnionAll operations are in OneToOne.cpp.
+      return false;
+    }
+
+    // For Intersect operation, convert it into hash left semi joins followed by an Aggregate.
+    const std::vector<L::LogicalPtr> &operands = logical_set_operation->getOperands();
+
+    DCHECK_GE(operands.size(), 2u);
+    L::LogicalPtr intermediate = operands[0];
+    for (std::size_t i = 1; i < operands.size(); ++i) {
+      intermediate = L::HashJoin::Create(intermediate,
+                                         operands[i],
+                                         intermediate->getOutputAttributes(),
+                                         operands[i]->getOutputAttributes(),
+                                         nullptr /* residual_predicate */,
+                                         L::HashJoin::JoinType::kLeftSemiJoin);
+    }
+
+    const std::vector<E::NamedExpressionPtr> project_expressions =
+        E::ToNamedExpressions(operands[0]->getOutputAttributes());
+    logical_project = L::Project::Create(intermediate,
+                                         project_expressions);
+
+    P::PhysicalPtr physical_hash_join;
+    addHashJoin(logical_project,
+                nullptr /* logical_filter */,
+                std::static_pointer_cast<const L::HashJoin>(intermediate),
+                &physical_hash_join);
+
+    *physical_output = P::Aggregate::Create(physical_hash_join,
+                                            project_expressions,
+                                            {} /* aggregate_expressions */,
+                                            nullptr /* filter_predicate */);
+    return true;
+  }
+
   // Convert a single binary join.
   if (L::SomeHashJoin::MatchesWithConditionalCast(logical_input, &logical_hash_join)) {
     addHashJoin(nullptr /* logical_project */,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/strategy/OneToOne.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/strategy/OneToOne.cpp b/query_optimizer/strategy/OneToOne.cpp
index 7d0c4cb..af4e150 100644
--- a/query_optimizer/strategy/OneToOne.cpp
+++ b/query_optimizer/strategy/OneToOne.cpp
@@ -35,6 +35,7 @@
 #include "query_optimizer/logical/InsertTuple.hpp"
 #include "query_optimizer/logical/LogicalType.hpp"
 #include "query_optimizer/logical/Sample.hpp"
+#include "query_optimizer/logical/SetOperation.hpp"
 #include "query_optimizer/logical/SharedSubplanReference.hpp"
 #include "query_optimizer/logical/Sort.hpp"
 #include "query_optimizer/logical/TableGenerator.hpp"
@@ -42,6 +43,7 @@
 #include "query_optimizer/logical/TopLevelPlan.hpp"
 #include "query_optimizer/logical/UpdateTable.hpp"
 #include "query_optimizer/logical/WindowAggregate.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
 #include "query_optimizer/physical/CopyFrom.hpp"
 #include "query_optimizer/physical/CreateIndex.hpp"
 #include "query_optimizer/physical/CreateTable.hpp"
@@ -55,6 +57,7 @@
 #include "query_optimizer/physical/TableGenerator.hpp"
 #include "query_optimizer/physical/TableReference.hpp"
 #include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/physical/UnionAll.hpp"
 #include "query_optimizer/physical/UpdateTable.hpp"
 #include "query_optimizer/physical/WindowAggregate.hpp"
 
@@ -164,6 +167,33 @@ bool OneToOne::generatePlan(const L::LogicalPtr &logical_input,
           sample->percentage());
       return true;
     }
+    case L::LogicalType::kSetOperation: {
+      const L::SetOperationPtr set_operation =
+          std::static_pointer_cast<const L::SetOperation>(logical_input);
+      std::vector<P::PhysicalPtr> physical_operands;
+      for (const L::LogicalPtr &logical : set_operation->getOperands()) {
+        physical_operands.push_back(physical_mapper_->createOrGetPhysicalFromLogical(logical));
+      }
+      if (set_operation->getSetOperationType() == L::SetOperation::kUnionAll) {
+        // For UnionAll operation, convert it into a physical UnionAll.
+        *physical_output = P::UnionAll::Create(physical_operands,
+                                               set_operation->getOutputAttributes());
+        return true;
+      } else if (set_operation->getSetOperationType() == L::SetOperation::kUnion) {
+        // For Union operation, convert it into a physical UnionAll followed by an Aggregate.
+        P::PhysicalPtr union_all = P::UnionAll::Create(physical_operands,
+                                                       set_operation->getOutputAttributes());
+        *physical_output = P::Aggregate::Create(
+            union_all,
+            E::ToNamedExpressions(set_operation->getOutputAttributes()),
+            {} /* aggregate_expressions */,
+            nullptr /* filter_predicate */);
+        return true;
+      } else {
+        // Intersect operations are in Join.cpp.
+        return false;
+      }
+    }
     case L::LogicalType::kSort: {
       const L::Sort *sort =
           static_cast<const L::Sort*>(logical_input.get());

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/tests/execution_generator/Select.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/execution_generator/Select.test b/query_optimizer/tests/execution_generator/Select.test
index 494e759..b3aaaa9 100644
--- a/query_optimizer/tests/execution_generator/Select.test
+++ b/query_optimizer/tests/execution_generator/Select.test
@@ -1072,3 +1072,89 @@ WINDOW w AS
 +------------------------+
 |                     -18|
 +------------------------+
+==
+
+SELECT int_col AS result FROM test
+WHERE int_col < 5
+UNION
+SELECT int_col + 3 AS result FROM test
+WHERE int_col < 5;
+--
++-----------+
+|result     |
++-----------+
+|         -1|
+|          2|
+|         -3|
+|          4|
+|         -5|
+|         -7|
+|         -9|
+|        -11|
+|        -13|
+|        -15|
+|        -17|
+|        -19|
+|        -21|
+|        -23|
+|          5|
+|          0|
+|          7|
+|         -2|
+|         -4|
+|         -6|
+|         -8|
+|        -10|
+|        -12|
+|        -14|
+|        -16|
+|        -18|
+|        -20|
++-----------+
+==
+
+SELECT i + 1 AS result
+FROM generate_series(1, 5) AS gs(i)
+INTERSECT
+SELECT i * 2 AS result
+FROM generate_series(1, 5) AS gs(i)
+--
++-----------+
+|result     |
++-----------+
+|          2|
+|          4|
+|          6|
++-----------+
+==
+
+SELECT float_col FROM test
+WHERE int_col < 5
+ORDER BY int_col DESC
+LIMIT 5
+UNION ALL
+SELECT float_col FROM test
+WHERE int_col < 5
+ORDER BY int_col DESC
+LIMIT 10
+--
++---------------+
+|float_col      |
++---------------+
+|              2|
+|     1.41421354|
+|              1|
+|     1.73205078|
+|     2.23606801|
+|              2|
+|     1.41421354|
+|              1|
+|     1.73205078|
+|     2.23606801|
+|     2.64575124|
+|              3|
+|     3.31662488|
+|     3.60555124|
+|     3.87298346|
++---------------+
+==

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/tests/physical_generator/Select.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/physical_generator/Select.test b/query_optimizer/tests/physical_generator/Select.test
index f81cad8..f7de922 100644
--- a/query_optimizer/tests/physical_generator/Select.test
+++ b/query_optimizer/tests/physical_generator/Select.test
@@ -3140,3 +3140,283 @@ TopLevelPlan
   +-AttributeReference[id=7,name=,alias=sum(avg(int_col)),relation=,
     type=Double NULL]
 ==
+
+SELECT int_col FROM test
+UNION
+SELECT int_col FROM test
+--
+[Optimized Logical Plan]
+TopLevelPlan
++-plan=Union[set_operation_type=Union]
+| +-operands=
+| | +-Project
+| | | +-input=TableReference[relation_name=Test,relation_alias=test]
+| | | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | | |   type=VarChar(20) NULL]
+| | | +-project_list=
+| | |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | +-Project
+| |   +-input=TableReference[relation_name=Test,relation_alias=test]
+| |   | +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| |   | +-AttributeReference[id=7,name=long_col,relation=test,type=Long]
+| |   | +-AttributeReference[id=8,name=float_col,relation=test,type=Float]
+| |   | +-AttributeReference[id=9,name=double_col,relation=test,type=Double NULL]
+| |   | +-AttributeReference[id=10,name=char_col,relation=test,type=Char(20)]
+| |   | +-AttributeReference[id=11,name=vchar_col,relation=test,
+| |   |   type=VarChar(20) NULL]
+| |   +-project_list=
+| |     +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| +-project_attributes=
+|   +-AttributeReference[id=12,name=int_col,relation=,type=Int NULL]
++-output_attributes=
+  +-AttributeReference[id=12,name=int_col,relation=,type=Int NULL]
+[Physical Plan]
+TopLevelPlan
++-plan=Aggregate
+| +-input=UnionAll
+| | +-operands=
+| | | +-Selection
+| | | | +-input=TableReference[relation=Test,alias=test]
+| | | | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | | | +-AttributeReference[id=3,name=double_col,relation=test,
+| | | | | | type=Double NULL]
+| | | | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | | | |   type=VarChar(20) NULL]
+| | | | +-project_expressions=
+| | | |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-Selection
+| | |   +-input=TableReference[relation=Test,alias=test]
+| | |   | +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| | |   | +-AttributeReference[id=7,name=long_col,relation=test,type=Long]
+| | |   | +-AttributeReference[id=8,name=float_col,relation=test,type=Float]
+| | |   | +-AttributeReference[id=9,name=double_col,relation=test,
+| | |   | | type=Double NULL]
+| | |   | +-AttributeReference[id=10,name=char_col,relation=test,type=Char(20)]
+| | |   | +-AttributeReference[id=11,name=vchar_col,relation=test,
+| | |   |   type=VarChar(20) NULL]
+| | |   +-project_expressions=
+| | |     +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| | +-project_attributes=
+| |   +-AttributeReference[id=12,name=int_col,relation=,type=Int NULL]
+| +-grouping_expressions=
+| | +-AttributeReference[id=12,name=int_col,relation=,type=Int NULL]
+| +-aggregate_expressions=
+|   +-[]
++-output_attributes=
+  +-AttributeReference[id=12,name=int_col,relation=,type=Int NULL]
+==
+
+SELECT intv FROM
+  (SELECT int_col, double_col FROM test
+   UNION ALL
+   SELECT int_col, double_col FROM test
+  ) AS temp(intv, doublev)
+--
+[Optimized Logical Plan]
+TopLevelPlan
++-plan=Project
+| +-input=UnionAll[set_operation_type=UnionAll]
+| | +-operands=
+| | | +-Project
+| | | | +-input=TableReference[relation_name=Test,relation_alias=test]
+| | | | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | | | +-AttributeReference[id=3,name=double_col,relation=test,
+| | | | | | type=Double NULL]
+| | | | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | | | |   type=VarChar(20) NULL]
+| | | | +-project_list=
+| | | |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | |   +-AttributeReference[id=3,name=double_col,relation=test,
+| | | |     type=Double NULL]
+| | | +-Project
+| | |   +-input=TableReference[relation_name=Test,relation_alias=test]
+| | |   | +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| | |   | +-AttributeReference[id=7,name=long_col,relation=test,type=Long]
+| | |   | +-AttributeReference[id=8,name=float_col,relation=test,type=Float]
+| | |   | +-AttributeReference[id=9,name=double_col,relation=test,
+| | |   | | type=Double NULL]
+| | |   | +-AttributeReference[id=10,name=char_col,relation=test,type=Char(20)]
+| | |   | +-AttributeReference[id=11,name=vchar_col,relation=test,
+| | |   |   type=VarChar(20) NULL]
+| | |   +-project_list=
+| | |     +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| | |     +-AttributeReference[id=9,name=double_col,relation=test,
+| | |       type=Double NULL]
+| | +-project_attributes=
+| |   +-AttributeReference[id=12,name=int_col,relation=,type=Int NULL]
+| |   +-AttributeReference[id=13,name=double_col,relation=,type=Double NULL]
+| +-project_list=
+|   +-Alias[id=14,name=intv,relation=,type=Int NULL]
+|     +-AttributeReference[id=12,name=int_col,relation=,type=Int NULL]
++-output_attributes=
+  +-AttributeReference[id=14,name=intv,relation=,type=Int NULL]
+[Physical Plan]
+TopLevelPlan
++-plan=Selection
+| +-input=UnionAll
+| | +-operands=
+| | | +-Selection
+| | | | +-input=TableReference[relation=Test,alias=test]
+| | | | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | | | +-AttributeReference[id=3,name=double_col,relation=test,
+| | | | | | type=Double NULL]
+| | | | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | | | |   type=VarChar(20) NULL]
+| | | | +-project_expressions=
+| | | |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-Selection
+| | |   +-input=TableReference[relation=Test,alias=test]
+| | |   | +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| | |   | +-AttributeReference[id=7,name=long_col,relation=test,type=Long]
+| | |   | +-AttributeReference[id=8,name=float_col,relation=test,type=Float]
+| | |   | +-AttributeReference[id=9,name=double_col,relation=test,
+| | |   | | type=Double NULL]
+| | |   | +-AttributeReference[id=10,name=char_col,relation=test,type=Char(20)]
+| | |   | +-AttributeReference[id=11,name=vchar_col,relation=test,
+| | |   |   type=VarChar(20) NULL]
+| | |   +-project_expressions=
+| | |     +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| | +-project_attributes=
+| |   +-AttributeReference[id=12,name=int_col,relation=,type=Int NULL]
+| +-project_expressions=
+|   +-Alias[id=14,name=intv,relation=,type=Int NULL]
+|     +-AttributeReference[id=12,name=int_col,relation=,type=Int NULL]
++-output_attributes=
+  +-AttributeReference[id=14,name=intv,relation=,type=Int NULL]
+==
+
+SELECT int_col FROM test
+INTERSECT
+SELECT intv FROM
+  (SELECT int_col, double_col FROM test
+   UNION ALL
+   SELECT int_col, double_col FROM test
+  ) AS temp(intv, doublev)
+--
+[Optimized Logical Plan]
+TopLevelPlan
++-plan=Intersect[set_operation_type=Intersect]
+| +-operands=
+| | +-Project
+| | | +-input=TableReference[relation_name=Test,relation_alias=test]
+| | | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | | |   type=VarChar(20) NULL]
+| | | +-project_list=
+| | |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | +-Project
+| |   +-input=UnionAll[set_operation_type=UnionAll]
+| |   | +-operands=
+| |   | | +-Project
+| |   | | | +-input=TableReference[relation_name=Test,relation_alias=test]
+| |   | | | | +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| |   | | | | +-AttributeReference[id=7,name=long_col,relation=test,type=Long]
+| |   | | | | +-AttributeReference[id=8,name=float_col,relation=test,type=Float]
+| |   | | | | +-AttributeReference[id=9,name=double_col,relation=test,
+| |   | | | | | type=Double NULL]
+| |   | | | | +-AttributeReference[id=10,name=char_col,relation=test,
+| |   | | | | | type=Char(20)]
+| |   | | | | +-AttributeReference[id=11,name=vchar_col,relation=test,
+| |   | | | |   type=VarChar(20) NULL]
+| |   | | | +-project_list=
+| |   | | |   +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| |   | | |   +-AttributeReference[id=9,name=double_col,relation=test,
+| |   | | |     type=Double NULL]
+| |   | | +-Project
+| |   | |   +-input=TableReference[relation_name=Test,relation_alias=test]
+| |   | |   | +-AttributeReference[id=12,name=int_col,relation=test,
+| |   | |   | | type=Int NULL]
+| |   | |   | +-AttributeReference[id=13,name=long_col,relation=test,type=Long]
+| |   | |   | +-AttributeReference[id=14,name=float_col,relation=test,type=Float]
+| |   | |   | +-AttributeReference[id=15,name=double_col,relation=test,
+| |   | |   | | type=Double NULL]
+| |   | |   | +-AttributeReference[id=16,name=char_col,relation=test,
+| |   | |   | | type=Char(20)]
+| |   | |   | +-AttributeReference[id=17,name=vchar_col,relation=test,
+| |   | |   |   type=VarChar(20) NULL]
+| |   | |   +-project_list=
+| |   | |     +-AttributeReference[id=12,name=int_col,relation=test,
+| |   | |     | type=Int NULL]
+| |   | |     +-AttributeReference[id=15,name=double_col,relation=test,
+| |   | |       type=Double NULL]
+| |   | +-project_attributes=
+| |   |   +-AttributeReference[id=18,name=int_col,relation=,type=Int NULL]
+| |   |   +-AttributeReference[id=19,name=double_col,relation=,type=Double NULL]
+| |   +-project_list=
+| |     +-Alias[id=20,name=intv,relation=,type=Int NULL]
+| |       +-AttributeReference[id=18,name=int_col,relation=,type=Int NULL]
+| +-project_attributes=
+|   +-AttributeReference[id=22,name=int_col,relation=,type=Int NULL]
++-output_attributes=
+  +-AttributeReference[id=22,name=int_col,relation=,type=Int NULL]
+[Physical Plan]
+TopLevelPlan
++-plan=Aggregate
+| +-input=HashLeftSemiJoin
+| | +-left=TableReference[relation=Test,alias=test]
+| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | |   type=VarChar(20) NULL]
+| | +-right=UnionAll
+| | | +-operands=
+| | | | +-Selection
+| | | | | +-input=TableReference[relation=Test,alias=test]
+| | | | | | +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| | | | | | +-AttributeReference[id=7,name=long_col,relation=test,type=Long]
+| | | | | | +-AttributeReference[id=8,name=float_col,relation=test,type=Float]
+| | | | | | +-AttributeReference[id=9,name=double_col,relation=test,
+| | | | | | | type=Double NULL]
+| | | | | | +-AttributeReference[id=10,name=char_col,relation=test,type=Char(20)]
+| | | | | | +-AttributeReference[id=11,name=vchar_col,relation=test,
+| | | | | |   type=VarChar(20) NULL]
+| | | | | +-project_expressions=
+| | | | |   +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| | | | +-Selection
+| | | |   +-input=TableReference[relation=Test,alias=test]
+| | | |   | +-AttributeReference[id=12,name=int_col,relation=test,type=Int NULL]
+| | | |   | +-AttributeReference[id=13,name=long_col,relation=test,type=Long]
+| | | |   | +-AttributeReference[id=14,name=float_col,relation=test,type=Float]
+| | | |   | +-AttributeReference[id=15,name=double_col,relation=test,
+| | | |   | | type=Double NULL]
+| | | |   | +-AttributeReference[id=16,name=char_col,relation=test,type=Char(20)]
+| | | |   | +-AttributeReference[id=17,name=vchar_col,relation=test,
+| | | |   |   type=VarChar(20) NULL]
+| | | |   +-project_expressions=
+| | | |     +-AttributeReference[id=12,name=int_col,relation=test,type=Int NULL]
+| | | +-project_attributes=
+| | |   +-AttributeReference[id=18,name=int_col,relation=,type=Int NULL]
+| | +-project_expressions=
+| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | +-left_join_attributes=
+| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | +-right_join_attributes=
+| |   +-AttributeReference[id=18,name=int_col,relation=,type=Int NULL]
+| +-grouping_expressions=
+| | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| +-aggregate_expressions=
+|   +-[]
++-output_attributes=
+  +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+==

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/tests/resolver/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/resolver/CMakeLists.txt b/query_optimizer/tests/resolver/CMakeLists.txt
index 3101b43..5350543 100644
--- a/query_optimizer/tests/resolver/CMakeLists.txt
+++ b/query_optimizer/tests/resolver/CMakeLists.txt
@@ -51,6 +51,10 @@ add_test(quickstep_queryoptimizer_tests_resolver_select
          "../quickstep_queryoptimizer_tests_OptimizerTextTest"
          "${CMAKE_CURRENT_SOURCE_DIR}/Select.test"
          "${CMAKE_CURRENT_BINARY_DIR}/Select.test")
+add_test(quickstep_queryoptimizer_tests_resolver_setoperation
+         "../quickstep_queryoptimizer_tests_OptimizerTextTest"
+         "${CMAKE_CURRENT_SOURCE_DIR}/SetOperation.test"
+         "${CMAKE_CURRENT_BINARY_DIR}/SetOperation.test")
 add_test(quickstep_queryoptimizer_tests_resolver_update
          "../quickstep_queryoptimizer_tests_OptimizerTextTest"
          "${CMAKE_CURRENT_SOURCE_DIR}/Update.test"

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/query_optimizer/tests/resolver/SetOperation.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/resolver/SetOperation.test b/query_optimizer/tests/resolver/SetOperation.test
new file mode 100644
index 0000000..f4bff6d
--- /dev/null
+++ b/query_optimizer/tests/resolver/SetOperation.test
@@ -0,0 +1,185 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[default initial_logical_plan]
+SELECT int_col FROM test
+UNION
+SELECT int_col FROM test
+--
+TopLevelPlan
++-plan=Union[set_operation_type=Union]
+| +-operands=
+| | +-Project
+| | | +-input=Project
+| | | | +-input=TableReference[relation_name=Test,relation_alias=test]
+| | | | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | | | +-AttributeReference[id=3,name=double_col,relation=test,
+| | | | | | type=Double NULL]
+| | | | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | | | |   type=VarChar(20) NULL]
+| | | | +-project_list=
+| | | |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-project_list=
+| | |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | +-Project
+| |   +-input=TableReference[relation_name=Test,relation_alias=test]
+| |   | +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| |   | +-AttributeReference[id=7,name=long_col,relation=test,type=Long]
+| |   | +-AttributeReference[id=8,name=float_col,relation=test,type=Float]
+| |   | +-AttributeReference[id=9,name=double_col,relation=test,type=Double NULL]
+| |   | +-AttributeReference[id=10,name=char_col,relation=test,type=Char(20)]
+| |   | +-AttributeReference[id=11,name=vchar_col,relation=test,
+| |   |   type=VarChar(20) NULL]
+| |   +-project_list=
+| |     +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| +-project_attributes=
+|   +-AttributeReference[id=12,name=int_col,relation=,type=Int NULL]
++-output_attributes=
+  +-AttributeReference[id=12,name=int_col,relation=,type=Int NULL]
+==
+
+SELECT double_col FROM test
+UNION ALL
+SELECT int_col FROM test
+--
+TopLevelPlan
++-plan=UnionAll[set_operation_type=UnionAll]
+| +-operands=
+| | +-Project
+| | | +-input=Project
+| | | | +-input=TableReference[relation_name=Test,relation_alias=test]
+| | | | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | | | +-AttributeReference[id=3,name=double_col,relation=test,
+| | | | | | type=Double NULL]
+| | | | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | | | |   type=VarChar(20) NULL]
+| | | | +-project_list=
+| | | |   +-AttributeReference[id=3,name=double_col,relation=test,
+| | | |     type=Double NULL]
+| | | +-project_list=
+| | |   +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | +-Project
+| |   +-input=TableReference[relation_name=Test,relation_alias=test]
+| |   | +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| |   | +-AttributeReference[id=7,name=long_col,relation=test,type=Long]
+| |   | +-AttributeReference[id=8,name=float_col,relation=test,type=Float]
+| |   | +-AttributeReference[id=9,name=double_col,relation=test,type=Double NULL]
+| |   | +-AttributeReference[id=10,name=char_col,relation=test,type=Char(20)]
+| |   | +-AttributeReference[id=11,name=vchar_col,relation=test,
+| |   |   type=VarChar(20) NULL]
+| |   +-project_list=
+| |     +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| +-project_attributes=
+|   +-AttributeReference[id=12,name=double_col,relation=,type=Double NULL]
++-output_attributes=
+  +-AttributeReference[id=12,name=double_col,relation=,type=Double NULL]
+==
+
+SELECT int_col, double_col FROM test
+UNION
+SELECT int_col, int_col FROM test
+--
+TopLevelPlan
++-plan=Union[set_operation_type=Union]
+| +-operands=
+| | +-Project
+| | | +-input=Project
+| | | | +-input=TableReference[relation_name=Test,relation_alias=test]
+| | | | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | | | +-AttributeReference[id=3,name=double_col,relation=test,
+| | | | | | type=Double NULL]
+| | | | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | | | |   type=VarChar(20) NULL]
+| | | | +-project_list=
+| | | |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | |   +-AttributeReference[id=3,name=double_col,relation=test,
+| | | |     type=Double NULL]
+| | | +-project_list=
+| | |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | |   +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | +-Project
+| |   +-input=Project
+| |   | +-input=TableReference[relation_name=Test,relation_alias=test]
+| |   | | +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| |   | | +-AttributeReference[id=7,name=long_col,relation=test,type=Long]
+| |   | | +-AttributeReference[id=8,name=float_col,relation=test,type=Float]
+| |   | | +-AttributeReference[id=9,name=double_col,relation=test,
+| |   | | | type=Double NULL]
+| |   | | +-AttributeReference[id=10,name=char_col,relation=test,type=Char(20)]
+| |   | | +-AttributeReference[id=11,name=vchar_col,relation=test,
+| |   | |   type=VarChar(20) NULL]
+| |   | +-project_list=
+| |   |   +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| |   |   +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| |   +-project_list=
+| |     +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| |     +-Alias[id=12,name=int_col,relation=,type=Double NULL]
+| |       +-Cast[target_type=Double NULL]
+| |         +-operand=AttributeReference[id=6,name=int_col,relation=test,
+| |           type=Int NULL]
+| +-project_attributes=
+|   +-AttributeReference[id=13,name=int_col,relation=,type=Int NULL]
+|   +-AttributeReference[id=14,name=double_col,relation=,type=Double NULL]
++-output_attributes=
+  +-AttributeReference[id=13,name=int_col,relation=,type=Int NULL]
+  +-AttributeReference[id=14,name=double_col,relation=,type=Double NULL]
+==
+
+SELECT float_col FROM test
+INTERSECT
+SELECT long_col FROM test
+--
+ERROR: There is not a safely coerce between Long and Float (1 : 1)
+SELECT float_col FROM test
+^
+==
+
+SELECT int_col FROM test
+UNION ALL
+SELECT float_col FROM test
+--
+ERROR: There is not a safely coerce between Float and Int NULL (1 : 1)
+SELECT int_col FROM test
+^
+==
+
+SELECT int_col FROM test
+INTERSECT
+SELECT vchar_col FROM test
+--
+ERROR: Does not support cast operation with non-numeric types VarChar(20) NULL and Int NULL (1 : 1)
+SELECT int_col FROM test
+^
+==
+
+SELECT int_col, double_col FROM test
+UNION ALL
+SELECT double_col, vchar_col FROM test
+--
+ERROR: Does not support cast operation with non-numeric types VarChar(20) NULL and Double NULL (1 : 1)
+SELECT int_col, double_col FRO...
+^
+==

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index 1693ec2..4ea809b 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -73,6 +73,7 @@ add_library(quickstep_relationaloperators_SortRunGenerationOperator SortRunGener
             SortRunGenerationOperator.hpp)
 add_library(quickstep_relationaloperators_TableGeneratorOperator TableGeneratorOperator.cpp TableGeneratorOperator.hpp)
 add_library(quickstep_relationaloperators_TextScanOperator TextScanOperator.cpp TextScanOperator.hpp)
+add_library(quickstep_relationaloperators_UnionAllOperator UnionAllOperator.cpp UnionAllOperator.hpp)
 add_library(quickstep_relationaloperators_UpdateOperator UpdateOperator.cpp UpdateOperator.hpp)
 add_library(quickstep_relationaloperators_WindowAggregationOperator WindowAggregationOperator.cpp WindowAggregationOperator.hpp)
 add_library(quickstep_relationaloperators_WorkOrder ../empty_src.cpp WorkOrder.hpp)
@@ -505,6 +506,20 @@ if (QUICKSTEP_HAVE_FILE_MANAGER_HDFS)
   target_link_libraries(quickstep_relationaloperators_TextScanOperator
                         ${LIBHDFS3_LIBRARIES})
 endif(QUICKSTEP_HAVE_FILE_MANAGER_HDFS)
+target_link_libraries(quickstep_relationaloperators_UnionAllOperator
+                      glog
+                      quickstep_catalog_CatalogRelation
+                      quickstep_catalog_CatalogTypedefs
+                      quickstep_queryexecution_QueryContext
+                      quickstep_queryexecution_WorkOrderProtosContainer
+                      quickstep_queryexecution_WorkOrdersContainer
+                      quickstep_relationaloperators_RelationalOperator
+                      quickstep_storage_InsertDestination
+                      quickstep_storage_StorageBlock
+                      quickstep_storage_StorageBlockInfo
+                      quickstep_storage_StorageManager
+                      quickstep_utility_Macros
+                      tmb)
 target_link_libraries(quickstep_relationaloperators_UpdateOperator
                       glog
                       quickstep_catalog_CatalogRelation
@@ -611,6 +626,7 @@ target_link_libraries(quickstep_relationaloperators
                       quickstep_relationaloperators_SortRunGenerationOperator
                       quickstep_relationaloperators_TableGeneratorOperator
                       quickstep_relationaloperators_TextScanOperator
+                      quickstep_relationaloperators_UnionAllOperator
                       quickstep_relationaloperators_UpdateOperator
                       quickstep_relationaloperators_WindowAggregationOperator
                       quickstep_relationaloperators_WorkOrder

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/relational_operators/RelationalOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/RelationalOperator.hpp b/relational_operators/RelationalOperator.hpp
index 0aeb4c9..c568654 100644
--- a/relational_operators/RelationalOperator.hpp
+++ b/relational_operators/RelationalOperator.hpp
@@ -86,6 +86,7 @@ class RelationalOperator {
     kSortRunGeneration,
     kTableGenerator,
     kTextScan,
+    kUnionAll,
     kUpdate,
     kWindowAggregation,
     kMockOperator

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/relational_operators/UnionAllOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/UnionAllOperator.cpp b/relational_operators/UnionAllOperator.cpp
new file mode 100644
index 0000000..141b3cf
--- /dev/null
+++ b/relational_operators/UnionAllOperator.cpp
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "relational_operators/UnionAllOperator.hpp"
+
+#include <cstddef>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "query_execution/QueryContext.hpp"
+#include "query_execution/WorkOrderProtosContainer.hpp"
+#include "query_execution/WorkOrdersContainer.hpp"
+#include "storage/InsertDestination.hpp"
+#include "storage/StorageBlock.hpp"
+#include "storage/StorageBlockInfo.hpp"
+#include "storage/StorageManager.hpp"
+
+#include "glog/logging.h"
+
+#include "tmb/id_typedefs.h"
+
+namespace quickstep {
+
+// Files a newly fed block under the input relation it belongs to.
+// part_id is not consulted here.
+void UnionAllOperator::feedInputBlock(const block_id input_block_id,
+                                      const relation_id input_relation_id,
+                                      const partition_id part_id) {
+  // at() rejects relation ids that were never registered as inputs.
+  std::vector<block_id> &pending_blocks =
+      input_relations_block_ids_[relation_id_to_index_.at(input_relation_id)];
+  pending_blocks.push_back(input_block_id);
+}
+
+// Marks one streamed input relation as finished. Once no streamed input is
+// outstanding any more, the operator as a whole is flagged as done being fed.
+void UnionAllOperator::doneFeedingInputBlocks(const relation_id rel_id) {
+  const std::size_t finished_index = relation_id_to_index_.at(rel_id);
+  DCHECK(still_feeding_.count(finished_index) != 0u);
+  still_feeding_.erase(finished_index);
+  if (still_feeding_.empty()) {
+    done_feeding_input_relation_ = true;
+  }
+}
+
+// Emits UnionAllWorkOrders for the input relation at relation_index.
+// Stored inputs get one work order per block in their captured block list;
+// streamed inputs only get work orders for blocks fed since the last call.
+void UnionAllOperator::addWorkOrdersSingleRelation(
+    WorkOrdersContainer *container,
+    QueryContext *query_context,
+    StorageManager *storage_manager,
+    InsertDestination *output_destination,
+    const std::size_t relation_index) {
+  // Wraps a single input block in a work order and hands it to the container.
+  const auto enqueue_block = [&](const block_id blk) {
+    container->addNormalWorkOrder(
+        new UnionAllWorkOrder(query_id_,
+                              input_relations_[relation_index],
+                              blk,
+                              select_attribute_ids_[relation_index],
+                              output_destination,
+                              storage_manager),
+        op_index_);
+  };
+
+  if (!input_relations_are_stored_[relation_index]) {
+    // Streamed input: resume from the per-relation cursor and advance it so
+    // that no block is turned into a work order twice.
+    const std::vector<block_id> &fed_blocks = input_relations_block_ids_[relation_index];
+    std::size_t cursor = num_workorders_generated_[relation_index];
+    for (; cursor < fed_blocks.size(); ++cursor) {
+      enqueue_block(fed_blocks[cursor]);
+    }
+    num_workorders_generated_[relation_index] = cursor;
+    return;
+  }
+
+  // Stored input: every block in the list gets a work order. The cursor is
+  // intentionally left untouched on this path.
+  for (const block_id blk : input_relations_block_ids_.at(relation_index)) {
+    enqueue_block(blk);
+  }
+}
+
+// Generates all work orders that can be produced right now and reports
+// whether the operator has finished generating.
+//
+// NOTE(review): when every input is stored, still_feeding_ starts out empty,
+// so doneFeedingInputBlocks() never has anything to erase. Completion then
+// depends on done_feeding_input_relation_ being set elsewhere -- confirm
+// against RelationalOperator / the scheduler.
+bool UnionAllOperator::getAllWorkOrders(
+    WorkOrdersContainer *container,
+    QueryContext *query_context,
+    StorageManager *storage_manager,
+    const tmb::client_id scheduler_client_id,
+    tmb::MessageBus *bus) {
+  DCHECK(query_context != nullptr);
+
+  InsertDestination *const destination =
+      query_context->getInsertDestination(output_destination_index_);
+  const std::size_t num_inputs = input_relations_.size();
+
+  // Stored inputs: their block lists were captured up front, so they are
+  // handled exactly once, on the first invocation.
+  if (!stored_generated_) {
+    for (std::size_t i = 0; i < num_inputs; ++i) {
+      if (!input_relations_are_stored_[i]) {
+        continue;
+      }
+      addWorkOrdersSingleRelation(container,
+                                  query_context,
+                                  storage_manager,
+                                  destination,
+                                  i);
+    }
+    stored_generated_ = true;
+  }
+
+  // Streamed inputs: revisited on every invocation to pick up blocks that
+  // were fed since the previous one.
+  for (std::size_t i = 0; i < num_inputs; ++i) {
+    if (input_relations_are_stored_[i]) {
+      continue;
+    }
+    addWorkOrdersSingleRelation(container,
+                                query_context,
+                                storage_manager,
+                                destination,
+                                i);
+  }
+
+  return stored_generated_ && done_feeding_input_relation_;
+}
+
+// Protobuf-based counterpart of getAllWorkOrders(). Not implemented yet: the
+// call terminates the process through LOG(FATAL). The trailing return only
+// satisfies the signature.
+bool UnionAllOperator::getAllWorkOrderProtos(WorkOrderProtosContainer* container) {
+  // TODO(tianrun): Add protobuf for UnionAllWorkOrder to support distributed mode.
+  LOG(FATAL) << "UnionAllOperator is not supported in distributed mode yet.";
+  return true;
+}
+
+// Loads the input block and copies the selected attributes of its tuples
+// into the output destination.
+void UnionAllWorkOrder::execute() {
+  BlockReference input_block(
+      storage_manager_->getBlock(input_block_id_, *input_relation_));
+  // The second argument is left null -- presumably "no filter"; confirm
+  // against StorageBlock::selectSimple.
+  input_block->selectSimple(select_attribute_id_, nullptr, output_destination_);
+}
+
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/563abe04/relational_operators/UnionAllOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/UnionAllOperator.hpp b/relational_operators/UnionAllOperator.hpp
new file mode 100644
index 0000000..3bfed82
--- /dev/null
+++ b/relational_operators/UnionAllOperator.hpp
@@ -0,0 +1,228 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_RELATIONAL_OPERATORS_UNION_ALL_OPERATOR_HPP_
+#define QUICKSTEP_RELATIONAL_OPERATORS_UNION_ALL_OPERATOR_HPP_
+
+#include <cstddef>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "catalog/CatalogRelation.hpp"
+#include "catalog/CatalogTypedefs.hpp"
+#include "query_execution/QueryContext.hpp"
+#include "relational_operators/RelationalOperator.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+#include "tmb/id_typedefs.h"
+
+namespace tmb { class MessageBus; }
+
+namespace quickstep {
+
+class InsertDestination;
+class StorageManager;
+class WorkOrderProtosContainer;
+class WorkOrdersContainer;
+
+/** \addtogroup RelationalOperators
+ *  @{
+ */
+
+/**
+ * @brief An operator which performs UNION ALL operation to relational tables.
+ **/
+class UnionAllOperator : public RelationalOperator {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @note input_relations, input_relations_are_stored and
+   *       select_attribute_ids are parallel vectors: entry i of each one
+   *       describes the same input relation, so all three must have the same
+   *       length (checked in debug builds).
+   *
+   * @param query_id The ID of the query to which this operator belongs.
+   * @param input_relations The input relations to this UnionAll operator.
+   * @param output_relation The output relation.
+   * @param output_destination_index The index of the InsertDestination in the
+   *        QueryContext to insert the results.
+   * @param input_relations_are_stored A boolean vector indicating whether each
+   *        input relation is a stored relation and is fully available to the
+   *        operator before it can start generating workorders.
+   * @param select_attribute_ids For each input relation, the ids of the
+   *        attributes to select from it.
+   **/
+  UnionAllOperator(const std::size_t query_id,
+                   const std::vector<const CatalogRelation*> &input_relations,
+                   const CatalogRelation &output_relation,
+                   const QueryContext::insert_destination_id output_destination_index,
+                   const std::vector<bool> &input_relations_are_stored,
+                   const std::vector<std::vector<attribute_id>> &select_attribute_ids)
+      : RelationalOperator(query_id),
+        input_relations_(input_relations),
+        input_relations_are_stored_(input_relations_are_stored),
+        output_relation_(output_relation),
+        output_destination_index_(output_destination_index),
+        select_attribute_ids_(select_attribute_ids),
+        stored_generated_(false) {
+    // The loop below (and the work order generation) indexes all three
+    // vectors with the same position, so a length mismatch would read out of
+    // bounds.
+    DCHECK_EQ(input_relations.size(), input_relations_are_stored.size());
+    DCHECK_EQ(input_relations.size(), select_attribute_ids.size());
+
+    input_relations_block_ids_.reserve(input_relations.size());
+    num_workorders_generated_.reserve(input_relations.size());
+
+    // For every input relation, initialize the UnionAll data structure.
+    for (std::size_t i = 0; i < input_relations.size(); ++i) {
+      relation_id_to_index_.emplace(input_relations[i]->getID(), i);
+      if (input_relations_are_stored[i]) {
+        // Stored input: take its block list once, at construction time.
+        input_relations_block_ids_.emplace_back(
+            input_relations[i]->getBlocksSnapshot());
+      } else {
+        // Streamed input: blocks arrive later through feedInputBlock().
+        input_relations_block_ids_.emplace_back();
+        still_feeding_.insert(i);
+      }
+
+      num_workorders_generated_.push_back(0);
+    }
+  }
+
+  ~UnionAllOperator() override {}
+
+  OperatorType getOperatorType() const override {
+    return kUnionAll;
+  }
+
+  std::string getName() const override {
+    return "UnionAll";
+  }
+
+  QueryContext::insert_destination_id getInsertDestinationID() const override {
+    return output_destination_index_;
+  }
+
+  /**
+   * @return The input relations to this UnionAll operator.
+   */
+  const std::vector<const CatalogRelation*>& input_relations() const {
+    return input_relations_;
+  }
+
+  /**
+   * @return The output relation to this UnionAll operator.
+   */
+  const CatalogRelation& output_relation() const {
+    return output_relation_;
+  }
+
+  const relation_id getOutputRelationID() const override {
+    return output_relation_.getID();
+  }
+
+  /**
+   * @brief Record a block produced for one of the streamed input relations.
+   *
+   * @param input_block_id The id of the new block.
+   * @param input_relation_id The id of the input relation the block belongs
+   *        to; must be one of the relations passed to the constructor.
+   * @param part_id The partition id of the block.
+   **/
+  void feedInputBlock(const block_id input_block_id,
+                      const relation_id input_relation_id,
+                      const partition_id part_id) override;
+
+  /**
+   * @brief Signal that a streamed input relation will receive no more blocks.
+   *
+   * @param rel_id The id of the input relation that is complete.
+   **/
+  void doneFeedingInputBlocks(const relation_id rel_id) override;
+
+  bool getAllWorkOrders(WorkOrdersContainer *container,
+                        QueryContext *query_context,
+                        StorageManager *storage_manager,
+                        const tmb::client_id scheduler_client_id,
+                        tmb::MessageBus *bus) override;
+
+  bool getAllWorkOrderProtos(WorkOrderProtosContainer *container) override;
+
+ private:
+  // Add work orders for a single relation.
+  void addWorkOrdersSingleRelation(WorkOrdersContainer *container,
+                                   QueryContext *query_context,
+                                   StorageManager *storage_manager,
+                                   InsertDestination *output_destination,
+                                   const std::size_t relation_index);
+
+  const std::vector<const CatalogRelation*> input_relations_;
+  const std::vector<bool> input_relations_are_stored_;
+
+  const CatalogRelation &output_relation_;
+  const QueryContext::insert_destination_id output_destination_index_;
+
+  const std::vector<std::vector<attribute_id>> select_attribute_ids_;
+
+  // For input_relations_[i],
+  // its block ids are in input_relations_block_ids_[i],
+  // its number of generated work orders is in num_workorders_generated_[i].
+  std::vector<std::vector<block_id>> input_relations_block_ids_;
+  std::vector<std::size_t> num_workorders_generated_;
+
+  // Relation indices that are not stored, and are still feeding.
+  std::unordered_set<std::size_t> still_feeding_;
+
+  // Map from relation_id to index in vector.
+  std::unordered_map<relation_id, std::size_t> relation_id_to_index_;
+
+  // Whether work orders for all the stored relations have been generated.
+  bool stored_generated_;
+
+  DISALLOW_COPY_AND_ASSIGN(UnionAllOperator);
+};
+
+
+/**
+ * @brief A work order produced by UnionAllOperator.
+ */
+class UnionAllWorkOrder : public WorkOrder {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param query_id The ID of the query to which this WorkOrder belongs.
+   * @param input_relation The schema of the relation the input block belongs
+   *        to.
+   * @param input_block_id The id of the block this work order processes.
+   * @param select_attribute_id The ids of the attributes to select from
+   *        the input relation. The vector is copied into the work order.
+   * @param output_destination The InsertDestination that receives the
+   *        UnionAll result.
+   * @param storage_manager The StorageManager used to load the input block.
+   */
+  UnionAllWorkOrder(const std::size_t query_id,
+                    const CatalogRelationSchema *input_relation,
+                    const block_id input_block_id,
+                    const std::vector<attribute_id> &select_attribute_id,
+                    InsertDestination *output_destination,
+                    StorageManager *storage_manager)
+      : WorkOrder(query_id),
+        input_relation_(input_relation),
+        input_block_id_(input_block_id),
+        select_attribute_id_(select_attribute_id),
+        output_destination_(output_destination),
+        storage_manager_(storage_manager) {}
+
+  ~UnionAllWorkOrder() override = default;
+
+  void execute() override;
+
+ private:
+  // What to read: the block and the relation schema it is interpreted with.
+  const CatalogRelationSchema *input_relation_;
+  const block_id input_block_id_;
+  // Which attributes of the input are copied to the output.
+  const std::vector<attribute_id> select_attribute_id_;
+
+  // Collaborators handed in by the operator; held as plain pointers.
+  InsertDestination *output_destination_;
+  StorageManager *storage_manager_;
+
+  DISALLOW_COPY_AND_ASSIGN(UnionAllWorkOrder);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_RELATIONAL_OPERATORS_UNION_ALL_OPERATOR_HPP_


[3/5] incubator-quickstep git commit: Initial commit

Posted by ji...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/ScalarLiteral.cpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarLiteral.cpp b/expressions/scalar/ScalarLiteral.cpp
index 48b5574..5cb7776 100644
--- a/expressions/scalar/ScalarLiteral.cpp
+++ b/expressions/scalar/ScalarLiteral.cpp
@@ -47,24 +47,49 @@ Scalar* ScalarLiteral::clone() const {
   return new ScalarLiteral(internal_literal_, type_);
 }
 
-ColumnVector* ScalarLiteral::getAllValues(
+ColumnVectorPtr ScalarLiteral::getAllValues(
     ValueAccessor *accessor,
-    const SubBlocksReference *sub_blocks_ref) const {
-  return ColumnVector::MakeVectorOfValue(
-      type_,
-      internal_literal_,
-      accessor->getNumTuplesVirtual());
+    const SubBlocksReference *sub_blocks_ref,
+    ScalarCache *scalar_cache) const {
+  return ColumnVectorPtr(
+      ColumnVector::MakeVectorOfValue(type_,
+                                      internal_literal_,
+                                      accessor->getNumTuplesVirtual()));
 }
 
-ColumnVector* ScalarLiteral::getAllValuesForJoin(
+ColumnVectorPtr ScalarLiteral::getAllValuesForJoin(
     const relation_id left_relation_id,
     ValueAccessor *left_accessor,
     const relation_id right_relation_id,
     ValueAccessor *right_accessor,
-    const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids) const {
-  return ColumnVector::MakeVectorOfValue(type_,
-                                         internal_literal_,
-                                         joined_tuple_ids.size());
+    const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids,
+    ScalarCache *scalar_cache) const {
+  return ColumnVectorPtr(
+      ColumnVector::MakeVectorOfValue(type_,
+                                      internal_literal_,
+                                      joined_tuple_ids.size()));
+}
+
+void ScalarLiteral::getFieldStringItems(
+    std::vector<std::string> *inline_field_names,
+    std::vector<std::string> *inline_field_values,
+    std::vector<std::string> *non_container_child_field_names,
+    std::vector<const Expression*> *non_container_child_fields,
+    std::vector<std::string> *container_child_field_names,
+    std::vector<std::vector<const Expression*>> *container_child_fields) const {
+  Scalar::getFieldStringItems(inline_field_names,
+                              inline_field_values,
+                              non_container_child_field_names,
+                              non_container_child_fields,
+                              container_child_field_names,
+                              container_child_fields);
+
+  inline_field_names->emplace_back("internal_literal");
+  if (internal_literal_.isNull()) {
+    inline_field_values->emplace_back("NULL");
+  } else {
+    inline_field_values->emplace_back(type_.printValueToString(internal_literal_));
+  }
 }
 
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/ScalarLiteral.hpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarLiteral.hpp b/expressions/scalar/ScalarLiteral.hpp
index c7f5ceb..e81aaf8 100644
--- a/expressions/scalar/ScalarLiteral.hpp
+++ b/expressions/scalar/ScalarLiteral.hpp
@@ -20,6 +20,7 @@
 #ifndef QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_LITERAL_HPP_
 #define QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_LITERAL_HPP_
 
+#include <string>
 #include <utility>
 #include <vector>
 
@@ -28,11 +29,12 @@
 #include "expressions/scalar/Scalar.hpp"
 #include "storage/StorageBlockInfo.hpp"
 #include "types/TypedValue.hpp"
+#include "types/containers/ColumnVector.hpp"
 #include "utility/Macros.hpp"
 
 namespace quickstep {
 
-class ColumnVector;
+class ScalarCache;
 class Type;
 class ValueAccessor;
 
@@ -101,15 +103,26 @@ class ScalarLiteral : public Scalar {
     return internal_literal_;
   }
 
-  ColumnVector* getAllValues(ValueAccessor *accessor,
-                             const SubBlocksReference *sub_blocks_ref) const override;
+  ColumnVectorPtr getAllValues(ValueAccessor *accessor,
+                               const SubBlocksReference *sub_blocks_ref,
+                               ScalarCache *scalar_cache) const override;
 
-  ColumnVector* getAllValuesForJoin(
+  ColumnVectorPtr getAllValuesForJoin(
       const relation_id left_relation_id,
       ValueAccessor *left_accessor,
       const relation_id right_relation_id,
       ValueAccessor *right_accessor,
-      const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids) const override;
+      const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids,
+      ScalarCache *scalar_cache) const override;
+
+ protected:
+  void getFieldStringItems(
+      std::vector<std::string> *inline_field_names,
+      std::vector<std::string> *inline_field_values,
+      std::vector<std::string> *non_container_child_field_names,
+      std::vector<const Expression*> *non_container_child_fields,
+      std::vector<std::string> *container_child_field_names,
+      std::vector<std::vector<const Expression*>> *container_child_fields) const override;
 
  private:
   TypedValue internal_literal_;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/ScalarSharedExpression.cpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarSharedExpression.cpp b/expressions/scalar/ScalarSharedExpression.cpp
new file mode 100644
index 0000000..8dbb3bb
--- /dev/null
+++ b/expressions/scalar/ScalarSharedExpression.cpp
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "expressions/scalar/ScalarSharedExpression.hpp"
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "expressions/Expressions.pb.h"
+#include "expressions/scalar/ScalarCache.hpp"
+#include "storage/ValueAccessor.hpp"
+#include "types/TypedValue.hpp"
+#include "types/containers/ColumnVector.hpp"
+
+namespace quickstep {
+
+struct SubBlocksReference;
+
+// Wraps operand under the given share id. The result type is taken from the
+// operand, so operand must be non-null; the pointer is then stored in
+// operand_ (a std::unique_ptr), which takes over its lifetime.
+ScalarSharedExpression::ScalarSharedExpression(const int share_id,
+                                               Scalar *operand)
+    : Scalar(operand->getType()),
+      share_id_(share_id),
+      operand_(operand) {
+}
+
+// Serializes this node as a SHARED_EXPRESSION scalar carrying the share id
+// and the serialized operand as extensions.
+serialization::Scalar ScalarSharedExpression::getProto() const {
+  serialization::Scalar serialized;
+  serialized.set_data_source(serialization::Scalar::SHARED_EXPRESSION);
+  serialized.SetExtension(serialization::ScalarSharedExpression::share_id, share_id_);
+
+  // The wrapped operand is embedded as a nested message.
+  auto *operand_slot =
+      serialized.MutableExtension(serialization::ScalarSharedExpression::operand);
+  operand_slot->CopyFrom(operand_->getProto());
+
+  return serialized;
+}
+
+// Returns a new heap-allocated wrapper with the same share id around a clone
+// of the operand. The caller owns the returned object.
+Scalar* ScalarSharedExpression::clone() const {
+  return new ScalarSharedExpression(share_id_, operand_->clone());
+}
+
+// Per-tuple evaluation forwards straight to the operand; no ScalarCache is
+// involved on this path.
+TypedValue ScalarSharedExpression::getValueForSingleTuple(const ValueAccessor &accessor,
+                                                          const tuple_id tuple) const {
+  return operand_->getValueForSingleTuple(accessor, tuple);
+}
+
+// Per-tuple-pair evaluation forwards straight to the operand with all
+// arguments unchanged; no ScalarCache is involved on this path.
+TypedValue ScalarSharedExpression::getValueForJoinedTuples(
+    const ValueAccessor &left_accessor,
+    const relation_id left_relation_id,
+    const tuple_id left_tuple_id,
+    const ValueAccessor &right_accessor,
+    const relation_id right_relation_id,
+    const tuple_id right_tuple_id) const {
+  return operand_->getValueForJoinedTuples(left_accessor,
+                                           left_relation_id,
+                                           left_tuple_id,
+                                           right_accessor,
+                                           right_relation_id,
+                                           right_tuple_id);
+}
+
+// Evaluates the operand over the whole accessor, going through scalar_cache
+// when one is supplied: a result already stored under share_id_ is reused,
+// otherwise the operand is evaluated and its result is stored for later use.
+ColumnVectorPtr ScalarSharedExpression::getAllValues(
+    ValueAccessor *accessor,
+    const SubBlocksReference *sub_blocks_ref,
+    ScalarCache *scalar_cache) const {
+  const bool caching = (scalar_cache != nullptr);
+
+  ColumnVectorPtr values;
+  if (caching && scalar_cache->has(share_id_)) {
+    values = scalar_cache->get(share_id_);
+  } else {
+    values = operand_->getAllValues(accessor, sub_blocks_ref, scalar_cache);
+    if (caching) {
+      scalar_cache->set(share_id_, values);
+    }
+  }
+  return values;
+}
+
+// Join counterpart of getAllValues(): evaluates the operand over the joined
+// tuple pairs, reusing or populating the scalar_cache entry for share_id_
+// when a cache is supplied.
+ColumnVectorPtr ScalarSharedExpression::getAllValuesForJoin(
+    const relation_id left_relation_id,
+    ValueAccessor *left_accessor,
+    const relation_id right_relation_id,
+    ValueAccessor *right_accessor,
+    const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids,
+    ScalarCache *scalar_cache) const {
+  const bool caching = (scalar_cache != nullptr);
+
+  ColumnVectorPtr values;
+  if (caching && scalar_cache->has(share_id_)) {
+    values = scalar_cache->get(share_id_);
+  } else {
+    values = operand_->getAllValuesForJoin(left_relation_id,
+                                           left_accessor,
+                                           right_relation_id,
+                                           right_accessor,
+                                           joined_tuple_ids,
+                                           scalar_cache);
+    if (caching) {
+      scalar_cache->set(share_id_, values);
+    }
+  }
+  return values;
+}
+
+// Describes this node for tree printing: the share id as an inline field and
+// the wrapped operand as the single non-container child. The container
+// outputs are left untouched.
+//
+// NOTE(review): unlike ScalarLiteral::getFieldStringItems, this override does
+// not call Scalar::getFieldStringItems first -- confirm that omitting the
+// base-class fields is intentional.
+void ScalarSharedExpression::getFieldStringItems(
+    std::vector<std::string> *inline_field_names,
+    std::vector<std::string> *inline_field_values,
+    std::vector<std::string> *non_container_child_field_names,
+    std::vector<const Expression*> *non_container_child_fields,
+    std::vector<std::string> *container_child_field_names,
+    std::vector<std::vector<const Expression*>> *container_child_fields) const {
+  inline_field_names->emplace_back("share_id");
+  inline_field_values->emplace_back(std::to_string(share_id_));
+
+  non_container_child_field_names->emplace_back("operand");
+  non_container_child_fields->emplace_back(operand_.get());
+}
+
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/ScalarSharedExpression.hpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarSharedExpression.hpp b/expressions/scalar/ScalarSharedExpression.hpp
new file mode 100644
index 0000000..3262ef1
--- /dev/null
+++ b/expressions/scalar/ScalarSharedExpression.hpp
@@ -0,0 +1,119 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_SHARED_EXPRESSION_HPP_
+#define QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_SHARED_EXPRESSION_HPP_
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "expressions/Expressions.pb.h"
+#include "expressions/scalar/Scalar.hpp"
+#include "storage/StorageBlockInfo.hpp"
+#include "types/TypedValue.hpp"
+#include "types/containers/ColumnVector.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+
+class ScalarCache;
+class ValueAccessor;
+
+struct SubBlocksReference;
+
+/** \addtogroup Expressions
+ *  @{
+ */
+
+/**
+ * @brief A Scalar that wraps an operand expression and tags it with a share
+ *        id, so that the operand's column-vector result can be stored in and
+ *        reused from a ScalarCache.
+ **/
+class ScalarSharedExpression : public Scalar {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param share_id The id under which the operand's result is looked up and
+   *        stored in a ScalarCache.
+   * @param operand The wrapped expression. Must not be null. It is stored in
+   *        a std::unique_ptr, so this object takes ownership of it.
+   **/
+  ScalarSharedExpression(const int share_id, Scalar *operand);
+
+  /**
+   * @brief Destructor.
+   **/
+  ~ScalarSharedExpression() override {
+  }
+
+  serialization::Scalar getProto() const override;
+
+  Scalar* clone() const override;
+
+  ScalarDataSource getDataSource() const override {
+    return kSharedExpression;
+  }
+
+  TypedValue getValueForSingleTuple(const ValueAccessor &accessor,
+                                    const tuple_id tuple) const override;
+
+  TypedValue getValueForJoinedTuples(
+      const ValueAccessor &left_accessor,
+      const relation_id left_relation_id,
+      const tuple_id left_tuple_id,
+      const ValueAccessor &right_accessor,
+      const relation_id right_relation_id,
+      const tuple_id right_tuple_id) const override;
+
+  // Static-value queries are answered by the wrapped operand.
+  bool hasStaticValue() const override {
+    return operand_->hasStaticValue();
+  }
+
+  const TypedValue& getStaticValue() const override {
+    return operand_->getStaticValue();
+  }
+
+  ColumnVectorPtr getAllValues(ValueAccessor *accessor,
+                               const SubBlocksReference *sub_blocks_ref,
+                               ScalarCache *scalar_cache) const override;
+
+  ColumnVectorPtr getAllValuesForJoin(
+      const relation_id left_relation_id,
+      ValueAccessor *left_accessor,
+      const relation_id right_relation_id,
+      ValueAccessor *right_accessor,
+      const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids,
+      ScalarCache *scalar_cache) const override;
+
+ protected:
+  void getFieldStringItems(
+      std::vector<std::string> *inline_field_names,
+      std::vector<std::string> *inline_field_values,
+      std::vector<std::string> *non_container_child_field_names,
+      std::vector<const Expression*> *non_container_child_fields,
+      std::vector<std::string> *container_child_field_names,
+      std::vector<std::vector<const Expression*>> *container_child_fields) const override;
+
+ private:
+  // Key of this expression's result in a ScalarCache.
+  const int share_id_;
+  // The wrapped expression; owned by this object.
+  std::unique_ptr<Scalar> operand_;
+
+  DISALLOW_COPY_AND_ASSIGN(ScalarSharedExpression);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_SHARED_EXPRESSION_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/ScalarUnaryExpression.cpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarUnaryExpression.cpp b/expressions/scalar/ScalarUnaryExpression.cpp
index 72fdbe1..80d4944 100644
--- a/expressions/scalar/ScalarUnaryExpression.cpp
+++ b/expressions/scalar/ScalarUnaryExpression.cpp
@@ -33,6 +33,7 @@
 #include "types/containers/ColumnVector.hpp"
 #include "types/operations/Operation.pb.h"
 #include "types/operations/unary_operations/UnaryOperation.hpp"
+#include "types/operations/unary_operations/UnaryOperationID.hpp"
 
 #ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_JOIN
 #include "glog/logging.h"
@@ -91,36 +92,43 @@ TypedValue ScalarUnaryExpression::getValueForJoinedTuples(
   }
 }
 
-ColumnVector* ScalarUnaryExpression::getAllValues(
+ColumnVectorPtr ScalarUnaryExpression::getAllValues(
     ValueAccessor *accessor,
-    const SubBlocksReference *sub_blocks_ref) const {
+    const SubBlocksReference *sub_blocks_ref,
+    ScalarCache *scalar_cache) const {  // scalar_cache is only forwarded to the operand.
   if (fast_operator_.get() == nullptr) {
-    return ColumnVector::MakeVectorOfValue(getType(),
-                                           static_value_,
-                                           accessor->getNumTuplesVirtual());
+    return ColumnVectorPtr(  // No fast operator: build the result from static_value_.
+        ColumnVector::MakeVectorOfValue(getType(),
+                                        static_value_,
+                                        accessor->getNumTuplesVirtual()));
   } else {
 #ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
     const attribute_id operand_attr_id = operand_->getAttributeIdForValueAccessor();
     if (operand_attr_id != -1) {
-      return fast_operator_->applyToValueAccessor(accessor, operand_attr_id);
+      return ColumnVectorPtr(  // Apply the operator directly to the accessor's attribute.
+          fast_operator_->applyToValueAccessor(accessor, operand_attr_id));
     }
 #endif  // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
 
-    std::unique_ptr<ColumnVector> operand_result(operand_->getAllValues(accessor, sub_blocks_ref));
-    return fast_operator_->applyToColumnVector(*operand_result);
+    ColumnVectorPtr operand_result(  // Get the operand's values first.
+        operand_->getAllValues(accessor, sub_blocks_ref, scalar_cache));
+    return ColumnVectorPtr(  // Then apply the operator to the operand's values.
+        fast_operator_->applyToColumnVector(*operand_result));
   }
 }
 
-ColumnVector* ScalarUnaryExpression::getAllValuesForJoin(
+ColumnVectorPtr ScalarUnaryExpression::getAllValuesForJoin(
     const relation_id left_relation_id,
     ValueAccessor *left_accessor,
     const relation_id right_relation_id,
     ValueAccessor *right_accessor,
-    const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids) const {
+    const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids,
+    ScalarCache *scalar_cache) const {
   if (fast_operator_.get() == nullptr) {
-    return ColumnVector::MakeVectorOfValue(getType(),
-                                           static_value_,
-                                           joined_tuple_ids.size());
+    return ColumnVectorPtr(
+        ColumnVector::MakeVectorOfValue(getType(),
+                                        static_value_,
+                                        joined_tuple_ids.size()));
   } else {
 #ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_JOIN
     const attribute_id operand_attr_id = operand_->getAttributeIdForValueAccessor();
@@ -132,20 +140,23 @@ ColumnVector* ScalarUnaryExpression::getAllValuesForJoin(
       const bool using_left_relation = (operand_relation_id == left_relation_id);
       ValueAccessor *operand_accessor = using_left_relation ? left_accessor
                                                             : right_accessor;
-      return fast_operator_->applyToValueAccessorForJoin(operand_accessor,
-                                                         using_left_relation,
-                                                         operand_attr_id,
-                                                         joined_tuple_ids);
+      return ColumnVectorPtr(
+          fast_operator_->applyToValueAccessorForJoin(operand_accessor,
+                                                      using_left_relation,
+                                                      operand_attr_id,
+                                                      joined_tuple_ids));
     }
 #endif  // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_JOIN
 
-    std::unique_ptr<ColumnVector> operand_result(
+    ColumnVectorPtr operand_result(
         operand_->getAllValuesForJoin(left_relation_id,
                                       left_accessor,
                                       right_relation_id,
                                       right_accessor,
-                                      joined_tuple_ids));
-    return fast_operator_->applyToColumnVector(*operand_result);
+                                      joined_tuple_ids,
+                                      scalar_cache));
+    return ColumnVectorPtr(
+        fast_operator_->applyToColumnVector(*operand_result));
   }
 }
 
@@ -166,4 +177,35 @@ void ScalarUnaryExpression::initHelper(bool own_children) {
   }
 }
 
+void ScalarUnaryExpression::getFieldStringItems(
+    std::vector<std::string> *inline_field_names,
+    std::vector<std::string> *inline_field_values,
+    std::vector<std::string> *non_container_child_field_names,
+    std::vector<const Expression*> *non_container_child_fields,
+    std::vector<std::string> *container_child_field_names,
+    std::vector<std::vector<const Expression*>> *container_child_fields) const {
+  // Start with whatever the Scalar base class reports.
+  Scalar::getFieldStringItems(inline_field_names, inline_field_values,
+                              non_container_child_field_names, non_container_child_fields,
+                              container_child_field_names, container_child_fields);
+
+  // Without a fast operator, also report static_value_ ("NULL" when it is null).
+  if (fast_operator_ == nullptr) {
+    inline_field_names->emplace_back("static_value");
+    if (!static_value_.isNull()) {
+      inline_field_values->emplace_back(type_.printValueToString(static_value_));
+    } else {
+      inline_field_values->emplace_back("NULL");
+    }
+  }
+
+  // The operation's name is looked up in kUnaryOperationNames by its id.
+  const int operation_index = static_cast<int>(operation_.getUnaryOperationID());
+  inline_field_names->emplace_back("operation");
+  inline_field_values->emplace_back(kUnaryOperationNames[operation_index]);
+
+  non_container_child_field_names->emplace_back("operand");
+  non_container_child_fields->emplace_back(operand_.get());
+}
+
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/ScalarUnaryExpression.hpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarUnaryExpression.hpp b/expressions/scalar/ScalarUnaryExpression.hpp
index 608a842..efba14e 100644
--- a/expressions/scalar/ScalarUnaryExpression.hpp
+++ b/expressions/scalar/ScalarUnaryExpression.hpp
@@ -21,6 +21,7 @@
 #define QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_UNARY_EXPRESSION_HPP_
 
 #include <memory>
+#include <string>
 #include <utility>
 #include <vector>
 
@@ -29,6 +30,7 @@
 #include "expressions/scalar/Scalar.hpp"
 #include "storage/StorageBlockInfo.hpp"
 #include "types/TypedValue.hpp"
+#include "types/containers/ColumnVector.hpp"
 #include "types/operations/unary_operations/UnaryOperation.hpp"
 #include "utility/Macros.hpp"
 
@@ -36,7 +38,7 @@
 
 namespace quickstep {
 
-class ColumnVector;
+class ScalarCache;
 class ValueAccessor;
 
 struct SubBlocksReference;
@@ -93,15 +95,26 @@ class ScalarUnaryExpression : public Scalar {
     return static_value_;
   }
 
-  ColumnVector* getAllValues(ValueAccessor *accessor,
-                             const SubBlocksReference *sub_blocks_ref) const override;
+  ColumnVectorPtr getAllValues(ValueAccessor *accessor,
+                               const SubBlocksReference *sub_blocks_ref,
+                               ScalarCache *scalar_cache) const override;
 
-  ColumnVector* getAllValuesForJoin(
+  ColumnVectorPtr getAllValuesForJoin(
       const relation_id left_relation_id,
       ValueAccessor *left_accessor,
       const relation_id right_relation_id,
       ValueAccessor *right_accessor,
-      const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids) const override;
+      const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids,
+      ScalarCache *scalar_cache) const override;
+
+ protected:
+  void getFieldStringItems(
+      std::vector<std::string> *inline_field_names,
+      std::vector<std::string> *inline_field_values,
+      std::vector<std::string> *non_container_child_field_names,
+      std::vector<const Expression*> *non_container_child_fields,
+      std::vector<std::string> *container_child_field_names,
+      std::vector<std::vector<const Expression*>> *container_child_fields) const override;
 
  private:
   void initHelper(bool own_children);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 08b6467..9e5a2c8 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -214,6 +214,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
                       quickstep_queryoptimizer_logical_Logical
                       quickstep_queryoptimizer_physical_Physical
                       quickstep_queryoptimizer_rules_AttachLIPFilters
+                      quickstep_queryoptimizer_rules_CommonSubexpressionExtraction
                       quickstep_queryoptimizer_rules_FuseAggregateJoin
                       quickstep_queryoptimizer_rules_InjectJoinFilters
                       quickstep_queryoptimizer_rules_PruneColumns

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index ac51c31..34deb76 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -27,6 +27,7 @@
 #include "query_optimizer/logical/Logical.hpp"
 #include "query_optimizer/physical/Physical.hpp"
 #include "query_optimizer/rules/AttachLIPFilters.hpp"
+#include "query_optimizer/rules/CommonSubexpressionExtraction.hpp"
 #include "query_optimizer/rules/FuseAggregateJoin.hpp"
 #include "query_optimizer/rules/InjectJoinFilters.hpp"
 #include "query_optimizer/rules/PruneColumns.hpp"
@@ -148,6 +149,8 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
 
   rules.emplace_back(new FuseAggregateJoin());
 
+  rules.emplace_back(new CommonSubexpressionExtraction(optimizer_context_));
+
   // NOTE(jianqiao): Adding rules after InjectJoinFilters (or AttachLIPFilters) requires
   // extra handling of LIPFilterConfiguration for transformed nodes. So currently it is
   // suggested that all the new rules be placed before this point.
@@ -165,7 +168,8 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
              << physical_plan_->toString();
   }
 
-  DVLOG(4) << "Optimized physical plan:\n" << physical_plan_->toString();
+  // Keep the plan dump behind DVLOG(4) rather than writing it to std::cerr on every call.
+  DVLOG(4) << "Optimized physical plan:\n" << physical_plan_->toString();
 
   if (FLAGS_visualize_plan) {
     quickstep::PlanVisualizer plan_visualizer;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/AttributeReference.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/AttributeReference.cpp b/query_optimizer/expressions/AttributeReference.cpp
index f0e49d4..5dc59bb 100644
--- a/query_optimizer/expressions/AttributeReference.cpp
+++ b/query_optimizer/expressions/AttributeReference.cpp
@@ -19,6 +19,7 @@
 
 #include "query_optimizer/expressions/AttributeReference.hpp"
 
+#include <functional>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -26,6 +27,7 @@
 #include "expressions/scalar/ScalarAttribute.hpp"
 #include "query_optimizer/expressions/ExprId.hpp"
 #include "query_optimizer/expressions/Expression.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
 
 #include "glog/logging.h"
 
@@ -57,6 +59,22 @@ std::vector<AttributeReferencePtr> AttributeReference::getReferencedAttributes()
   return new ::quickstep::ScalarAttribute(*found_it->second);
 }
 
+std::size_t AttributeReference::computeHash() const {  // The hash is derived from id() alone.
+  return std::hash<std::size_t>()(static_cast<std::size_t>(id()));
+}
+
+bool AttributeReference::equals(const ScalarPtr &other) const {
+  // Equal only to something matching SomeAttributeReference with the same id().
+  AttributeReferencePtr other_attr;
+  if (!SomeAttributeReference::MatchesWithConditionalCast(other, &other_attr)) {
+    return false;
+  }
+  const bool same_id = (id() == other_attr->id());
+  // In debug builds, check that references sharing an id also share a type.
+  DCHECK(!same_id || type_.equals(other_attr->type_));
+  return same_id;
+}
+
 void AttributeReference::getFieldStringItems(
     std::vector<std::string> *inline_field_names,
     std::vector<std::string> *inline_field_values,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/AttributeReference.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/AttributeReference.hpp b/query_optimizer/expressions/AttributeReference.hpp
index f5207b1..27d8bcb 100644
--- a/query_optimizer/expressions/AttributeReference.hpp
+++ b/query_optimizer/expressions/AttributeReference.hpp
@@ -88,6 +88,8 @@ class AttributeReference : public NamedExpression {
   ::quickstep::Scalar* concretize(
       const std::unordered_map<ExprId, const CatalogAttribute*> &substitution_map) const override;
 
+  bool equals(const ScalarPtr &other) const override;
+
   /**
    * @brief Creates an immutable AttributReference.
    *
@@ -114,6 +116,8 @@ class AttributeReference : public NamedExpression {
   }
 
  protected:
+  std::size_t computeHash() const override;
+
   void getFieldStringItems(
      std::vector<std::string> *inline_field_names,
      std::vector<std::string> *inline_field_values,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/BinaryExpression.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/BinaryExpression.cpp b/query_optimizer/expressions/BinaryExpression.cpp
index 446dd55..aac675b 100644
--- a/query_optimizer/expressions/BinaryExpression.cpp
+++ b/query_optimizer/expressions/BinaryExpression.cpp
@@ -31,6 +31,7 @@
 #include "query_optimizer/expressions/PatternMatcher.hpp"
 #include "types/operations/binary_operations/BinaryOperation.hpp"
 #include "types/operations/binary_operations/BinaryOperationID.hpp"
+#include "utility/HashPair.hpp"
 
 #include "glog/logging.h"
 
@@ -104,6 +105,22 @@ std::vector<AttributeReferencePtr> BinaryExpression::getReferencedAttributes() c
       right_->concretize(substitution_map));
 }
 
+std::size_t BinaryExpression::computeHash() const {
+  // Mix (expression kind, operation id) with (left hash, right hash).
+  const std::size_t kind_hash = static_cast<std::size_t>(ExpressionType::kBinaryExpression);
+  const std::size_t op_hash = static_cast<std::size_t>(operation_.getBinaryOperationID());
+  return CombineHashes(CombineHashes(kind_hash, op_hash), CombineHashes(left_->hash(), right_->hash()));
+}
+
+bool BinaryExpression::equals(const ScalarPtr &other) const {
+  BinaryExpressionPtr rhs;
+  if (!SomeBinaryExpression::MatchesWithConditionalCast(other, &rhs)) {
+    return false;  // other did not match SomeBinaryExpression.
+  }
+  return operation_.getBinaryOperationID() == rhs->operation_.getBinaryOperationID()
+         && left_->equals(rhs->left_) && right_->equals(rhs->right_);  // Operands in order.
+}
+
 void BinaryExpression::getFieldStringItems(
     std::vector<std::string> *inline_field_names,
     std::vector<std::string> *inline_field_values,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/BinaryExpression.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/BinaryExpression.hpp b/query_optimizer/expressions/BinaryExpression.hpp
index 9b11ed1..6a37679 100644
--- a/query_optimizer/expressions/BinaryExpression.hpp
+++ b/query_optimizer/expressions/BinaryExpression.hpp
@@ -90,6 +90,8 @@ class BinaryExpression : public Scalar {
   ::quickstep::Scalar* concretize(
       const std::unordered_map<ExprId, const CatalogAttribute*> &substitution_map) const override;
 
+  bool equals(const ScalarPtr &other) const override;
+
   static BinaryExpressionPtr Create(const BinaryOperation &operation,
                                     const ScalarPtr &left,
                                     const ScalarPtr &right) {
@@ -97,6 +99,8 @@ class BinaryExpression : public Scalar {
   }
 
  protected:
+  std::size_t computeHash() const override;
+
   void getFieldStringItems(
       std::vector<std::string> *inline_field_names,
       std::vector<std::string> *inline_field_values,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/CMakeLists.txt b/query_optimizer/expressions/CMakeLists.txt
index 35fac90..2a5f610 100644
--- a/query_optimizer/expressions/CMakeLists.txt
+++ b/query_optimizer/expressions/CMakeLists.txt
@@ -21,7 +21,11 @@ add_library(quickstep_queryoptimizer_expressions_Alias Alias.cpp Alias.hpp)
 add_library(quickstep_queryoptimizer_expressions_AttributeReference AttributeReference.cpp AttributeReference.hpp)
 add_library(quickstep_queryoptimizer_expressions_BinaryExpression BinaryExpression.cpp BinaryExpression.hpp)
 add_library(quickstep_queryoptimizer_expressions_Cast Cast.cpp Cast.hpp)
-add_library(quickstep_queryoptimizer_expressions_ComparisonExpression ComparisonExpression.cpp
+add_library(quickstep_queryoptimizer_expressions_CommonSubexpression
+            CommonSubexpression.cpp
+            CommonSubexpression.hpp)
+add_library(quickstep_queryoptimizer_expressions_ComparisonExpression
+            ComparisonExpression.cpp
             ComparisonExpression.hpp)
 add_library(quickstep_queryoptimizer_expressions_Exists Exists.cpp Exists.hpp)
 add_library(quickstep_queryoptimizer_expressions_Expression ../../empty_src.cpp Expression.hpp)
@@ -43,7 +47,9 @@ add_library(quickstep_queryoptimizer_expressions_SearchedCase SearchedCase.cpp S
 add_library(quickstep_queryoptimizer_expressions_SimpleCase SimpleCase.cpp SimpleCase.hpp)
 add_library(quickstep_queryoptimizer_expressions_SubqueryExpression SubqueryExpression.cpp SubqueryExpression.hpp)
 add_library(quickstep_queryoptimizer_expressions_UnaryExpression UnaryExpression.cpp UnaryExpression.hpp)
-add_library(quickstep_queryoptimizer_expressions_WindowAggregateFunction WindowAggregateFunction.cpp WindowAggregateFunction.hpp)
+add_library(quickstep_queryoptimizer_expressions_WindowAggregateFunction
+            WindowAggregateFunction.cpp
+            WindowAggregateFunction.hpp)
 
 # Link dependencies:
 target_link_libraries(quickstep_queryoptimizer_expressions_AggregateFunction
@@ -78,6 +84,7 @@ target_link_libraries(quickstep_queryoptimizer_expressions_AttributeReference
                       quickstep_queryoptimizer_expressions_Expression
                       quickstep_queryoptimizer_expressions_ExpressionType
                       quickstep_queryoptimizer_expressions_NamedExpression
+                      quickstep_queryoptimizer_expressions_PatternMatcher
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_expressions_BinaryExpression
                       glog
@@ -91,6 +98,7 @@ target_link_libraries(quickstep_queryoptimizer_expressions_BinaryExpression
                       quickstep_queryoptimizer_expressions_Scalar
                       quickstep_types_operations_binaryoperations_BinaryOperation
                       quickstep_types_operations_binaryoperations_BinaryOperationID
+                      quickstep_utility_HashPair
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_expressions_Cast
                       glog
@@ -105,6 +113,18 @@ target_link_libraries(quickstep_queryoptimizer_expressions_Cast
                       quickstep_queryoptimizer_expressions_Scalar
                       quickstep_types_Type
                       quickstep_types_operations_unaryoperations_NumericCastOperation
+                      quickstep_utility_HashPair
+                      quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_expressions_CommonSubexpression
+                      glog
+                      quickstep_expressions_scalar_ScalarSharedExpression
+                      quickstep_queryoptimizer_OptimizerTree
+                      quickstep_queryoptimizer_expressions_AttributeReference
+                      quickstep_queryoptimizer_expressions_ExprId
+                      quickstep_queryoptimizer_expressions_Expression
+                      quickstep_queryoptimizer_expressions_ExpressionType
+                      quickstep_queryoptimizer_expressions_PatternMatcher
+                      quickstep_queryoptimizer_expressions_Scalar
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_expressions_ComparisonExpression
                       glog
@@ -233,6 +253,7 @@ target_link_libraries(quickstep_queryoptimizer_expressions_Scalar
                       glog
                       quickstep_queryoptimizer_expressions_ExprId
                       quickstep_queryoptimizer_expressions_Expression
+                      quickstep_utility_HashError
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_expressions_ScalarLiteral
                       glog
@@ -242,9 +263,11 @@ target_link_libraries(quickstep_queryoptimizer_expressions_ScalarLiteral
                       quickstep_queryoptimizer_expressions_ExprId
                       quickstep_queryoptimizer_expressions_Expression
                       quickstep_queryoptimizer_expressions_ExpressionType
+                      quickstep_queryoptimizer_expressions_PatternMatcher
                       quickstep_queryoptimizer_expressions_Scalar
                       quickstep_types_Type
                       quickstep_types_TypedValue
+                      quickstep_utility_HashPair
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_expressions_SearchedCase
                       quickstep_expressions_predicate_Predicate
@@ -272,12 +295,14 @@ target_link_libraries(quickstep_queryoptimizer_expressions_SimpleCase
                       quickstep_queryoptimizer_expressions_ExprId
                       quickstep_queryoptimizer_expressions_Expression
                       quickstep_queryoptimizer_expressions_ExpressionType
+                      quickstep_queryoptimizer_expressions_PatternMatcher
                       quickstep_queryoptimizer_expressions_Predicate
                       quickstep_queryoptimizer_expressions_Scalar
                       quickstep_types_Type
                       quickstep_types_operations_comparisons_ComparisonFactory
                       quickstep_types_operations_comparisons_ComparisonID
                       quickstep_utility_Cast
+                      quickstep_utility_HashPair
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_expressions_SubqueryExpression
                       glog
@@ -301,6 +326,7 @@ target_link_libraries(quickstep_queryoptimizer_expressions_UnaryExpression
                       quickstep_queryoptimizer_expressions_Scalar
                       quickstep_types_operations_unaryoperations_UnaryOperation
                       quickstep_types_operations_unaryoperations_UnaryOperationID
+                      quickstep_utility_HashPair
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_expressions_WindowAggregateFunction
                       glog
@@ -324,6 +350,7 @@ target_link_libraries(quickstep_queryoptimizer_expressions
                       quickstep_queryoptimizer_expressions_AttributeReference
                       quickstep_queryoptimizer_expressions_BinaryExpression
                       quickstep_queryoptimizer_expressions_Cast
+                      quickstep_queryoptimizer_expressions_CommonSubexpression
                       quickstep_queryoptimizer_expressions_ComparisonExpression
                       quickstep_queryoptimizer_expressions_Exists
                       quickstep_queryoptimizer_expressions_Expression

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/Cast.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/Cast.cpp b/query_optimizer/expressions/Cast.cpp
index c0813c5..f4cac8b 100644
--- a/query_optimizer/expressions/Cast.cpp
+++ b/query_optimizer/expressions/Cast.cpp
@@ -33,6 +33,7 @@
 #include "query_optimizer/expressions/Scalar.hpp"
 #include "types/Type.hpp"
 #include "types/operations/unary_operations/NumericCastOperation.hpp"
+#include "utility/HashPair.hpp"
 
 #include "glog/logging.h"
 
@@ -55,6 +56,21 @@ ExpressionPtr Cast::copyWithNewChildren(
                                                 operand_->concretize(substitution_map));
 }
 
+std::size_t Cast::computeHash() const {
+  // Fold the expression kind and operand hash together, then the target TypeID.
+  const std::size_t kind_hash = static_cast<std::size_t>(ExpressionType::kCast);
+  const std::size_t type_hash = static_cast<std::size_t>(target_type_.getTypeID());
+  return CombineHashes(CombineHashes(kind_hash, operand_->hash()), type_hash);
+}
+
+bool Cast::equals(const ScalarPtr &other) const {
+  CastPtr rhs;
+  if (!SomeCast::MatchesWithConditionalCast(other, &rhs)) {
+    return false;  // other did not match SomeCast.
+  }
+  return operand_->equals(rhs->operand_) && target_type_.equals(rhs->target_type_);
+}
+
 void Cast::getFieldStringItems(
     std::vector<std::string> *inline_field_names,
     std::vector<std::string> *inline_field_values,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/Cast.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/Cast.hpp b/query_optimizer/expressions/Cast.hpp
index ac5bd02..11be775 100644
--- a/query_optimizer/expressions/Cast.hpp
+++ b/query_optimizer/expressions/Cast.hpp
@@ -78,6 +78,8 @@ class Cast : public Scalar {
   ::quickstep::Scalar* concretize(
       const std::unordered_map<ExprId, const CatalogAttribute*> &substitution_map) const override;
 
+  bool equals(const ScalarPtr &other) const override;
+
   /**
    * @brief Creates a Cast expression that converts \p operand to \p target_type.
    *
@@ -90,6 +92,8 @@ class Cast : public Scalar {
   }
 
  protected:
+  std::size_t computeHash() const override;
+
   void getFieldStringItems(
       std::vector<std::string> *inline_field_names,
       std::vector<std::string> *inline_field_values,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/CommonSubexpression.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/CommonSubexpression.cpp b/query_optimizer/expressions/CommonSubexpression.cpp
new file mode 100644
index 0000000..558b1fa
--- /dev/null
+++ b/query_optimizer/expressions/CommonSubexpression.cpp
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/expressions/CommonSubexpression.hpp"
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "expressions/scalar/ScalarSharedExpression.hpp"
+#include "query_optimizer/OptimizerTree.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/expressions/Expression.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/expressions/Scalar.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+namespace expressions {
+
+ExpressionPtr CommonSubexpression::copyWithNewChildren(
+    const std::vector<ExpressionPtr> &new_children) const {
+  DCHECK_EQ(new_children.size(), children().size());
+  const ExpressionPtr &new_operand = new_children[0];  // Becomes the copy's operand.
+  DCHECK(SomeScalar::Matches(new_operand));
+  return CommonSubexpression::Create(  // The copy keeps common_subexpression_id_.
+      common_subexpression_id_, std::static_pointer_cast<const Scalar>(new_operand));
+}
+
+::quickstep::Scalar* CommonSubexpression::concretize(
+    const std::unordered_map<ExprId, const CatalogAttribute*> &substitution_map) const {
+  ::quickstep::Scalar *concrete_operand = operand_->concretize(substitution_map);  // Operand first.
+  return new ::quickstep::ScalarSharedExpression(common_subexpression_id_, concrete_operand);
+}
+
+void CommonSubexpression::getFieldStringItems(
+    std::vector<std::string> *inline_field_names,
+    std::vector<std::string> *inline_field_values,
+    std::vector<std::string> *non_container_child_field_names,
+    std::vector<OptimizerTreeBaseNodePtr> *non_container_child_fields,
+    std::vector<std::string> *container_child_field_names,
+    std::vector<std::vector<OptimizerTreeBaseNodePtr>> *container_child_fields) const {
+  inline_field_names->emplace_back("common_subexpression_id");  // Inline field: the id.
+  inline_field_values->emplace_back(std::to_string(common_subexpression_id_));
+
+  non_container_child_field_names->emplace_back("Operand");  // Child field: the operand.
+  non_container_child_fields->emplace_back(operand_);
+}
+
+}  // namespace expressions
+}  // namespace optimizer
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/CommonSubexpression.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/CommonSubexpression.hpp b/query_optimizer/expressions/CommonSubexpression.hpp
new file mode 100644
index 0000000..c642996
--- /dev/null
+++ b/query_optimizer/expressions/CommonSubexpression.hpp
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_EXPRESSIONS_COMMON_SUBEXPRESSION_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_EXPRESSIONS_COMMON_SUBEXPRESSION_HPP_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/expressions/Expression.hpp"
+#include "query_optimizer/expressions/ExpressionType.hpp"
+#include "query_optimizer/expressions/Scalar.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+class Scalar;
+class Type;
+
+namespace optimizer {
+namespace expressions {
+
+/** \addtogroup OptimizerExpressions
+ *  @{
+ */
+
+class CommonSubexpression;
+typedef std::shared_ptr<const CommonSubexpression> CommonSubexpressionPtr;
+
+/**
+ * @brief A scalar expression node that tags its operand with a common
+ *        subexpression id. Constness, value type and referenced attributes
+ *        are all forwarded from the operand, which is also registered as the
+ *        node's only child.
+ */
+class CommonSubexpression : public Scalar {
+ public:
+  ExpressionType getExpressionType() const override {
+    return ExpressionType::kCommonSubexpression;
+  }
+
+  std::string getName() const override {
+    return "CommonSubexpression";
+  }
+
+  // Forwarded from the operand.
+  bool isConstant() const override {
+    return operand_->isConstant();
+  }
+
+  /**
+   * @return The id that identifies this common subexpression.
+   */
+  inline ExprId common_subexpression_id() const {
+    return common_subexpression_id_;
+  }
+
+  /**
+   * @return The operand that represents the common subexpression.
+   */
+  const ScalarPtr& operand() const {
+    return operand_;
+  }
+
+  // Forwarded from the operand.
+  const Type& getValueType() const override {
+    return operand_->getValueType();
+  }
+
+  ExpressionPtr copyWithNewChildren(
+      const std::vector<ExpressionPtr> &new_children) const override;
+
+  // Forwarded from the operand.
+  std::vector<AttributeReferencePtr> getReferencedAttributes() const override {
+    return operand_->getReferencedAttributes();
+  }
+
+  ::quickstep::Scalar* concretize(
+      const std::unordered_map<ExprId, const CatalogAttribute*> &substitution_map) const override;
+
+  /**
+   * @brief Creates an immutable CommonSubexpression.
+   *
+   * @param common_subexpression_id The id to tag the common subexpression with.
+   * @param operand The operand that represents the common subexpression.
+   * @return An immutable CommonSubexpression that is shared among multiple
+   *         expressions.
+   */
+  static CommonSubexpressionPtr Create(const ExprId common_subexpression_id,
+                                       const ScalarPtr &operand) {
+    return CommonSubexpressionPtr(
+        new CommonSubexpression(common_subexpression_id, operand));
+  }
+
+ protected:
+  void getFieldStringItems(
+      std::vector<std::string> *inline_field_names,
+      std::vector<std::string> *inline_field_values,
+      std::vector<std::string> *non_container_child_field_names,
+      std::vector<OptimizerTreeBaseNodePtr> *non_container_child_fields,
+      std::vector<std::string> *container_child_field_names,
+      std::vector<std::vector<OptimizerTreeBaseNodePtr>> *container_child_fields) const override;
+
+ private:
+  // Private: instances are only built through Create().
+  CommonSubexpression(const ExprId common_subexpression_id,
+                      const ScalarPtr &operand)
+      : common_subexpression_id_(common_subexpression_id),
+        operand_(operand) {
+    // The operand is the single child of this node.
+    addChild(operand);
+  }
+
+  ExprId common_subexpression_id_;
+  ScalarPtr operand_;
+
+  DISALLOW_COPY_AND_ASSIGN(CommonSubexpression);
+};
+
+/** @} */
+
+}  // namespace expressions
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_QUERY_OPTIMIZER_EXPRESSIONS_COMMON_SUBEXPRESSION_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/ExpressionType.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/ExpressionType.hpp b/query_optimizer/expressions/ExpressionType.hpp
index 5008f1d..ba7f639 100644
--- a/query_optimizer/expressions/ExpressionType.hpp
+++ b/query_optimizer/expressions/ExpressionType.hpp
@@ -32,11 +32,12 @@ namespace expressions {
  * @brief Optimizer expression types.
  **/
 enum class ExpressionType {
-  kAggregateFunction,
+  kAggregateFunction = 0,
   kAlias,
   kAttributeReference,
   kBinaryExpression,
   kCast,
+  kCommonSubexpression,
   kComparisonExpression,
   kExists,
   kInTableQuery,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/ExpressionUtil.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/ExpressionUtil.hpp b/query_optimizer/expressions/ExpressionUtil.hpp
index 6b8666e..29d90f0 100644
--- a/query_optimizer/expressions/ExpressionUtil.hpp
+++ b/query_optimizer/expressions/ExpressionUtil.hpp
@@ -85,9 +85,9 @@ template <class NamedExpressionType1, class NamedExpressionType2>
 bool ContainsExpression(
     const std::vector<std::shared_ptr<const NamedExpressionType1>> &expressions,
     const std::shared_ptr<const NamedExpressionType2> &expression_to_check) {
-  for (const std::shared_ptr<const NamedExpressionType1> &expression :
-       expressions) {
-    if (expression->equals(expression_to_check)) {
+  for (const auto &expression : expressions) {
+    if (expression->id() == expression_to_check->id()) {
+      DCHECK(expression->getExpressionType() == expression_to_check->getExpressionType());
       return true;
     }
   }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/NamedExpression.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/NamedExpression.cpp b/query_optimizer/expressions/NamedExpression.cpp
index 992a84a..2c2beac 100644
--- a/query_optimizer/expressions/NamedExpression.cpp
+++ b/query_optimizer/expressions/NamedExpression.cpp
@@ -25,6 +25,8 @@
 #include "query_optimizer/OptimizerTree.hpp"
 #include "types/Type.hpp"
 
+#include "glog/logging.h"
+
 namespace quickstep {
 namespace optimizer {
 namespace expressions {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/NamedExpression.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/NamedExpression.hpp b/query_optimizer/expressions/NamedExpression.hpp
index 9de8005..6725567 100644
--- a/query_optimizer/expressions/NamedExpression.hpp
+++ b/query_optimizer/expressions/NamedExpression.hpp
@@ -69,19 +69,6 @@ class NamedExpression : public Scalar {
    */
   inline const std::string& relation_name() const { return relation_name_; }
 
-  /**
-   * @brief Compares this named expression with \p other. Two named expressions
-   *        are equal if they have the same ExprId and are both Alias or
-   *        AttributeReference.
-   *
-   * @param other Another named expression to compare with.
-   * @return True if the named expression is equal to \p other.
-   */
-  inline bool equals(const NamedExpressionPtr &other) const {
-    return getExpressionType() == other->getExpressionType() &&
-           id_ == other->id();
-  }
-
  protected:
   /**
    * @brief Constructor.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/PatternMatcher.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/PatternMatcher.hpp b/query_optimizer/expressions/PatternMatcher.hpp
index 18d6b1c..e30a4d9 100644
--- a/query_optimizer/expressions/PatternMatcher.hpp
+++ b/query_optimizer/expressions/PatternMatcher.hpp
@@ -35,6 +35,7 @@ class Avg;
 class AttributeReference;
 class BinaryExpression;
 class Cast;
+class CommonSubexpression;
 class ComparisonExpression;
 class Count;
 class Exists;
@@ -50,6 +51,7 @@ class Predicate;
 class PredicateLiteral;
 class Scalar;
 class ScalarLiteral;
+class SimpleCase;
 class Sum;
 class UnaryExpression;
 class WindowAggregateFunction;
@@ -145,16 +147,13 @@ using SomeScalar = SomeExpressionNode<Scalar,
                                       ExpressionType::kAttributeReference,
                                       ExpressionType::kBinaryExpression,
                                       ExpressionType::kCast,
-                                      ExpressionType::kComparisonExpression,
-                                      ExpressionType::kLogicalAnd,
-                                      ExpressionType::kLogicalNot,
-                                      ExpressionType::kLogicalOr,
-                                      ExpressionType::kPredicateLiteral,
+                                      ExpressionType::kCommonSubexpression,
                                       ExpressionType::kScalarLiteral,
                                       ExpressionType::kSearchedCase,
                                       ExpressionType::kSimpleCase,
                                       ExpressionType::kUnaryExpression>;
 using SomeScalarLiteral = SomeExpressionNode<ScalarLiteral, ExpressionType::kScalarLiteral>;
+using SomeSimpleCase = SomeExpressionNode<SimpleCase, ExpressionType::kSimpleCase>;
 using SomeUnaryExpression = SomeExpressionNode<UnaryExpression, ExpressionType::kUnaryExpression>;
 using SomeWindowAggregateFunction = SomeExpressionNode<WindowAggregateFunction,
                                                        ExpressionType::kWindowAggregateFunction>;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/Scalar.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/Scalar.hpp b/query_optimizer/expressions/Scalar.hpp
index 4870118..36a8de5 100644
--- a/query_optimizer/expressions/Scalar.hpp
+++ b/query_optimizer/expressions/Scalar.hpp
@@ -20,11 +20,13 @@
 #ifndef QUICKSTEP_QUERY_OPTIMIZER_EXPRESSIONS_SCALAR_HPP_
 #define QUICKSTEP_QUERY_OPTIMIZER_EXPRESSIONS_SCALAR_HPP_
 
+#include <cstddef>
 #include <memory>
 #include <unordered_map>
 
 #include "query_optimizer/expressions/Expression.hpp"
 #include "query_optimizer/expressions/ExprId.hpp"
+#include "utility/HashError.hpp"
 #include "utility/Macros.hpp"
 
 namespace quickstep {
@@ -65,10 +67,27 @@ class Scalar : public Expression {
       const std::unordered_map<ExprId, const CatalogAttribute*>& substitution_map)
       const = 0;
 
+  /**
+   * @brief Equality check against another scalar expression. This base
+   *        implementation never reports equality; subclasses that support
+   *        comparison override it.
+   *
+   * @param other The scalar expression to compare with.
+   * @return Always false in this base implementation.
+   */
+  virtual bool equals(const ScalarPtr &other) const {
+    return false;
+  }
+
+  /**
+   * @brief Returns the hash code of this expression, computing it through
+   *        computeHash() on first use and caching it afterwards.
+   *
+   * @note If computeHash() throws, nothing is cached and the exception
+   *       propagates to the caller.
+   *
+   * @return The (cached) hash code.
+   */
+  std::size_t hash() const {
+    if (hash_cache_ != nullptr) {
+      return *hash_cache_;
+    }
+    hash_cache_ = std::make_unique<std::size_t>(computeHash());
+    return *hash_cache_;
+  }
+
  protected:
   Scalar() {}
 
+  /**
+   * @brief Computes the hash code of this expression. This base
+   *        implementation does not produce a value: it always throws
+   *        HashNotSupported, with a message that includes getName().
+   *        Subclasses that can be hashed override it.
+   *
+   * @return Never returns in this base implementation.
+   */
+  virtual std::size_t computeHash() const {
+    throw HashNotSupported("Unsupported computeHash() in " + getName());
+  }
+
  private:
+  mutable std::unique_ptr<std::size_t> hash_cache_;
+
   DISALLOW_COPY_AND_ASSIGN(Scalar);
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/ScalarLiteral.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/ScalarLiteral.cpp b/query_optimizer/expressions/ScalarLiteral.cpp
index d70c4cf..180d760 100644
--- a/query_optimizer/expressions/ScalarLiteral.cpp
+++ b/query_optimizer/expressions/ScalarLiteral.cpp
@@ -28,7 +28,9 @@
 #include "query_optimizer/expressions/AttributeReference.hpp"
 #include "query_optimizer/expressions/ExprId.hpp"
 #include "query_optimizer/expressions/Expression.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
 #include "types/Type.hpp"
+#include "utility/HashPair.hpp"
 
 #include "glog/logging.h"
 
@@ -51,6 +53,19 @@ ExpressionPtr ScalarLiteral::copyWithNewChildren(
   return new ::quickstep::ScalarLiteral(value_, value_type_);
 }
 
+// Hash of a literal: the expression-type tag combined with the hash of the
+// literal value itself.
+std::size_t ScalarLiteral::computeHash() const {
+  const std::size_t type_tag =
+      static_cast<std::size_t>(ExpressionType::kScalarLiteral);
+  const std::size_t value_hash = value_.getHash();
+  return CombineHashes(type_tag, value_hash);
+}
+
+// Two literals are equal when the other node is also a ScalarLiteral, the
+// value types match, and the values pass fastEqualCheck().
+bool ScalarLiteral::equals(const ScalarPtr &other) const {
+  ScalarLiteralPtr other_literal;
+  if (!SomeScalarLiteral::MatchesWithConditionalCast(other, &other_literal)) {
+    return false;
+  }
+  if (!value_type_.equals(other_literal->getValueType())) {
+    return false;
+  }
+  return value_.fastEqualCheck(other_literal->value());
+}
+
 void ScalarLiteral::getFieldStringItems(
     std::vector<std::string> *inline_field_names,
     std::vector<std::string> *inline_field_values,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/ScalarLiteral.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/ScalarLiteral.hpp b/query_optimizer/expressions/ScalarLiteral.hpp
index 8ebc41c..8a73405 100644
--- a/query_optimizer/expressions/ScalarLiteral.hpp
+++ b/query_optimizer/expressions/ScalarLiteral.hpp
@@ -81,6 +81,8 @@ class ScalarLiteral : public Scalar {
   ::quickstep::Scalar* concretize(
       const std::unordered_map<ExprId, const CatalogAttribute*> &substitution_map) const override;
 
+  bool equals(const ScalarPtr &other) const override;
+
   /**
    * @brief Creates an immutable ScalarLiteral.
    * @param literal_value The literal value.
@@ -92,6 +94,8 @@ class ScalarLiteral : public Scalar {
   }
 
  protected:
+  std::size_t computeHash() const override;
+
   void getFieldStringItems(
       std::vector<std::string> *inline_field_names,
       std::vector<std::string> *inline_field_values,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/SimpleCase.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/SimpleCase.cpp b/query_optimizer/expressions/SimpleCase.cpp
index 454d7b9..ccdd8e5 100644
--- a/query_optimizer/expressions/SimpleCase.cpp
+++ b/query_optimizer/expressions/SimpleCase.cpp
@@ -31,12 +31,14 @@
 #include "query_optimizer/expressions/AttributeReference.hpp"
 #include "query_optimizer/expressions/ComparisonExpression.hpp"
 #include "query_optimizer/expressions/Expression.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
 #include "query_optimizer/expressions/Predicate.hpp"
 #include "query_optimizer/expressions/Scalar.hpp"
 #include "types/Type.hpp"
 #include "types/operations/comparisons/ComparisonID.hpp"
 #include "types/operations/comparisons/ComparisonFactory.hpp"
 #include "utility/Cast.hpp"
+#include "utility/HashPair.hpp"
 
 #include "glog/logging.h"
 
@@ -161,6 +163,50 @@ ExpressionPtr SimpleCase::copyWithNewChildren(const std::vector<ExpressionPtr> &
       else_result_expression.release());
 }
 
+// Hash of a simple CASE: starts from the type tag and the case operand, then
+// folds in every (condition operand, result) pair in order, and finally the
+// ELSE result when one is present.
+std::size_t SimpleCase::computeHash() const {
+  std::size_t result =
+      CombineHashes(static_cast<std::size_t>(ExpressionType::kSimpleCase),
+                    case_operand_->hash());
+
+  const std::size_t num_branches = condition_operands_.size();
+  for (std::size_t branch = 0; branch < num_branches; ++branch) {
+    const std::size_t with_condition =
+        CombineHashes(result, condition_operands_[branch]->hash());
+    result = CombineHashes(with_condition,
+                           conditional_result_expressions_[branch]->hash());
+  }
+
+  // The ELSE branch is optional.
+  if (else_result_expression_ == nullptr) {
+    return result;
+  }
+  return CombineHashes(result, else_result_expression_->hash());
+}
+
+// Structural equality between two simple CASE expressions. They are equal
+// when the other node is also a SimpleCase, the case operands are equal, the
+// branches match pairwise (condition operand and result expression, in
+// order), and the optional ELSE results are either both absent or both
+// present and equal.
+bool SimpleCase::equals(const ScalarPtr &other) const {
+  SimpleCasePtr expr;
+  if (!SomeSimpleCase::MatchesWithConditionalCast(other, &expr)) {
+    return false;
+  }
+  if (!case_operand_->equals(expr->case_operand_)) {
+    return false;
+  }
+  if (condition_operands_.size() != expr->condition_operands_.size()) {
+    return false;
+  }
+  for (std::size_t i = 0; i < condition_operands_.size(); ++i) {
+    if (!condition_operands_[i]->equals(expr->condition_operands_[i])
+        || !conditional_result_expressions_[i]->equals(
+                expr->conditional_result_expressions_[i])) {
+      return false;
+    }
+  }
+
+  // The ELSE branch is optional, so it may be null on either side. Only call
+  // equals() when both sides actually have one; two CASE expressions without
+  // an ELSE branch must compare equal here instead of dereferencing null.
+  const bool has_else = else_result_expression_ != nullptr;
+  const bool other_has_else = expr->else_result_expression_ != nullptr;
+  if (has_else != other_has_else) {
+    return false;
+  }
+  if (has_else &&
+      !else_result_expression_->equals(expr->else_result_expression_)) {
+    return false;
+  }
+
+  DCHECK(value_type_.equals(expr->value_type_));
+  return true;
+}
+
 void SimpleCase::getFieldStringItems(
     std::vector<std::string> *inline_field_names,
     std::vector<std::string> *inline_field_values,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/SimpleCase.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/SimpleCase.hpp b/query_optimizer/expressions/SimpleCase.hpp
index 897d87f..bf3decd 100644
--- a/query_optimizer/expressions/SimpleCase.hpp
+++ b/query_optimizer/expressions/SimpleCase.hpp
@@ -110,6 +110,8 @@ class SimpleCase : public Scalar {
   ::quickstep::Scalar* concretize(
       const std::unordered_map<ExprId, const CatalogAttribute*>& substitution_map) const override;
 
+  bool equals(const ScalarPtr &other) const override;
+
   /**
    * @brief Creates an immutable SimpleCase.
    *
@@ -136,6 +138,8 @@ class SimpleCase : public Scalar {
   }
 
  protected:
+  std::size_t computeHash() const override;
+
   void getFieldStringItems(std::vector<std::string> *inline_field_names,
                            std::vector<std::string> *inline_field_values,
                            std::vector<std::string> *non_container_child_field_names,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/UnaryExpression.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/UnaryExpression.cpp b/query_optimizer/expressions/UnaryExpression.cpp
index b0fff62..06a2ee1 100644
--- a/query_optimizer/expressions/UnaryExpression.cpp
+++ b/query_optimizer/expressions/UnaryExpression.cpp
@@ -31,6 +31,7 @@
 #include "query_optimizer/expressions/Scalar.hpp"
 #include "types/operations/unary_operations/UnaryOperation.hpp"
 #include "types/operations/unary_operations/UnaryOperationID.hpp"
+#include "utility/HashPair.hpp"
 
 #include "glog/logging.h"
 
@@ -56,6 +57,22 @@ ExpressionPtr UnaryExpression::copyWithNewChildren(
       operation_, operand_->concretize(substitution_map));
 }
 
+// Hash of a unary expression: the type tag and the unary operation id are
+// combined first, then the operand's hash is folded in.
+std::size_t UnaryExpression::computeHash() const {
+  const std::size_t type_tag =
+      static_cast<std::size_t>(ExpressionType::kUnaryExpression);
+  const std::size_t operation_tag =
+      static_cast<std::size_t>(operation_.getUnaryOperationID());
+  const std::size_t header_hash = CombineHashes(type_tag, operation_tag);
+  return CombineHashes(header_hash, operand_->hash());
+}
+
+// Two unary expressions are equal when the other node is also a
+// UnaryExpression, both use the same unary operation id, and their operands
+// are equal.
+bool UnaryExpression::equals(const ScalarPtr &other) const {
+  UnaryExpressionPtr rhs;
+  if (!SomeUnaryExpression::MatchesWithConditionalCast(other, &rhs)) {
+    return false;
+  }
+  if (operation_.getUnaryOperationID() != rhs->operation_.getUnaryOperationID()) {
+    return false;
+  }
+  return operand_->equals(rhs->operand_);
+}
+
 void UnaryExpression::getFieldStringItems(
     std::vector<std::string> *inline_field_names,
     std::vector<std::string> *inline_field_values,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/expressions/UnaryExpression.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/UnaryExpression.hpp b/query_optimizer/expressions/UnaryExpression.hpp
index c4542d0..2a9e97e 100644
--- a/query_optimizer/expressions/UnaryExpression.hpp
+++ b/query_optimizer/expressions/UnaryExpression.hpp
@@ -29,8 +29,10 @@
 #include "query_optimizer/expressions/ExprId.hpp"
 #include "query_optimizer/expressions/Expression.hpp"
 #include "query_optimizer/expressions/ExpressionType.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
 #include "query_optimizer/expressions/Scalar.hpp"
 #include "types/operations/unary_operations/UnaryOperation.hpp"
+#include "utility/HashPair.hpp"
 #include "utility/Macros.hpp"
 
 #include "glog/logging.h"
@@ -85,6 +87,8 @@ class UnaryExpression : public Scalar {
   ::quickstep::Scalar* concretize(
       const std::unordered_map<ExprId, const CatalogAttribute*> &substitution_map) const override;
 
+  bool equals(const ScalarPtr &other) const override;
+
   /**
    * @brief Creates an immutable UnaryExpression.
    *
@@ -99,6 +103,8 @@ class UnaryExpression : public Scalar {
   }
 
  protected:
+  std::size_t computeHash() const override;
+
   void getFieldStringItems(
       std::vector<std::string> *inline_field_names,
       std::vector<std::string> *inline_field_values,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 6847951..b2d0f90 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -21,6 +21,9 @@ add_subdirectory(tests)
 add_library(quickstep_queryoptimizer_rules_AttachLIPFilters AttachLIPFilters.cpp AttachLIPFilters.hpp)
 add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp)
 add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp)
+add_library(quickstep_queryoptimizer_rules_CommonSubexpressionExtraction
+            CommonSubexpressionExtraction.cpp
+            CommonSubexpressionExtraction.hpp)
 add_library(quickstep_queryoptimizer_rules_FuseAggregateJoin FuseAggregateJoin.cpp FuseAggregateJoin.hpp)
 add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
 add_library(quickstep_queryoptimizer_rules_InjectJoinFilters InjectJoinFilters.cpp InjectJoinFilters.hpp)
@@ -77,6 +80,23 @@ target_link_libraries(quickstep_queryoptimizer_rules_CollapseProject
                       quickstep_queryoptimizer_rules_Rule
                       quickstep_queryoptimizer_rules_RuleHelper
                       quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_CommonSubexpressionExtraction
+                      glog
+                      quickstep_queryoptimizer_OptimizerContext
+                      quickstep_queryoptimizer_expressions_Alias
+                      quickstep_queryoptimizer_expressions_CommonSubexpression
+                      quickstep_queryoptimizer_expressions_Expression
+                      quickstep_queryoptimizer_expressions_ExpressionType
+                      quickstep_queryoptimizer_expressions_NamedExpression
+                      quickstep_queryoptimizer_expressions_PatternMatcher
+                      quickstep_queryoptimizer_expressions_Scalar
+                      quickstep_queryoptimizer_physical_Aggregate
+                      quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_physical_PhysicalType
+                      quickstep_queryoptimizer_physical_Selection
+                      quickstep_queryoptimizer_rules_Rule
+                      quickstep_utility_HashError
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_rules_FuseAggregateJoin
                       quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
                       quickstep_queryoptimizer_expressions_AggregateFunction
@@ -311,6 +331,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
                       quickstep_queryoptimizer_rules_AttachLIPFilters
                       quickstep_queryoptimizer_rules_BottomUpRule
                       quickstep_queryoptimizer_rules_CollapseProject
+                      quickstep_queryoptimizer_rules_CommonSubexpressionExtraction
                       quickstep_queryoptimizer_rules_FuseAggregateJoin
                       quickstep_queryoptimizer_rules_GenerateJoins
                       quickstep_queryoptimizer_rules_InjectJoinFilters

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/rules/CommonSubexpressionExtraction.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CommonSubexpressionExtraction.cpp b/query_optimizer/rules/CommonSubexpressionExtraction.cpp
new file mode 100644
index 0000000..b06d9fc
--- /dev/null
+++ b/query_optimizer/rules/CommonSubexpressionExtraction.cpp
@@ -0,0 +1,264 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/CommonSubexpressionExtraction.hpp"
+
+#include <memory>
+#include <unordered_set>
+#include <vector>
+
+#include "query_optimizer/OptimizerContext.hpp"
+#include "query_optimizer/expressions/Alias.hpp"
+#include "query_optimizer/expressions/CommonSubexpression.hpp"
+#include "query_optimizer/expressions/ExpressionType.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/expressions/Scalar.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "utility/HashError.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+// Stores the optimizer context and fills homogeneous_whitelist_ with the
+// expression types (as ints) whose children are traversed by visitAndCount()
+// and visitAndTransform() using the caller's shared counter state.
+CommonSubexpressionExtraction::CommonSubexpressionExtraction(
+    OptimizerContext *optimizer_context)
+    : optimizer_context_(optimizer_context) {
+  for (const E::ExpressionType expr_type :
+       {E::ExpressionType::kAlias,
+        E::ExpressionType::kAttributeReference,
+        E::ExpressionType::kAggregateFunction,
+        E::ExpressionType::kBinaryExpression,
+        E::ExpressionType::kCast,
+        E::ExpressionType::kCommonSubexpression,
+        E::ExpressionType::kScalarLiteral,
+        E::ExpressionType::kUnaryExpression}) {
+    homogeneous_whitelist_.emplace(static_cast<int>(expr_type));
+  }
+}
+
+// Entry point of the rule. The input is expected to be a top-level plan (this
+// is only checked in debug builds); the rewrite itself is done recursively by
+// applyInternal().
+P::PhysicalPtr CommonSubexpressionExtraction::apply(const P::PhysicalPtr &input) {
+  DCHECK(P::PhysicalType::kTopLevelPlan == input->getPhysicalType());
+
+  P::PhysicalPtr rewritten_plan = applyInternal(input);
+  return rewritten_plan;
+}
+
+// Rewrites the plan rooted at `input` bottom-up. Children are processed
+// first; then, if the (possibly rebuilt) node is a Selection or an Aggregate,
+// its expressions are run through transformExpressions() and the node is
+// recreated when any expression changed. All other node types are returned
+// with only their children updated.
+P::PhysicalPtr CommonSubexpressionExtraction::applyInternal(
+    const P::PhysicalPtr &input) {
+  std::vector<P::PhysicalPtr> new_children;
+  for (const auto &child : input->children()) {
+    new_children.emplace_back(applyInternal(child));
+  }
+
+  // Reuse the input node as-is when no child was replaced.
+  const P::PhysicalPtr node =
+      new_children == input->children()
+          ? input
+          : input->copyWithNewChildren(new_children);
+
+  switch (node->getPhysicalType()) {
+    case P::PhysicalType::kSelection: {
+      const P::SelectionPtr selection =
+          std::static_pointer_cast<const P::Selection>(node);
+
+      // Only the project expressions are transformed; the filter predicate is
+      // passed through unchanged below.
+      const std::vector<E::NamedExpressionPtr> new_expressions =
+          DownCast<E::NamedExpression>(
+              transformExpressions(UpCast(selection->project_expressions())));
+
+      if (new_expressions != selection->project_expressions()) {
+        return P::Selection::Create(selection->input(),
+                                    new_expressions,
+                                    selection->filter_predicate());
+      }
+      break;
+    }
+    case P::PhysicalType::kAggregate: {
+      const P::AggregatePtr aggregate =
+          std::static_pointer_cast<const P::Aggregate>(node);
+      // Aggregate expressions come first and grouping expressions are appended
+      // after them, so both kinds are analyzed together as a single batch.
+      std::vector<E::ExpressionPtr> expressions =
+          UpCast(aggregate->aggregate_expressions());
+      for (const auto &expr : aggregate->grouping_expressions()) {
+        expressions.emplace_back(expr);
+      }
+
+      const std::vector<E::ExpressionPtr> new_expressions =
+          transformExpressions(expressions);
+
+      if (new_expressions != expressions) {
+        std::vector<E::AliasPtr> new_aggregate_expressions;
+        std::vector<E::NamedExpressionPtr> new_grouping_expressions;
+        // Split the combined result back apart using the original number of
+        // aggregate expressions as the boundary.
+        const std::size_t num_aggrs = aggregate->aggregate_expressions().size();
+
+        for (std::size_t i = 0; i < num_aggrs; ++i) {
+          DCHECK(E::SomeAlias::Matches(new_expressions[i]));
+          new_aggregate_expressions.emplace_back(
+              std::static_pointer_cast<const E::Alias>(new_expressions[i]));
+        }
+        for (std::size_t i = num_aggrs; i < new_expressions.size(); ++i) {
+          DCHECK(E::SomeNamedExpression::Matches(new_expressions[i]));
+          new_grouping_expressions.emplace_back(
+              std::static_pointer_cast<const E::NamedExpression>(new_expressions[i]));
+        }
+        return P::Aggregate::Create(aggregate->input(),
+                                    new_grouping_expressions,
+                                    new_aggregate_expressions,
+                                    aggregate->filter_predicate());
+      }
+      break;
+    }
+    default:
+      break;
+  }
+
+  // Either an unsupported node type or nothing changed in its expressions.
+  return node;
+}
+
+// Processes a batch of expressions together in two passes: the first pass
+// counts scalar occurrences across the whole batch, the second rewrites each
+// expression using those counts and one substitution map shared by the batch.
+// The result has one entry per input expression, in the same order.
+std::vector<E::ExpressionPtr> CommonSubexpressionExtraction::transformExpressions(
+    const std::vector<E::ExpressionPtr> &expressions) {
+  // Pass 1: gather occurrence counts and the set of hashable scalars.
+  ScalarCounter occurrences;
+  ScalarHashable hashable_scalars;
+  for (std::size_t i = 0; i < expressions.size(); ++i) {
+    visitAndCount(expressions[i], &occurrences, &hashable_scalars);
+  }
+
+  // Pass 2: rewrite every expression, starting from a reference count of 1.
+  ScalarMap replacements;
+  std::vector<E::ExpressionPtr> rewritten;
+  for (std::size_t i = 0; i < expressions.size(); ++i) {
+    rewritten.emplace_back(
+        visitAndTransform(expressions[i],
+                          1,
+                          occurrences,
+                          hashable_scalars,
+                          &replacements));
+  }
+  return rewritten;
+}
+
+// Single-expression convenience wrapper: runs transformExpressions() on a
+// one-element batch and returns its only result.
+E::ExpressionPtr CommonSubexpressionExtraction::transformExpression(
+    const E::ExpressionPtr &expression) {
+  const std::vector<E::ExpressionPtr> single_batch{expression};
+  const std::vector<E::ExpressionPtr> rewritten = transformExpressions(single_batch);
+  return rewritten.front();
+}
+
+// Counts occurrences of the scalars found under `expression` and records which
+// of them could be counted.
+//
+// Returns true only if `expression` matches SomeScalar, every visited child
+// returned true, and incrementing its counter entry did not throw. On success
+// the scalar is also added to `hashable`.
+bool CommonSubexpressionExtraction::visitAndCount(
+    const E::ExpressionPtr &expression,
+    ScalarCounter *counter,
+    ScalarHashable *hashable) {
+  bool children_hashable = true;
+
+  // Children are only descended into for whitelisted expression types. For any
+  // other type the children are not visited and children_hashable stays true.
+  const auto homogeneous_whitelist_it =
+      homogeneous_whitelist_.find(static_cast<int>(expression->getExpressionType()));
+  if (homogeneous_whitelist_it != homogeneous_whitelist_.end()) {
+    for (const auto &child : expression->children()) {
+      // Non-short-circuit &=: every child is visited even after one fails.
+      children_hashable &= visitAndCount(child, counter, hashable);
+    }
+  }
+
+  E::ScalarPtr scalar;
+  if (children_hashable &&
+      E::SomeScalar::MatchesWithConditionalCast(expression, &scalar)) {
+    try {
+      // NOTE(review): presumably the counter's hasher calls Scalar::hash(),
+      // which throws HashNotSupported for node types without computeHash();
+      // ScalarCounter is declared outside this view -- confirm.
+      ++(*counter)[scalar];
+    } catch (const HashNotSupported &e) {
+      return false;
+    }
+    hashable->emplace(scalar);
+    return true;
+  }
+  return false;
+}
+
+// Rewrites `expression`, wrapping shared scalars in CommonSubexpression nodes.
+//
+// expression          The expression to rewrite.
+// max_reference_count The occurrence count of the nearest enclosing counted
+//                     scalar (1 at the top of an expression).
+// counter             Occurrence counts gathered by visitAndCount().
+// hashable            The scalars that visitAndCount() managed to count.
+// substitution_map    Scalars already wrapped, mapped to their
+//                     CommonSubexpression; updated by this call.
+//
+// A counted scalar is wrapped only when its own count is larger than
+// max_reference_count, i.e. when it occurs more often than the expression
+// that encloses it. Returns the rewritten expression, or `expression` itself
+// when nothing changed.
+E::ExpressionPtr CommonSubexpressionExtraction::visitAndTransform(
+    const E::ExpressionPtr &expression,
+    const std::size_t max_reference_count,
+    const ScalarCounter &counter,
+    const ScalarHashable &hashable,
+    ScalarMap *substitution_map) {
+  // Literals and attribute references are returned untouched.
+  // TODO: figure out whether it is beneficial.
+  if (expression->getExpressionType() == E::ExpressionType::kScalarLiteral ||
+      expression->getExpressionType() == E::ExpressionType::kAttributeReference) {
+    return expression;
+  }
+
+  E::ScalarPtr scalar;
+  const bool is_hashable =
+      E::SomeScalar::MatchesWithConditionalCast(expression, &scalar)
+          && hashable.find(scalar) != hashable.end();
+
+  std::size_t new_max_reference_count;
+  if (is_hashable) {
+    // CommonSubexpression node already generated.
+    const auto substitution_map_it = substitution_map->find(scalar);
+    if (substitution_map_it != substitution_map->end()) {
+      return substitution_map_it->second;
+    }
+
+    const auto counter_it = counter.find(scalar);
+    DCHECK(counter_it != counter.end());
+    DCHECK_LE(max_reference_count, counter_it->second);
+    new_max_reference_count = counter_it->second;
+  } else {
+    // Not counted: children inherit the enclosing reference count.
+    new_max_reference_count = max_reference_count;
+  }
+
+  std::vector<E::ExpressionPtr> new_children;
+  const auto homogeneous_whitelist_it =
+      homogeneous_whitelist_.find(static_cast<int>(expression->getExpressionType()));
+  if (homogeneous_whitelist_it == homogeneous_whitelist_.end()) {
+    // Non-whitelisted node: each child is analyzed on its own, with fresh
+    // counters and a fresh substitution map.
+    for (const auto &child : expression->children()) {
+      new_children.emplace_back(transformExpression(child));
+    }
+  } else {
+    // Whitelisted node: children share this call's counters and map.
+    for (const auto &child : expression->children()) {
+      new_children.emplace_back(
+          visitAndTransform(child,
+                            new_max_reference_count,
+                            counter,
+                            hashable,
+                            substitution_map));
+    }
+  }
+
+  E::ExpressionPtr output;
+  if (new_children == expression->children()) {
+    output = expression;
+  } else {
+    // copyWithNewChildren() already yields an ExpressionPtr. It must not be
+    // cast to Scalar here: `expression` is not necessarily a Scalar (the
+    // whitelist admits aggregate functions, and non-whitelisted nodes such as
+    // predicates also reach this point), so that downcast would be invalid.
+    output = expression->copyWithNewChildren(new_children);
+  }
+
+  if (is_hashable && new_max_reference_count > max_reference_count) {
+    // is_hashable guarantees the original node matched SomeScalar, so the
+    // rebuilt node is expected to be a Scalar as well.
+    DCHECK(E::SomeScalar::Matches(output));
+    const E::CommonSubexpressionPtr common_subexpression =
+        E::CommonSubexpression::Create(
+            optimizer_context_->nextExprId(),
+            std::static_pointer_cast<const E::Scalar>(output));
+    substitution_map->emplace(scalar, common_subexpression);
+    output = common_subexpression;
+  }
+
+  return output;
+}
+
+}  // namespace optimizer
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/query_optimizer/rules/CommonSubexpressionExtraction.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CommonSubexpressionExtraction.hpp b/query_optimizer/rules/CommonSubexpressionExtraction.hpp
new file mode 100644
index 0000000..121552c
--- /dev/null
+++ b/query_optimizer/rules/CommonSubexpressionExtraction.hpp
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_COMMON_SUBEXPRESSION_EXTRACTION_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_COMMON_SUBEXPRESSION_EXTRACTION_HPP_
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "query_optimizer/expressions/CommonSubexpression.hpp"
+#include "query_optimizer/expressions/Expression.hpp"
+#include "query_optimizer/expressions/Scalar.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+class OptimizerContext;
+
+/** \addtogroup OptimizerRules
+ *  @{
+ */
+
+/**
+ * @brief Rule on physical plans that wraps scalar expressions occurring more
+ *        than once in CommonSubexpression nodes (see visitAndTransform()).
+ **/
+class CommonSubexpressionExtraction : public Rule<physical::Physical> {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param optimizer_context The optimizer context; the rule draws expression
+   *        ids for newly created CommonSubexpression nodes from it.
+   **/
+  explicit CommonSubexpressionExtraction(OptimizerContext *optimizer_context);
+
+  ~CommonSubexpressionExtraction() override {}
+
+  std::string getName() const override {
+    return "CommonSubexpressionExtraction";
+  }
+
+  physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+  physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input);
+
+  // Hash functor that delegates to the scalar's own hash().
+  struct ScalarHash {
+    inline std::size_t operator()(const expressions::ScalarPtr &scalar) const {
+      return scalar->hash();
+    }
+  };
+
+  // Equality functor that delegates to the scalar's own equals().
+  struct ScalarEqual {
+    inline bool operator()(const expressions::ScalarPtr &lhs,
+                           const expressions::ScalarPtr &rhs) const {
+      return lhs->equals(rhs);
+    }
+  };
+
+  // Occurrence count per scalar, keyed through ScalarHash / ScalarEqual.
+  using ScalarCounter =
+      std::unordered_map<expressions::ScalarPtr, std::size_t, ScalarHash, ScalarEqual>;
+
+  // Maps a scalar to the CommonSubexpression node created to replace it.
+  using ScalarMap =
+      std::unordered_map<expressions::ScalarPtr,
+                         expressions::CommonSubexpressionPtr,
+                         ScalarHash,
+                         ScalarEqual>;
+
+  // Set of scalars using the default hash and equality of ScalarPtr rather
+  // than ScalarHash / ScalarEqual.
+  using ScalarHashable = std::unordered_set<expressions::ScalarPtr>;
+
+  std::vector<expressions::ExpressionPtr> transformExpressions(
+      const std::vector<expressions::ExpressionPtr> &expressions);
+
+  expressions::ExpressionPtr transformExpression(
+      const expressions::ExpressionPtr &expression);
+
+  bool visitAndCount(
+      const expressions::ExpressionPtr &expression,
+      ScalarCounter *counter,
+      ScalarHashable *hashable);
+
+  /**
+   * @brief Rebuild an expression, wrapping repeated scalars.
+   *
+   * Children of expression types listed in homogeneous_whitelist_ are visited
+   * recursively with the same counters; children of any other type are handed
+   * to transformExpression() instead. When the expression is hashable and its
+   * count raises the running maximum above max_reference_count, the result is
+   * wrapped in a new CommonSubexpression that is also recorded in
+   * substitution_map.
+   **/
+  expressions::ExpressionPtr visitAndTransform(
+      const expressions::ExpressionPtr &expression,
+      const std::size_t max_reference_count,
+      const ScalarCounter &counter,
+      const ScalarHashable &hashable,
+      ScalarMap *substitution_map);
+
+  // Copies a vector of shared pointers to ScalarSubclassT into ExpressionPtr's.
+  template <typename ScalarSubclassT>
+  static std::vector<expressions::ExpressionPtr> UpCast(
+      const std::vector<std::shared_ptr<const ScalarSubclassT>> &expressions) {
+    std::vector<expressions::ExpressionPtr> output;
+    output.reserve(expressions.size());
+    for (const auto &expr : expressions) {
+      output.emplace_back(expr);
+    }
+    return output;
+  }
+
+  // Inverse of UpCast(). Uses static_pointer_cast, so there is no runtime
+  // check that each element really is a ScalarSubclassT.
+  template <typename ScalarSubclassT>
+  static std::vector<std::shared_ptr<const ScalarSubclassT>> DownCast(
+      const std::vector<expressions::ExpressionPtr> &expressions) {
+    std::vector<std::shared_ptr<const ScalarSubclassT>> output;
+    output.reserve(expressions.size());
+    for (const auto &expr : expressions) {
+      output.emplace_back(std::static_pointer_cast<const ScalarSubclassT>(expr));
+    }
+    return output;
+  }
+
+  OptimizerContext *optimizer_context_;
+  // Expression types, stored as int, whose children visitAndTransform()
+  // recurses into directly.
+  std::unordered_set<int> homogeneous_whitelist_;
+
+  DISALLOW_COPY_AND_ASSIGN(CommonSubexpressionExtraction);
+};
+
+/** @} */
+
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_QUERY_OPTIMIZER_RULES_COMMON_SUBEXPRESSION_EXTRACTION_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index 4ea809b..b93ec49 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -262,6 +262,7 @@ target_link_libraries(quickstep_relationaloperators_HashJoinOperator
                       quickstep_expressions_predicate_Predicate
                       quickstep_expressions_scalar_Scalar
                       quickstep_expressions_scalar_ScalarAttribute
+                      quickstep_expressions_scalar_ScalarCache
                       quickstep_queryexecution_QueryContext
                       quickstep_queryexecution_WorkOrderProtosContainer
                       quickstep_queryexecution_WorkOrdersContainer
@@ -318,6 +319,7 @@ target_link_libraries(quickstep_relationaloperators_NestedLoopsJoinOperator
                       quickstep_catalog_CatalogTypedefs
                       quickstep_expressions_predicate_Predicate
                       quickstep_expressions_scalar_Scalar
+                      quickstep_expressions_scalar_ScalarCache
                       quickstep_queryexecution_QueryContext
                       quickstep_queryexecution_WorkOrderProtosContainer
                       quickstep_queryexecution_WorkOrdersContainer


[2/5] incubator-quickstep git commit: Initial commit

Posted by ji...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index 0e75411..cd376c1 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -32,6 +32,7 @@
 #include "expressions/predicate/Predicate.hpp"
 #include "expressions/scalar/Scalar.hpp"
 #include "expressions/scalar/ScalarAttribute.hpp"
+#include "expressions/scalar/ScalarCache.hpp"
 #include "query_execution/QueryContext.hpp"
 #include "query_execution/WorkOrderProtosContainer.hpp"
 #include "query_execution/WorkOrdersContainer.hpp"
@@ -532,6 +533,7 @@ void HashInnerJoinWorkOrder::executeWithoutCopyElision(ValueAccessor *probe_acce
     }
 
     ColumnVectorsValueAccessor temp_result;
+    std::unique_ptr<ScalarCache> scalar_cache = std::make_unique<ScalarCache>();
     for (auto selection_cit = selection_.begin();
          selection_cit != selection_.end();
          ++selection_cit) {
@@ -539,8 +541,10 @@ void HashInnerJoinWorkOrder::executeWithoutCopyElision(ValueAccessor *probe_acce
                                                                   build_accessor.get(),
                                                                   probe_relation_id,
                                                                   probe_accessor,
-                                                                  build_block_entry.second));
+                                                                  build_block_entry.second,
+                                                                  scalar_cache.get()));
     }
+    scalar_cache.reset();
 
     output_destination_->bulkInsertTuples(&temp_result);
   }
@@ -649,12 +653,14 @@ void HashInnerJoinWorkOrder::executeWithCopyElision(ValueAccessor *probe_accesso
         zipped_joined_tuple_ids.emplace_back(build_tids[i], probe_tids[i]);
       }
 
+      ScalarCache scalar_cache;
       for (const Scalar *scalar : non_trivial_expressions) {
         temp_result.addColumn(scalar->getAllValuesForJoin(build_relation_id,
                                                           build_accessor.get(),
                                                           probe_relation_id,
                                                           probe_accessor,
-                                                          zipped_joined_tuple_ids));
+                                                          zipped_joined_tuple_ids,
+                                                          &scalar_cache));
       }
     }
 
@@ -765,13 +771,16 @@ void HashSemiJoinWorkOrder::executeWithResidualPredicate() {
 
   std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
       probe_store.createValueAccessor(&filter));
+
   ColumnVectorsValueAccessor temp_result;
+  std::unique_ptr<ScalarCache> scalar_cache = std::make_unique<ScalarCache>();
   for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
        selection_it != selection_.end();
        ++selection_it) {
     temp_result.addColumn((*selection_it)->getAllValues(
-        probe_accessor_with_filter.get(), &sub_blocks_ref));
+        probe_accessor_with_filter.get(), &sub_blocks_ref, scalar_cache.get()));
   }
+  scalar_cache.reset();
 
   output_destination_->bulkInsertTuples(&temp_result);
 }
@@ -828,12 +837,15 @@ void HashSemiJoinWorkOrder::executeWithoutResidualPredicate() {
 
   std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
       probe_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
+
   ColumnVectorsValueAccessor temp_result;
+  std::unique_ptr<ScalarCache> scalar_cache = std::make_unique<ScalarCache>();
   for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
        selection_it != selection_.end(); ++selection_it) {
     temp_result.addColumn((*selection_it)->getAllValues(
-        probe_accessor_with_filter.get(), &sub_blocks_ref));
+        probe_accessor_with_filter.get(), &sub_blocks_ref, scalar_cache.get()));
   }
+  scalar_cache.reset();
 
   output_destination_->bulkInsertTuples(&temp_result);
 }
@@ -886,12 +898,15 @@ void HashAntiJoinWorkOrder::executeWithoutResidualPredicate() {
 
   std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
       probe_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
+
   ColumnVectorsValueAccessor temp_result;
+  std::unique_ptr<ScalarCache> scalar_cache = std::make_unique<ScalarCache>();
   for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
        selection_it != selection_.end(); ++selection_it) {
     temp_result.addColumn((*selection_it)->getAllValues(
-        probe_accessor_with_filter.get(), &sub_blocks_ref));
+        probe_accessor_with_filter.get(), &sub_blocks_ref, scalar_cache.get()));
   }
+  scalar_cache.reset();
 
   output_destination_->bulkInsertTuples(&temp_result);
 }
@@ -976,14 +991,18 @@ void HashAntiJoinWorkOrder::executeWithResidualPredicate() {
 
   std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
       probe_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
+
   ColumnVectorsValueAccessor temp_result;
+  std::unique_ptr<ScalarCache> scalar_cache = std::make_unique<ScalarCache>();
   for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
        selection_it != selection_.end();
        ++selection_it) {
     temp_result.addColumn(
         (*selection_it)->getAllValues(probe_accessor_with_filter.get(),
-                                      &sub_blocks_ref));
+                                      &sub_blocks_ref,
+                                      scalar_cache.get()));
   }
+  scalar_cache.reset();
 
   output_destination_->bulkInsertTuples(&temp_result);
 }
@@ -1032,12 +1051,11 @@ void HashOuterJoinWorkOrder::execute() {
            &build_block_entry : *collector.getJoinedTupleMap()) {
     const BlockReference build_block =
         storage_manager_->getBlock(build_block_entry.first, build_relation_);
-    const TupleStorageSubBlock &build_store =
-        build_block->getTupleStorageSubBlock();
+    const TupleStorageSubBlock &build_store = build_block->getTupleStorageSubBlock();
+    std::unique_ptr<ValueAccessor> build_accessor(build_store.createValueAccessor());
 
-    std::unique_ptr<ValueAccessor> build_accessor(
-        build_store.createValueAccessor());
     ColumnVectorsValueAccessor temp_result;
+    std::unique_ptr<ScalarCache> scalar_cache = std::make_unique<ScalarCache>();
     for (auto selection_it = selection_.begin();
          selection_it != selection_.end();
          ++selection_it) {
@@ -1047,8 +1065,11 @@ void HashOuterJoinWorkOrder::execute() {
               build_accessor.get(),
               probe_relation_id,
               probe_accessor.get(),
-              build_block_entry.second));
+              build_block_entry.second,
+              scalar_cache.get()));
     }
+    scalar_cache.reset();
+
     output_destination_->bulkInsertTuples(&temp_result);
   }
 
@@ -1061,8 +1082,9 @@ void HashOuterJoinWorkOrder::execute() {
   if (num_tuples_without_matches > 0) {
     std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
         probe_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
-    ColumnVectorsValueAccessor temp_result;
 
+    ColumnVectorsValueAccessor temp_result;
+    std::unique_ptr<ScalarCache> scalar_cache = std::make_unique<ScalarCache>();
     for (std::size_t i = 0; i < selection_.size(); ++i) {
       if (is_selection_on_build_[i]) {
         // NOTE(harshad, jianqiao): The assumption here is that any operation
@@ -1090,9 +1112,12 @@ void HashOuterJoinWorkOrder::execute() {
       } else {
         temp_result.addColumn(
             selection_[i]->getAllValues(probe_accessor_with_filter.get(),
-                                        &sub_blocks_ref));
+                                        &sub_blocks_ref,
+                                        scalar_cache.get()));
       }
     }
+    scalar_cache.reset();
+
     output_destination_->bulkInsertTuples(&temp_result);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/relational_operators/NestedLoopsJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/NestedLoopsJoinOperator.cpp b/relational_operators/NestedLoopsJoinOperator.cpp
index f17402f..a6bacc7 100644
--- a/relational_operators/NestedLoopsJoinOperator.cpp
+++ b/relational_operators/NestedLoopsJoinOperator.cpp
@@ -27,6 +27,7 @@
 #include "catalog/CatalogRelationSchema.hpp"
 #include "expressions/predicate/Predicate.hpp"
 #include "expressions/scalar/Scalar.hpp"
+#include "expressions/scalar/ScalarCache.hpp"
 #include "query_execution/QueryContext.hpp"
 #include "query_execution/WorkOrderProtosContainer.hpp"
 #include "query_execution/WorkOrdersContainer.hpp"
@@ -417,6 +418,7 @@ void NestedLoopsJoinWorkOrder::executeHelper(const TupleStorageSubBlock &left_st
     // evaluation and data movement, but low enough that temporary memory
     // requirements don't get out of hand).
     ColumnVectorsValueAccessor temp_result;
+    std::unique_ptr<ScalarCache> scalar_cache = std::make_unique<ScalarCache>();
     for (vector<unique_ptr<const Scalar>>::const_iterator selection_cit = selection_.begin();
          selection_cit != selection_.end();
          ++selection_cit) {
@@ -424,8 +426,10 @@ void NestedLoopsJoinWorkOrder::executeHelper(const TupleStorageSubBlock &left_st
                                                                   left_accessor.get(),
                                                                   right_input_relation_id,
                                                                   right_accessor.get(),
-                                                                  joined_tuple_ids));
+                                                                  joined_tuple_ids,
+                                                                  scalar_cache.get()));
     }
+    scalar_cache.reset();
 
     output_destination_->bulkInsertTuples(&temp_result);
   }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/storage/AggregationOperationState.cpp
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.cpp b/storage/AggregationOperationState.cpp
index 90543c4..facc7fa 100644
--- a/storage/AggregationOperationState.cpp
+++ b/storage/AggregationOperationState.cpp
@@ -38,6 +38,7 @@
 #include "expressions/aggregation/AggregationHandle.hpp"
 #include "expressions/predicate/Predicate.hpp"
 #include "expressions/scalar/Scalar.hpp"
+#include "expressions/scalar/ScalarCache.hpp"
 #include "storage/AggregationOperationState.pb.h"
 #include "storage/CollisionFreeVectorTable.hpp"
 #include "storage/HashTableFactory.hpp"
@@ -491,9 +492,10 @@ void AggregationOperationState::aggregateBlock(const block_id input_block,
     SubBlocksReference sub_blocks_ref(tuple_store,
                                       block->getIndices(),
                                       block->getIndicesConsistent());
+    ScalarCache scalar_cache;
     for (const auto &expression : non_trivial_expressions_) {
       non_trivial_results->addColumn(
-          expression->getAllValues(accessor, &sub_blocks_ref));
+          expression->getAllValues(accessor, &sub_blocks_ref, &scalar_cache));
     }
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/storage/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt
index cb1f098..0a1d484 100644
--- a/storage/CMakeLists.txt
+++ b/storage/CMakeLists.txt
@@ -276,6 +276,7 @@ target_link_libraries(quickstep_storage_AggregationOperationState
                       quickstep_expressions_aggregation_AggregationHandle
                       quickstep_expressions_predicate_Predicate
                       quickstep_expressions_scalar_Scalar
+                      quickstep_expressions_scalar_ScalarCache
                       quickstep_storage_AggregationOperationState_proto
                       quickstep_storage_CollisionFreeVectorTable
                       quickstep_storage_HashTableBase
@@ -936,6 +937,7 @@ target_link_libraries(quickstep_storage_StorageBlock
                       quickstep_catalog_CatalogTypedefs
                       quickstep_expressions_predicate_Predicate
                       quickstep_expressions_scalar_Scalar
+                      quickstep_expressions_scalar_ScalarCache
                       quickstep_storage_BasicColumnStoreTupleStorageSubBlock
                       quickstep_storage_BloomFilterIndexSubBlock
                       quickstep_storage_CSBTreeIndexSubBlock
@@ -1086,6 +1088,7 @@ target_link_libraries(quickstep_storage_WindowAggregationOperationState
                       quickstep_expressions_Expressions_proto
                       quickstep_expressions_scalar_Scalar
                       quickstep_expressions_scalar_ScalarAttribute
+                      quickstep_expressions_scalar_ScalarCache
                       quickstep_expressions_windowaggregation_WindowAggregateFunction
                       quickstep_expressions_windowaggregation_WindowAggregateFunctionFactory
                       quickstep_expressions_windowaggregation_WindowAggregationHandle

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/storage/StorageBlock.cpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.cpp b/storage/StorageBlock.cpp
index e91c1ac..d724317 100644
--- a/storage/StorageBlock.cpp
+++ b/storage/StorageBlock.cpp
@@ -30,6 +30,7 @@
 #include "catalog/CatalogTypedefs.hpp"
 #include "expressions/predicate/Predicate.hpp"
 #include "expressions/scalar/Scalar.hpp"
+#include "expressions/scalar/ScalarCache.hpp"
 #include "storage/BasicColumnStoreTupleStorageSubBlock.hpp"
 #include "storage/BloomFilterIndexSubBlock.hpp"
 #include "storage/CSBTreeIndexSubBlock.hpp"
@@ -369,15 +370,18 @@ void StorageBlock::select(const vector<unique_ptr<const Scalar>> &selection,
                                       indices_,
                                       indices_consistent_);
 
-    std::unique_ptr<ValueAccessor> accessor(
-        tuple_store_->createValueAccessor(filter));
+    std::unique_ptr<ValueAccessor> accessor(tuple_store_->createValueAccessor(filter));
+    ScalarCache scalar_cache;
 
     for (vector<unique_ptr<const Scalar>>::const_iterator selection_cit = selection.begin();
          selection_cit != selection.end();
          ++selection_cit) {
       // TODO(chasseur): Can probably elide some copies for parts of the
       // selection that are ScalarAttribute or ScalarLiteral.
-      temp_result.addColumn((*selection_cit)->getAllValues(accessor.get(), &sub_blocks_ref));
+      temp_result.addColumn(
+          (*selection_cit)->getAllValues(accessor.get(),
+                                         &sub_blocks_ref,
+                                         &scalar_cache));
     }
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/storage/WindowAggregationOperationState.cpp
----------------------------------------------------------------------
diff --git a/storage/WindowAggregationOperationState.cpp b/storage/WindowAggregationOperationState.cpp
index 58bdf18..2c571ef 100644
--- a/storage/WindowAggregationOperationState.cpp
+++ b/storage/WindowAggregationOperationState.cpp
@@ -33,6 +33,7 @@
 #include "expressions/Expressions.pb.h"
 #include "expressions/scalar/Scalar.hpp"
 #include "expressions/scalar/ScalarAttribute.hpp"
+#include "expressions/scalar/ScalarCache.hpp"
 #include "expressions/window_aggregation/WindowAggregateFunction.hpp"
 #include "expressions/window_aggregation/WindowAggregateFunctionFactory.hpp"
 #include "expressions/window_aggregation/WindowAggregationHandle.hpp"
@@ -236,11 +237,16 @@ void WindowAggregationOperationState::windowAggregateBlocks(
       argument_accessor = new ColumnVectorsValueAccessor();
     }
 
+    std::unique_ptr<ScalarCache> scalar_cache = std::make_unique<ScalarCache>();
     for (const std::unique_ptr<const Scalar> &argument : arguments_) {
       argument_accessor->addColumn(argument->getAllValues(tuple_accessor,
-                                                          &sub_block_ref));
+                                                          &sub_block_ref,
+                                                          scalar_cache.get()));
     }
 
+    // Release common subexpression cache as early as possible.
+    scalar_cache.reset();
+
     InvokeOnAnyValueAccessor(tuple_accessor,
                              [&] (auto *tuple_accessor) -> void {  // NOLINT(build/c++11)
       tuple_accessor->beginIteration();

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/types/containers/ColumnVector.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVector.hpp b/types/containers/ColumnVector.hpp
index fc65656..430a844 100644
--- a/types/containers/ColumnVector.hpp
+++ b/types/containers/ColumnVector.hpp
@@ -43,6 +43,9 @@ namespace quickstep {
 // TODO(chasseur): Look into ways to allocate ColumnVector memory from the
 // StorageManager.
 
+class ColumnVector;
+typedef std::shared_ptr<const ColumnVector> ColumnVectorPtr;
+
 /**
  * @brief A vector of values of the same type. Two implementations exist:
  *        NativeColumnVector (an array of fixed-size data elements) and

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/types/containers/ColumnVectorsValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVectorsValueAccessor.hpp b/types/containers/ColumnVectorsValueAccessor.hpp
index 6dc1124..d9cf49d 100644
--- a/types/containers/ColumnVectorsValueAccessor.hpp
+++ b/types/containers/ColumnVectorsValueAccessor.hpp
@@ -74,22 +74,23 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
    *             this value-accessor is responsible for freeing this column
    *             vector.
    **/
-  void addColumn(ColumnVector *column, const bool owns = true) {
+  void addColumn(ColumnVectorPtr column) {
     // If this is not the first column to be added, make sure it is the same
     // length as the others.
     DCHECK(columns_.empty()
            || (column->isNative()
-               ? (static_cast<const NativeColumnVector*>(column)->size() == column_length_)
-               : (static_cast<const IndirectColumnVector*>(column)->size() == column_length_)));
+               ? (static_cast<const NativeColumnVector*>(column.get())->size() == column_length_)
+               : (static_cast<const IndirectColumnVector*>(column.get())->size() == column_length_)));
     columns_.push_back(column);
     column_native_.push_back(column->isNative());
-    if (owns) {
-      deleter_.addObject(column);
-    }
-    column_length_
-        = column->isNative()
-          ? static_cast<const NativeColumnVector*>(column)->size()
-          : static_cast<const IndirectColumnVector*>(column)->size();
+    column_length_ =
+        column->isNative()
+            ? static_cast<const NativeColumnVector*>(column.get())->size()
+            : static_cast<const IndirectColumnVector*>(column.get())->size();
+  }
+
+  void addColumn(ColumnVector *column) {
+    addColumn(ColumnVectorPtr(column));
   }
 
   inline void beginIteration() {
@@ -309,11 +310,10 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
            && (static_cast<std::vector<ColumnVector*>::size_type>(attr_id) < columns_.size());
   }
 
-  std::vector<ColumnVector*> columns_;
+  std::vector<ColumnVectorPtr> columns_;
   std::vector<bool> column_native_;
   std::size_t column_length_;
   std::size_t current_position_;
-  ScopedDeleter deleter_;
 
   DISALLOW_COPY_AND_ASSIGN(ColumnVectorsValueAccessor);
 };

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index ca04462..ea9ee43 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -182,6 +182,7 @@ add_library(quickstep_utility_ExecutionDAGVisualizer
             ExecutionDAGVisualizer.cpp
             ExecutionDAGVisualizer.hpp)
 add_library(quickstep_utility_Glob Glob.cpp Glob.hpp)
+add_library(quickstep_utility_HashError ../empty_src.cpp HashError.hpp)
 add_library(quickstep_utility_HashPair ../empty_src.cpp HashPair.hpp)
 add_library(quickstep_utility_Macros ../empty_src.cpp Macros.hpp)
 add_library(quickstep_utility_MemStream ../empty_src.cpp MemStream.hpp)
@@ -350,6 +351,7 @@ target_link_libraries(quickstep_utility
                       quickstep_utility_EqualsAnyConstant
                       quickstep_utility_ExecutionDAGVisualizer
                       quickstep_utility_Glob
+                      quickstep_utility_HashError
                       quickstep_utility_HashPair
                       quickstep_utility_Macros
                       quickstep_utility_MemStream

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/utility/HashError.hpp
----------------------------------------------------------------------
diff --git a/utility/HashError.hpp b/utility/HashError.hpp
new file mode 100644
index 0000000..3a59979
--- /dev/null
+++ b/utility/HashError.hpp
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_HASH_ERROR_HPP_
+#define QUICKSTEP_UTILITY_HASH_ERROR_HPP_
+
+#include <exception>
+#include <string>
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ *  @{
+ */
+
+/**
+ * @brief Exception carrying a caller-supplied error message. Judging by its
+ *        name it reports that hashing is not supported for some object; the
+ *        throw sites are outside this header.
+ **/
+class HashNotSupported : public std::exception {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param message The error message.
+   **/
+  explicit HashNotSupported(const std::string &message)
+      : message_(message) {}
+
+  ~HashNotSupported() override {}
+
+  /**
+   * @brief Get the error message.
+   *
+   * @return The message given at construction, as a C string owned by this
+   *         exception object.
+   **/
+  const char* what() const noexcept override {
+    return message_.c_str();
+  }
+
+ private:
+  const std::string message_;
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_UTILITY_HASH_ERROR_HPP_


[5/5] incubator-quickstep git commit: Thread private numeric aggregation

Posted by ji...@apache.org.
Thread private numeric aggregation


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/b0acc9ce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/b0acc9ce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/b0acc9ce

Branch: refs/heads/common-subexpression
Commit: b0acc9ce93e713bfcce48655c144f3133dc3716f
Parents: cd01af2
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Sat Apr 8 19:04:12 2017 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Wed Apr 12 14:32:49 2017 -0500

----------------------------------------------------------------------
 query_optimizer/ExecutionGenerator.cpp          |  15 +-
 .../cost_model/StarSchemaSimpleCostModel.cpp    |  51 ++
 .../cost_model/StarSchemaSimpleCostModel.hpp    |   3 +
 storage/AggregationOperationState.cpp           |  57 ++-
 storage/AggregationOperationState.hpp           |   6 +-
 storage/CMakeLists.txt                          |  20 +
 storage/CollisionFreeVectorTable.hpp            |   4 +
 storage/HashTable.proto                         |   1 +
 storage/HashTableBase.hpp                       |   5 +-
 storage/HashTableFactory.hpp                    |  13 +-
 storage/HashTablePool.hpp                       |   4 +
 storage/PackedPayloadHashTable.hpp              |   4 +
 storage/ThreadPrivateNumericHashTable.cpp       |   0
 storage/ThreadPrivateNumericHashTable.hpp       | 483 +++++++++++++++++++
 14 files changed, 650 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 3e0f647..0304e2e 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -1580,14 +1580,23 @@ void ExecutionGenerator::convertAggregate(
             ->canUseCollisionFreeAggregation(physical_plan,
                                              estimated_num_groups,
                                              &max_num_groups)) {
+      std::cout << "Use collision free\n";
       aggr_state_proto->set_hash_table_impl_type(
           serialization::HashTableImplType::COLLISION_FREE_VECTOR);
       aggr_state_proto->set_estimated_num_entries(max_num_groups);
       use_parallel_initialization = true;
     } else {
-      // Otherwise, use SeparateChaining.
-      aggr_state_proto->set_hash_table_impl_type(
-          serialization::HashTableImplType::SEPARATE_CHAINING);
+      if (cost_model_for_aggregation_->canUseTwoPhaseNumericAggregation(
+              physical_plan, estimated_num_groups)) {
+        std::cout << "Use two phase numeric\n";
+        aggr_state_proto->set_hash_table_impl_type(
+            serialization::HashTableImplType::THREAD_PRIVATE_NUMERIC);
+      } else {
+        // Otherwise, use SeparateChaining.
+        std::cout << "Use normal\n";
+        aggr_state_proto->set_hash_table_impl_type(
+            serialization::HashTableImplType::SEPARATE_CHAINING);
+      }
       aggr_state_proto->set_estimated_num_entries(std::max(16uL, estimated_num_groups));
     }
   } else {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index b17fac0..272186f 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -700,6 +700,57 @@ bool StarSchemaSimpleCostModel::canUseCollisionFreeAggregation(
   return true;
 }
 
+// Returns true if the aggregation can be evaluated with the
+// THREAD_PRIVATE_NUMERIC hash table implementation. All of the following must
+// hold:
+//   - fewer than 1000 estimated groups;
+//   - every grouping key is fixed-length, non-nullable, 1/2/4/8 bytes wide,
+//     and all keys together fit in 8 bytes;
+//   - every aggregate is a non-DISTINCT COUNT, or a non-DISTINCT SUM over
+//     kInt/kLong/kFloat/kDouble, and no aggregate argument is nullable.
+bool StarSchemaSimpleCostModel::canUseTwoPhaseNumericAggregation(
+    const physical::AggregatePtr &aggregate,
+    const std::size_t estimated_num_groups) {
+  if (estimated_num_groups >= 1000u) {
+    return false;
+  }
+
+  std::size_t total_key_size = 0;
+  for (const auto &key_expr : aggregate->grouping_expressions()) {
+    const Type &type = key_expr->getValueType();
+    // ThreadPrivateNumericHashTable packs the raw key bytes into a single
+    // 64-bit code, which leaves no room to represent a NULL key.
+    if (type.isVariableLength() || type.isNullable()) {
+      return false;
+    }
+
+    const std::size_t key_size = type.maximumByteLength();
+    if (!QUICKSTEP_EQUALS_ANY_CONSTANT(key_size, 1u, 2u, 4u, 8u)) {
+      return false;
+    }
+
+    total_key_size += key_size;
+    if (total_key_size > 8u) {
+      return false;
+    }
+  }
+
+  for (const auto &agg_alias : aggregate->aggregate_expressions()) {
+    const E::AggregateFunctionPtr agg_expr =
+        std::static_pointer_cast<const E::AggregateFunction>(agg_alias->expression());
+    if (agg_expr->is_distinct()) {
+      return false;
+    }
+
+    // The per-group state in ThreadPrivateNumericHashTable is a bare 8-byte
+    // counter/sum without a NULL indicator, and COUNT is updated without
+    // looking at its argument. Nullable arguments (where NULLs must be skipped
+    // and an all-NULL SUM must yield NULL) are therefore left to the generic
+    // SeparateChaining path.
+    for (const auto &argument : agg_expr->getArguments()) {
+      if (argument->getValueType().isNullable()) {
+        return false;
+      }
+    }
+
+    switch (agg_expr->getAggregate().getAggregationID()) {
+      case AggregationID::kCount:
+        break;
+      case AggregationID::kSum: {
+        DCHECK_EQ(1u, agg_expr->getArguments().size());
+        const auto &argument = agg_expr->getArguments().front();
+        if (!QUICKSTEP_EQUALS_ANY_CONSTANT(argument->getValueType().getTypeID(),
+                                           kInt, kLong, kFloat, kDouble)) {
+          return false;
+        }
+        break;
+      }
+      default:
+        return false;
+    }
+  }
+
+  return true;
+}
+
 }  // namespace cost
 }  // namespace optimizer
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
index 0461077..111e3e1 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp
@@ -184,6 +184,9 @@ class StarSchemaSimpleCostModel : public CostModel {
                                       const std::size_t estimated_num_groups,
                                       std::size_t *max_num_groups);
 
+  /**
+   * @brief Checks whether an aggregate node can be evaluated with the
+   *        THREAD_PRIVATE_NUMERIC hash table implementation.
+   *
+   * @param aggregate The physical aggregate node.
+   * @param estimated_num_groups The estimated number of distinct groups.
+   * @return True if the estimated number of groups is below 1000, all
+   *         grouping keys are fixed-length with 1/2/4/8-byte widths that sum
+   *         to at most 8 bytes, and every aggregate is a non-DISTINCT COUNT or
+   *         a non-DISTINCT SUM over int/long/float/double.
+   **/
+  bool canUseTwoPhaseNumericAggregation(const physical::AggregatePtr &aggregate,
+                                        const std::size_t estimated_num_groups);
+
  private:
   std::size_t estimateCardinalityForAggregate(
       const physical::AggregatePtr &physical_plan);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/storage/AggregationOperationState.cpp
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.cpp b/storage/AggregationOperationState.cpp
index facc7fa..d6f21bc 100644
--- a/storage/AggregationOperationState.cpp
+++ b/storage/AggregationOperationState.cpp
@@ -49,6 +49,7 @@
 #include "storage/StorageBlockInfo.hpp"
 #include "storage/StorageManager.hpp"
 #include "storage/SubBlocksReference.hpp"
+#include "storage/ThreadPrivateNumericHashTable.hpp"
 #include "storage/TupleIdSequence.hpp"
 #include "storage/TupleStorageSubBlock.hpp"
 #include "storage/ValueAccessor.hpp"
@@ -94,11 +95,15 @@ AggregationOperationState::AggregationOperationState(
                                     !is_distinct_.empty(), std::logical_and<bool>())),
       storage_manager_(storage_manager) {
   if (!group_by.empty()) {
-    if (hash_table_impl_type == HashTableImplType::kCollisionFreeVector) {
-      is_aggregate_collision_free_ = true;
-    } else {
-      is_aggregate_partitioned_ = checkAggregatePartitioned(
-          estimated_num_entries, is_distinct_, group_by, aggregate_functions);
+    switch (hash_table_impl_type) {
+      case HashTableImplType::kCollisionFreeVector:
+        is_aggregate_collision_free_ = true;
+        break;
+      case HashTableImplType::kThreadPrivateNumeric:
+        break;
+      default:
+        is_aggregate_partitioned_ = checkAggregatePartitioned(
+            estimated_num_entries, is_distinct_, group_by, aggregate_functions);
     }
   }
 
@@ -715,7 +720,17 @@ void AggregationOperationState::finalizeHashTable(
     finalizeHashTableImplPartitioned(partition_id, output_destination);
   } else {
     DCHECK_EQ(0u, partition_id);
-    finalizeHashTableImplThreadPrivate(output_destination);
+    DCHECK(group_by_hashtable_pool_ != nullptr);
+    switch (group_by_hashtable_pool_->getHashTableImplType()) {
+      case HashTableImplType::kSeparateChaining:
+        finalizeHashTableImplThreadPrivatePackedPayload(output_destination);
+        break;
+      case HashTableImplType::kThreadPrivateNumeric:
+        finalizeHashTableImplThreadPrivateNumeric(output_destination);
+        break;
+      default:
+        LOG(FATAL) << "Not supported";
+    }
   }
 }
 
@@ -840,7 +855,7 @@ void AggregationOperationState::finalizeHashTableImplPartitioned(
   output_destination->bulkInsertTuples(&complete_result);
 }
 
-void AggregationOperationState::finalizeHashTableImplThreadPrivate(
+void AggregationOperationState::finalizeHashTableImplThreadPrivatePackedPayload(
     InsertDestination *output_destination) {
   // TODO(harshad) - The merge phase may be slower when each hash table contains
   // large number of entries. We should find ways in which we can perform a
@@ -948,6 +963,34 @@ void AggregationOperationState::finalizeHashTableImplThreadPrivate(
   output_destination->bulkInsertTuples(&complete_result);
 }
 
+// Folds every thread-private numeric hash table in the pool into the last one,
+// then materializes the merged groups and bulk-inserts them into the output.
+void AggregationOperationState::finalizeHashTableImplThreadPrivateNumeric(
+    InsertDestination *output_destination) {
+  auto *partial_tables = group_by_hashtable_pool_->getAllHashTables();
+  DCHECK(partial_tables != nullptr);
+  if (partial_tables->empty()) {
+    return;
+  }
+
+  // The last table becomes the merge target; take ownership of it first.
+  std::unique_ptr<ThreadPrivateNumericHashTable> merged_table(
+      static_cast<ThreadPrivateNumericHashTable*>(
+          partial_tables->back().release()));
+
+  // Every remaining table is merged into the target and then released at the
+  // end of its loop iteration.
+  const std::size_t num_to_merge = partial_tables->size() - 1;
+  for (std::size_t idx = 0; idx != num_to_merge; ++idx) {
+    std::unique_ptr<AggregationStateHashTableBase> partial(
+        partial_tables->at(idx).release());
+    const auto *typed_partial =
+        static_cast<const ThreadPrivateNumericHashTable*>(partial.get());
+    merged_table->merge(typed_partial);
+    partial->destroyPayload();
+  }
+
+  ColumnVectorsValueAccessor complete_result;
+  merged_table->finalize(&complete_result);
+  merged_table->destroyPayload();
+
+  // Bulk-insert the complete result.
+  output_destination->bulkInsertTuples(&complete_result);
+}
+
 std::size_t AggregationOperationState::getMemoryConsumptionBytes() const {
   std::size_t memory = getMemoryConsumptionBytesHelper(distinctify_hashtables_);
   memory += getMemoryConsumptionBytesHelper(group_by_hashtables_);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/storage/AggregationOperationState.hpp
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.hpp b/storage/AggregationOperationState.hpp
index e6af494..e666a68 100644
--- a/storage/AggregationOperationState.hpp
+++ b/storage/AggregationOperationState.hpp
@@ -256,7 +256,11 @@ class AggregationOperationState {
   void finalizeHashTableImplPartitioned(const std::size_t partition_id,
                                         InsertDestination *output_destination);
 
-  void finalizeHashTableImplThreadPrivate(InsertDestination *output_destination);
+  void finalizeHashTableImplThreadPrivatePackedPayload(
+      InsertDestination *output_destination);
+
+  void finalizeHashTableImplThreadPrivateNumeric(
+      InsertDestination *output_destination);
 
   std::size_t getMemoryConsumptionBytesHelper(
       const std::vector<std::unique_ptr<AggregationStateHashTableBase>>

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/storage/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt
index 0a1d484..b971240 100644
--- a/storage/CMakeLists.txt
+++ b/storage/CMakeLists.txt
@@ -250,6 +250,9 @@ add_library(quickstep_storage_StorageManager StorageManager.cpp StorageManager.h
 add_library(quickstep_storage_SubBlockTypeRegistry SubBlockTypeRegistry.cpp SubBlockTypeRegistry.hpp)
 add_library(quickstep_storage_SubBlockTypeRegistryMacros ../empty_src.cpp SubBlockTypeRegistryMacros.hpp)
 add_library(quickstep_storage_SubBlocksReference ../empty_src.cpp SubBlocksReference.hpp)
+add_library(quickstep_storage_ThreadPrivateNumericHashTable
+            ThreadPrivateNumericHashTable.cpp
+            ThreadPrivateNumericHashTable.hpp)
 add_library(quickstep_storage_TupleIdSequence ../empty_src.cpp TupleIdSequence.hpp)
 add_library(quickstep_storage_TupleReference ../empty_src.cpp TupleReference.hpp)
 add_library(quickstep_storage_TupleStorageSubBlock TupleStorageSubBlock.cpp TupleStorageSubBlock.hpp)
@@ -289,6 +292,7 @@ target_link_libraries(quickstep_storage_AggregationOperationState
                       quickstep_storage_StorageBlockInfo
                       quickstep_storage_StorageManager
                       quickstep_storage_SubBlocksReference
+                      quickstep_storage_ThreadPrivateNumericHashTable
                       quickstep_storage_TupleIdSequence
                       quickstep_storage_TupleStorageSubBlock
                       quickstep_storage_ValueAccessor
@@ -724,6 +728,7 @@ target_link_libraries(quickstep_storage_HashTableFactory
                       quickstep_storage_PackedPayloadHashTable
                       quickstep_storage_SeparateChainingHashTable
                       quickstep_storage_SimpleScalarSeparateChainingHashTable
+                      quickstep_storage_ThreadPrivateNumericHashTable
                       quickstep_storage_TupleReference
                       quickstep_types_Type
                       quickstep_types_TypeFactory
@@ -1039,6 +1044,20 @@ target_link_libraries(quickstep_storage_SubBlockTypeRegistry
 target_link_libraries(quickstep_storage_SubBlocksReference
                       glog
                       quickstep_utility_PtrVector)
+target_link_libraries(quickstep_storage_ThreadPrivateNumericHashTable
+                      glog
+                      quickstep_catalog_CatalogTypedefs
+                      quickstep_expressions_aggregation_AggregationHandle
+                      quickstep_expressions_aggregation_AggregationID
+                      quickstep_storage_HashTableBase
+                      quickstep_storage_ValueAccessorMultiplexer
+                      quickstep_storage_ValueAccessorUtil
+                      quickstep_types_Type
+                      quickstep_types_TypeID
+                      quickstep_types_containers_ColumnVector
+                      quickstep_types_containers_ColumnVectorsValueAccessor
+                      quickstep_utility_Macros
+                      quickstep_utility_ScopedBuffer)
 target_link_libraries(quickstep_storage_TupleIdSequence
                       quickstep_storage_StorageBlockInfo
                       quickstep_utility_BitVector
@@ -1164,6 +1183,7 @@ target_link_libraries(quickstep_storage
                       quickstep_storage_SubBlockTypeRegistry
                       quickstep_storage_SubBlockTypeRegistryMacros
                       quickstep_storage_SubBlocksReference
+                      quickstep_storage_ThreadPrivateNumericHashTable
                       quickstep_storage_TupleIdSequence
                       quickstep_storage_TupleReference
                       quickstep_storage_TupleStorageSubBlock

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/storage/CollisionFreeVectorTable.hpp
----------------------------------------------------------------------
diff --git a/storage/CollisionFreeVectorTable.hpp b/storage/CollisionFreeVectorTable.hpp
index 490a5cc..221a221 100644
--- a/storage/CollisionFreeVectorTable.hpp
+++ b/storage/CollisionFreeVectorTable.hpp
@@ -70,6 +70,10 @@ class CollisionFreeVectorTable : public AggregationStateHashTableBase {
 
   ~CollisionFreeVectorTable() override;
 
+  /**
+   * @brief Get the implementation type of this hash table.
+   *
+   * @return Always HashTableImplType::kCollisionFreeVector.
+   **/
+  HashTableImplType getImplType() const override {
+    return HashTableImplType::kCollisionFreeVector;
+  }
+
   void destroyPayload() override;
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/storage/HashTable.proto
----------------------------------------------------------------------
diff --git a/storage/HashTable.proto b/storage/HashTable.proto
index 6839ebc..80e363c 100644
--- a/storage/HashTable.proto
+++ b/storage/HashTable.proto
@@ -26,6 +26,7 @@ enum HashTableImplType {
   LINEAR_OPEN_ADDRESSING = 1;
   SEPARATE_CHAINING = 2;
   SIMPLE_SCALAR_SEPARATE_CHAINING = 3;
+  THREAD_PRIVATE_NUMERIC = 4;
 }
 
 // NOTE(chasseur): This proto describes the run-time parameters for a resizable

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/storage/HashTableBase.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTableBase.hpp b/storage/HashTableBase.hpp
index 8be388a..c3cbddf 100644
--- a/storage/HashTableBase.hpp
+++ b/storage/HashTableBase.hpp
@@ -44,7 +44,8 @@ enum class HashTableImplType {
   kCollisionFreeVector,
   kLinearOpenAddressing,
   kSeparateChaining,
-  kSimpleScalarSeparateChaining
+  kSimpleScalarSeparateChaining,
+  kThreadPrivateNumeric
 };
 
 /**
@@ -117,6 +118,8 @@ class AggregationStateHashTableBase {
 
   virtual std::size_t getMemoryConsumptionBytes() const = 0;
 
+  /**
+   * @brief Get the implementation type of this hash table.
+   *
+   * @return The HashTableImplType reported by the concrete implementation.
+   **/
+  virtual HashTableImplType getImplType() const = 0;
+
  protected:
   AggregationStateHashTableBase() {}
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/storage/HashTableFactory.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTableFactory.hpp b/storage/HashTableFactory.hpp
index 9686429..52f4d5f 100644
--- a/storage/HashTableFactory.hpp
+++ b/storage/HashTableFactory.hpp
@@ -32,6 +32,7 @@
 #include "storage/PackedPayloadHashTable.hpp"
 #include "storage/SeparateChainingHashTable.hpp"
 #include "storage/SimpleScalarSeparateChainingHashTable.hpp"
+#include "storage/ThreadPrivateNumericHashTable.hpp"
 #include "storage/TupleReference.hpp"
 #include "types/TypeFactory.hpp"
 #include "utility/BloomFilter.hpp"
@@ -123,6 +124,8 @@ inline HashTableImplType HashTableImplTypeFromProto(
       return HashTableImplType::kSeparateChaining;
     case serialization::HashTableImplType::SIMPLE_SCALAR_SEPARATE_CHAINING:
       return HashTableImplType::kSimpleScalarSeparateChaining;
+    case serialization::HashTableImplType::THREAD_PRIVATE_NUMERIC:
+      return HashTableImplType::kThreadPrivateNumeric;
     default: {
       LOG(FATAL) << "Unrecognized serialization::HashTableImplType\n";
     }
@@ -355,7 +358,6 @@ class AggregationStateHashTableFactory {
    *        hash table constructor.
    * @return A new aggregation state hash table.
    **/
-
   static AggregationStateHashTableBase* CreateResizable(
       const HashTableImplType hash_table_type,
       const std::vector<const Type*> &key_types,
@@ -363,13 +365,16 @@ class AggregationStateHashTableFactory {
       const std::vector<AggregationHandle *> &handles,
       StorageManager *storage_manager) {
     switch (hash_table_type) {
-      case HashTableImplType::kSeparateChaining:
-        return new PackedPayloadHashTable(
-            key_types, num_entries, handles, storage_manager);
       case HashTableImplType::kCollisionFreeVector:
         DCHECK_EQ(1u, key_types.size());
         return new CollisionFreeVectorTable(
             key_types.front(), num_entries, handles, storage_manager);
+      case HashTableImplType::kSeparateChaining:
+        return new PackedPayloadHashTable(
+            key_types, num_entries, handles, storage_manager);
+      case HashTableImplType::kThreadPrivateNumeric:
+        return new ThreadPrivateNumericHashTable(
+            key_types, num_entries, handles, storage_manager);
       default: {
         LOG(FATAL) << "Unrecognized HashTableImplType in "
                    << "AggregationStateHashTableFactory::createResizable()";

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/storage/HashTablePool.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTablePool.hpp b/storage/HashTablePool.hpp
index 6dbd7f9..7257906 100644
--- a/storage/HashTablePool.hpp
+++ b/storage/HashTablePool.hpp
@@ -75,6 +75,10 @@ class HashTablePool {
         handles_(handles),
         storage_manager_(DCHECK_NOTNULL(storage_manager)) {}
 
+  /**
+   * @brief Get the hash table implementation type stored in this pool.
+   *
+   * @return The value of hash_table_impl_type_.
+   **/
+  HashTableImplType getHashTableImplType() const {
+    return hash_table_impl_type_;
+  }
+
   /**
    * @brief Check out a hash table for insertion.
    *

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/storage/PackedPayloadHashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/PackedPayloadHashTable.hpp b/storage/PackedPayloadHashTable.hpp
index 960d5a7..3e89aab 100644
--- a/storage/PackedPayloadHashTable.hpp
+++ b/storage/PackedPayloadHashTable.hpp
@@ -88,6 +88,10 @@ class PackedPayloadHashTable : public AggregationStateHashTableBase {
 
   ~PackedPayloadHashTable() override;
 
+  /**
+   * @brief Get the implementation type of this hash table.
+   *
+   * @return Always HashTableImplType::kSeparateChaining.
+   **/
+  HashTableImplType getImplType() const override {
+    return HashTableImplType::kSeparateChaining;
+  }
+
   /**
    * @brief Erase all entries in this hash table.
    *

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/storage/ThreadPrivateNumericHashTable.cpp
----------------------------------------------------------------------
diff --git a/storage/ThreadPrivateNumericHashTable.cpp b/storage/ThreadPrivateNumericHashTable.cpp
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b0acc9ce/storage/ThreadPrivateNumericHashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/ThreadPrivateNumericHashTable.hpp b/storage/ThreadPrivateNumericHashTable.hpp
new file mode 100644
index 0000000..2991900
--- /dev/null
+++ b/storage/ThreadPrivateNumericHashTable.hpp
@@ -0,0 +1,483 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_STORAGE_THREAD_PRIVATE_NUMERIC_HASH_TABLE_HPP_
+#define QUICKSTEP_STORAGE_THREAD_PRIVATE_NUMERIC_HASH_TABLE_HPP_
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <unordered_map>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "expressions/aggregation/AggregationHandle.hpp"
+#include "expressions/aggregation/AggregationID.hpp"
+#include "storage/HashTableBase.hpp"
+#include "storage/ValueAccessorMultiplexer.hpp"
+#include "storage/ValueAccessorUtil.hpp"
+#include "types/Type.hpp"
+#include "types/TypeID.hpp"
+#include "types/containers/ColumnVector.hpp"
+#include "types/containers/ColumnVectorsValueAccessor.hpp"
+#include "utility/Macros.hpp"
+#include "utility/ScopedBuffer.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+class ThreadPrivateNumericHashTable : public AggregationStateHashTableBase {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param key_types Types of the grouping keys. Each must be fixed-length
+   *        with a byte length of 1, 2, 4 or 8, and all keys together must fit
+   *        in 8 bytes because they are packed into one 64-bit key code.
+   * @param num_entries Number of buckets (key slots and state slots) to
+   *        allocate up front.
+   * @param handles The aggregation handles. Only COUNT and SUM over
+   *        int/long/float/double arguments are supported.
+   * @param storage_manager Not used by this constructor.
+   **/
+  ThreadPrivateNumericHashTable(
+      const std::vector<const Type *> &key_types,
+      const std::size_t num_entries,
+      const std::vector<AggregationHandle *> &handles,
+      StorageManager *storage_manager)
+      : key_types_(key_types),
+        handles_(handles),
+        bucket_size_(0),
+        num_buckets_(num_entries),
+        buckets_allocated_(0) {
+    std::size_t total_key_size = 0;
+    for (const Type *key_type : key_types) {
+      DCHECK(!key_type->isVariableLength());
+
+      const std::size_t key_size = key_type->maximumByteLength();
+      DCHECK(key_size == 1u || key_size == 2u || key_size == 4u || key_size == 8u);
+
+      key_sizes_.emplace_back(key_size);
+      total_key_size += key_size;
+    }
+    // Each key is written at its running byte offset inside a 64-bit key code,
+    // so a wider composite key would spill into the neighbouring code.
+    DCHECK_LE(total_key_size, sizeof(std::uint64_t));
+
+    // Lay out one fixed-size state per handle inside each bucket and record
+    // where each state starts.
+    for (const AggregationHandle *handle : handles) {
+      state_offsets_.emplace_back(bucket_size_);
+
+      const std::vector<const Type*> arg_types = handle->getArgumentTypes();
+      DCHECK_LE(arg_types.size(), 1u);
+
+      std::size_t state_size = 0;
+      switch (handle->getAggregationID()) {
+        case AggregationID::kCount: {
+          state_size = sizeof(std::int64_t);
+          break;
+        }
+        case AggregationID::kSum: {
+          DCHECK_EQ(1u, arg_types.size());
+          switch (arg_types.front()->getTypeID()) {
+            case TypeID::kInt:  // Fall through
+            case TypeID::kLong:
+              state_size = sizeof(std::int64_t);
+              break;
+            case TypeID::kFloat:  // Fall through
+            case TypeID::kDouble:
+              state_size = sizeof(double);
+              break;
+            default:
+              LOG(FATAL) << "Not implemented";
+          }
+          break;
+        }
+        default:
+          LOG(FATAL) << "Not implemented";
+      }
+      bucket_size_ += state_size;
+    }
+
+    // NOTE(review): the accumulation code presumably relies on these buffers
+    // starting out zeroed -- confirm that reset() zero-fills.
+    keys_.reset(sizeof(std::uint64_t) * num_buckets_);
+    buckets_.reset(bucket_size_ * num_buckets_);
+  }
+
+  /**
+   * @brief Destructor. Performs no explicit cleanup of its own.
+   **/
+  ~ThreadPrivateNumericHashTable() override {}
+
+  /**
+   * @brief Get the implementation type of this hash table.
+   *
+   * @return Always HashTableImplType::kThreadPrivateNumeric.
+   **/
+  HashTableImplType getImplType() const override {
+    return HashTableImplType::kThreadPrivateNumeric;
+  }
+
+  /**
+   * @brief No-op for this implementation.
+   **/
+  void destroyPayload() override {}
+
+  /**
+   * @brief Get the size of the preallocated key and state arrays.
+   *
+   * @return num_buckets_ times the per-bucket footprint (bucket_size_ bytes of
+   *         aggregation state plus one 64-bit key code). Memory held by the
+   *         key-to-bucket index is not included.
+   **/
+  std::size_t getMemoryConsumptionBytes() const override {
+    return num_buckets_ * (bucket_size_ + sizeof(std::uint64_t));
+  }
+
+  /**
+   * @brief Get the number of buckets (distinct key codes) currently in use.
+   *
+   * @return The value of buckets_allocated_.
+   **/
+  inline std::size_t numEntries() const {
+    return buckets_allocated_;
+  }
+
+  /**
+   * @brief Aggregate all tuples of the given accessors into this table.
+   *
+   * Works in three passes: (1) pack the grouping-key bytes of every tuple into
+   * one 64-bit key code, (2) map every key code to a bucket index, allocating
+   * a new bucket for unseen codes, and (3) update each handle's state column
+   * for all tuples.
+   *
+   * @param argument_ids For each handle, the attribute ids of its arguments.
+   *        Only read for SUM, which must have exactly one argument.
+   * @param key_attr_ids The attribute ids of the grouping keys, in the same
+   *        order as the key types given to the constructor.
+   * @param accessor_mux Provides the base accessor (must be non-null) and the
+   *        derived accessor.
+   * @return Always true. Aborts via CHECK if more distinct keys arrive than
+   *         there are preallocated buckets.
+   **/
+  bool upsertValueAccessorCompositeKey(
+      const std::vector<std::vector<MultiSourceAttributeId>> &argument_ids,
+      const std::vector<MultiSourceAttributeId> &key_attr_ids,
+      const ValueAccessorMultiplexer &accessor_mux) override {
+    ValueAccessor *base_accessor = accessor_mux.getBaseAccessor();
+    ValueAccessor *derived_accessor = accessor_mux.getDerivedAccessor();
+
+    DCHECK(base_accessor != nullptr);
+    const std::size_t num_tuples = base_accessor->getNumTuplesVirtual();
+
+    // One 64-bit key code per tuple; each key column is written at its own
+    // byte offset inside the code.
+    // NOTE(review): when the keys occupy fewer than 8 bytes the remaining
+    // bytes of each code are never written here, so this presumably relies on
+    // ScopedBuffer zero-filling its allocation -- confirm.
+    ScopedBuffer buffer(sizeof(std::uint64_t) * num_tuples);
+    std::uint64_t *key_codes = static_cast<std::uint64_t*>(buffer.get());
+    std::size_t key_code_offset = 0;
+    for (std::size_t i = 0; i < key_attr_ids.size(); ++i) {
+      const auto &key_attr_id = key_attr_ids[i];
+      ValueAccessor *accessor =
+          key_attr_id.source == ValueAccessorSource::kBase
+              ? base_accessor
+              : derived_accessor;
+      DCHECK(accessor != nullptr);
+
+      const std::size_t key_size = key_sizes_[i];
+      switch (key_size) {
+        case 1u:
+          ConstructKeyCode<std::uint8_t>(
+              key_code_offset, key_attr_id.attr_id, accessor, key_codes);
+          break;
+        case 2u:
+          ConstructKeyCode<std::uint16_t>(
+              key_code_offset, key_attr_id.attr_id, accessor, key_codes);
+          break;
+        case 4u:
+          ConstructKeyCode<std::uint32_t>(
+              key_code_offset, key_attr_id.attr_id, accessor, key_codes);
+          break;
+        case 8u:
+          ConstructKeyCode<std::uint64_t>(
+              key_code_offset, key_attr_id.attr_id, accessor, key_codes);
+          break;
+        default:
+          LOG(FATAL) << "Not implemented";
+      }
+
+      key_code_offset += key_size;
+    }
+
+    // Resolve every tuple's key code to a bucket index.
+    std::vector<BucketIndex> bucket_indices;
+    bucket_indices.reserve(num_tuples);
+    std::uint64_t *keys = static_cast<std::uint64_t*>(keys_.get());
+    for (std::size_t i = 0; i < num_tuples; ++i) {
+      const std::uint64_t code = key_codes[i];
+      const auto index_it = index_.find(code);
+      if (index_it == index_.end()) {
+        // TODO: Resize if overflow. The bucket count comes from an optimizer
+        // estimate, so until resizing exists fail loudly instead of writing
+        // past the end of the preallocated key/state buffers.
+        CHECK_LT(buckets_allocated_, num_buckets_)
+            << "ThreadPrivateNumericHashTable is full";
+        index_.emplace(code, buckets_allocated_);
+        bucket_indices.emplace_back(buckets_allocated_);
+        keys[buckets_allocated_] = code;
+        ++buckets_allocated_;
+      } else {
+        bucket_indices.emplace_back(index_it->second);
+      }
+    }
+
+    // Dispatch
+    for (std::size_t i = 0; i < handles_.size(); ++i) {
+      const AggregationHandle *handle = handles_[i];
+      switch (handle->getAggregationID()) {
+        case AggregationID::kCount: {
+          upsertValueAccessorCount(bucket_indices, state_offsets_[i]);
+          break;
+        }
+        case AggregationID::kSum: {
+          DCHECK_EQ(1u, argument_ids[i].size());
+          const auto &argument_id = argument_ids[i].front();
+          ValueAccessor *accessor =
+              argument_id.source == ValueAccessorSource::kBase
+                  ? base_accessor
+                  : derived_accessor;
+          DCHECK(accessor != nullptr);
+
+          DCHECK_EQ(1u, handle->getArgumentTypes().size());
+          const Type *argument_type = handle->getArgumentTypes().front();
+          // Integer arguments accumulate into a 64-bit integer state and
+          // floating-point arguments into a double state.
+          switch (argument_type->getTypeID()) {
+            case kInt: {
+              upsertValueAccessorSum<int, std::int64_t>(
+                  bucket_indices, state_offsets_[i], argument_id.attr_id, accessor);
+              break;
+            }
+            case kLong: {
+              upsertValueAccessorSum<std::int64_t, std::int64_t>(
+                  bucket_indices, state_offsets_[i], argument_id.attr_id, accessor);
+              break;
+            }
+            case kFloat: {
+              upsertValueAccessorSum<float, double>(
+                  bucket_indices, state_offsets_[i], argument_id.attr_id, accessor);
+              break;
+            }
+            case kDouble: {
+              upsertValueAccessorSum<double, double>(
+                  bucket_indices, state_offsets_[i], argument_id.attr_id, accessor);
+              break;
+            }
+            default:
+              LOG(FATAL) << "Not implemented";
+          }
+          break;
+        }
+        default:
+          LOG(FATAL) << "Not implemented";
+      }
+    }
+
+    return true;
+  }
+
+  /**
+   * @brief Merge the aggregation states of another table into this one.
+   *
+   * Every key code in the other table is mapped to a bucket of this table
+   * (allocating a new bucket if the code is unseen), then each handle's state
+   * column is merged bucket by bucket.
+   *
+   * @param other The table to merge from. Must be non-null; this method does
+   *        not modify it. Aborts via CHECK if the merge would need more
+   *        buckets than this table preallocated.
+   **/
+  void merge(const ThreadPrivateNumericHashTable *other) {
+    DCHECK(other != nullptr);
+
+    std::vector<BucketIndex> dst_bucket_indices;
+    dst_bucket_indices.reserve(other->buckets_allocated_);
+    std::uint64_t *dst_keys = static_cast<std::uint64_t*>(keys_.get());
+
+    const char *src_buckets_start =
+        static_cast<const char*>(other->buckets_.get());
+    const std::uint64_t *src_keys =
+        static_cast<const std::uint64_t*>(other->keys_.get());
+
+    // dst_bucket_indices[i] is the bucket in this table that receives the
+    // i-th bucket of the other table.
+    for (std::size_t i = 0; i < other->buckets_allocated_; ++i) {
+      const std::uint64_t code = src_keys[i];
+      const auto index_it = index_.find(code);
+
+      if (index_it == index_.end()) {
+        // TODO: Resize if overflow. Until resizing exists, fail loudly instead
+        // of writing past the end of the preallocated key/state buffers.
+        CHECK_LT(buckets_allocated_, num_buckets_)
+            << "ThreadPrivateNumericHashTable is full";
+        index_.emplace(code, buckets_allocated_);
+        dst_bucket_indices.emplace_back(buckets_allocated_);
+        dst_keys[buckets_allocated_] = code;
+        ++buckets_allocated_;
+      } else {
+        dst_bucket_indices.emplace_back(index_it->second);
+      }
+    }
+
+    // Dispatch
+    for (std::size_t i = 0; i < handles_.size(); ++i) {
+      const AggregationHandle *handle = handles_[i];
+      switch (handle->getAggregationID()) {
+        case AggregationID::kCount: {
+          // COUNT states go through the same 64-bit integer merge path as
+          // integer sums.
+          mergeStateSum<std::int64_t>(
+              dst_bucket_indices, src_buckets_start, state_offsets_[i]);
+          break;
+        }
+        case AggregationID::kSum: {
+          const Type *argument_type = handle->getArgumentTypes().front();
+          switch (argument_type->getTypeID()) {
+            case kInt:  // Fall through
+            case kLong: {
+              mergeStateSum<std::int64_t>(
+                  dst_bucket_indices, src_buckets_start, state_offsets_[i]);
+              break;
+            }
+            case kFloat:  // Fall through
+            case kDouble: {
+              mergeStateSum<double>(
+                  dst_bucket_indices, src_buckets_start, state_offsets_[i]);
+              break;
+            }
+            default:
+              LOG(FATAL) << "Not implemented";
+          }
+          break;
+        }
+        default:
+          LOG(FATAL) << "Not implemented";
+      }
+    }
+  }
+
+  /**
+   * @brief Debugging aid: writes the number of allocated buckets to std::cout,
+   *        followed by the first buckets_allocated_ 8-byte words of the state
+   *        buffer, each interpreted as a double.
+   *
+   * NOTE(review): the state buffer is read as a flat array of doubles
+   * regardless of bucket_size_ or the handles' state types, so the values only
+   * line up with buckets when each bucket holds a single double state.
+   * NOTE(review): std::cout is used, but this header's include list does not
+   * name <iostream>; presumably it arrives transitively -- confirm.
+   **/
+  void print() const {
+    std::cout << "num_entries = " << buckets_allocated_ << "\n";
+    const double *values = static_cast<const double*>(buckets_.get());
+    for (std::size_t i = 0; i < buckets_allocated_; ++i) {
+      std::cout << values[i] << "\n";
+    }
+  }
+
+  /**
+   * @brief Materialize the contents of this table as column vectors.
+   *
+   * Appends one column per grouping key (in key order) followed by one column
+   * per aggregation handle (in handle order) to the output accessor. Every
+   * column is created with a capacity of buckets_allocated_ values.
+   *
+   * @param output The accessor that receives the columns.
+   **/
+  void finalize(ColumnVectorsValueAccessor *output) const {
+    const std::size_t num_groups = buckets_allocated_;
+
+    // Key columns: each key is extracted from its byte offset inside the
+    // packed 64-bit key codes.
+    std::size_t byte_offset = 0;
+    for (std::size_t key_idx = 0; key_idx < key_types_.size(); ++key_idx) {
+      const std::size_t width = key_sizes_[key_idx];
+      std::unique_ptr<NativeColumnVector> key_cv(
+          new NativeColumnVector(*key_types_[key_idx], num_groups));
+
+      switch (width) {
+        case 1u:
+          finalizeKey<std::uint8_t>(byte_offset, key_cv.get());
+          break;
+        case 2u:
+          finalizeKey<std::uint16_t>(byte_offset, key_cv.get());
+          break;
+        case 4u:
+          finalizeKey<std::uint32_t>(byte_offset, key_cv.get());
+          break;
+        case 8u:
+          finalizeKey<std::uint64_t>(byte_offset, key_cv.get());
+          break;
+        default:
+          LOG(FATAL) << "Not implemented";
+      }
+
+      output->addColumn(key_cv.release());
+      byte_offset += width;
+    }
+
+    // Aggregate columns: one per handle, read from that handle's state offset.
+    for (std::size_t handle_idx = 0; handle_idx < handles_.size(); ++handle_idx) {
+      const AggregationHandle *handle = handles_[handle_idx];
+      const std::size_t state_offset = state_offsets_[handle_idx];
+      std::unique_ptr<NativeColumnVector> state_cv(
+          new NativeColumnVector(*handle->getResultType(), num_groups));
+
+      switch (handle->getAggregationID()) {
+        case AggregationID::kCount: {
+          finalizeStateSum<std::int64_t, std::int64_t>(
+              state_offset, state_cv.get());
+          break;
+        }
+        case AggregationID::kSum: {
+          const TypeID argument_type_id =
+              handle->getArgumentTypes().front()->getTypeID();
+          if (argument_type_id == kInt || argument_type_id == kLong) {
+            finalizeStateSum<std::int64_t, std::int64_t>(
+                state_offset, state_cv.get());
+          } else if (argument_type_id == kFloat || argument_type_id == kDouble) {
+            finalizeStateSum<double, double>(state_offset, state_cv.get());
+          } else {
+            LOG(FATAL) << "Not implemented";
+          }
+          break;
+        }
+        default:
+          LOG(FATAL) << "Not implemented";
+      }
+
+      output->addColumn(state_cv.release());
+    }
+  }
+
+ private:
+  using BucketIndex = std::uint32_t;
+
+  // Copies attribute attr_id of every tuple into consecutive 8-byte key-code
+  // slots, writing each value `offset` bytes past the start of its slot.
+  template <typename KeyT>
+  inline static void ConstructKeyCode(const std::size_t offset,
+                                      const attribute_id attr_id,
+                                      ValueAccessor *accessor,
+                                      void *key_code_start) {
+    InvokeOnAnyValueAccessor(
+        accessor,
+        [&](auto *typed_accessor) -> void {  // NOLINT(build/c++11)
+      char *slot = static_cast<char*>(key_code_start) + offset;
+      for (typed_accessor->beginIteration(); typed_accessor->next();
+           slot += sizeof(std::uint64_t)) {
+        *reinterpret_cast<KeyT*>(slot) = *static_cast<const KeyT*>(
+            typed_accessor->template getUntypedValue<false>(attr_id));
+      }
+    });
+  }
+
+  // Adds 1 to the 64-bit counter at state_offset once per bucket_indices entry.
+  inline void upsertValueAccessorCount(const std::vector<BucketIndex> &bucket_indices,
+                                       const std::size_t state_offset) {
+    char *const counters = static_cast<char*>(buckets_.get()) + state_offset;
+    for (const BucketIndex bucket : bucket_indices) {
+      ++*reinterpret_cast<std::int64_t*>(counters + bucket_size_ * bucket);
+    }
+  }
+
+  // Adds attribute attr_id of the k-th tuple (read as ArgumentT) to the StateT
+  // stored at state_offset inside bucket bucket_indices[k].
+  template <typename ArgumentT, typename StateT>
+  inline void upsertValueAccessorSum(const std::vector<BucketIndex> &bucket_indices,
+                                     const std::size_t state_offset,
+                                     const attribute_id attr_id,
+                                     ValueAccessor *accessor) {
+    InvokeOnAnyValueAccessor(
+        accessor,
+        [&](auto *typed_accessor) -> void {  // NOLINT(build/c++11)
+      typed_accessor->beginIteration();
+      char *const states = static_cast<char*>(buckets_.get()) + state_offset;
+      auto bucket_it = bucket_indices.begin();
+      for (; typed_accessor->next(); ++bucket_it) {
+        StateT *state = reinterpret_cast<StateT*>(states + bucket_size_ * *bucket_it);
+        const ArgumentT addend = *static_cast<const ArgumentT*>(
+            typed_accessor->template getUntypedValue<false>(attr_id));
+        *state += addend;
+      }
+    });
+  }
+
+  // Adds the StateT at state_offset of source bucket k into this table's
+  // bucket dst_bucket_indices[k]; both sides are strided by bucket_size_.
+  template <typename StateT>
+  inline void mergeStateSum(const std::vector<BucketIndex> &dst_bucket_indices,
+                            const void *src_buckets_start,
+                            const std::size_t state_offset) {
+    char *const dst_states = static_cast<char*>(buckets_.get()) + state_offset;
+    const char *const src_states = static_cast<const char*>(src_buckets_start) + state_offset;
+    std::size_t src_bucket = 0;
+    for (const BucketIndex dst_bucket : dst_bucket_indices) {
+      StateT *dst = reinterpret_cast<StateT*>(dst_states + bucket_size_ * dst_bucket);
+      *dst += *reinterpret_cast<const StateT*>(src_states + bucket_size_ * src_bucket++);
+    }
+  }
+
+  // Copies the KeyT found `offset` bytes into each 8-byte entry of keys_ to output_cv.
+  template <typename KeyT>
+  inline void finalizeKey(const std::size_t offset,
+                          NativeColumnVector *output_cv) const {
+    const char *src = static_cast<const char*>(keys_.get()) + offset;
+    for (std::size_t n = 0; n < buckets_allocated_; ++n, src += sizeof(std::uint64_t)) {
+      *static_cast<KeyT*>(output_cv->getPtrForDirectWrite()) =
+          *reinterpret_cast<const KeyT*>(src);
+    }
+  }
+
+  // Writes the StateT at state_offset of each allocated bucket to output_cv as ResultT.
+  template <typename StateT, typename ResultT>
+  inline void finalizeStateSum(const std::size_t state_offset,
+                               NativeColumnVector *output_cv) const {
+    const char *state = static_cast<const char*>(buckets_.get()) + state_offset;
+    for (std::size_t n = 0; n < buckets_allocated_; ++n, state += bucket_size_) {
+      *static_cast<ResultT*>(output_cv->getPtrForDirectWrite()) =
+          *reinterpret_cast<const StateT*>(state);
+    }
+  }
+
+  const std::vector<const Type*> key_types_;
+  const std::vector<AggregationHandle *> handles_;
+
+  std::vector<std::size_t> key_sizes_;
+  std::vector<std::size_t> state_offsets_;
+  std::size_t bucket_size_;
+
+  std::unordered_map<std::uint64_t, BucketIndex> index_;
+
+  std::size_t num_buckets_;
+  std::size_t buckets_allocated_;
+
+  ScopedBuffer keys_;
+  ScopedBuffer buckets_;
+
+  DISALLOW_COPY_AND_ASSIGN(ThreadPrivateNumericHashTable);
+};
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_STORAGE_THREAD_PRIVATE_NUMERIC_HASH_TABLE_HPP_



[4/5] incubator-quickstep git commit: Initial commit

Posted by ji...@apache.org.
Initial commit


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/cd01af24
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/cd01af24
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/cd01af24

Branch: refs/heads/common-subexpression
Commit: cd01af24a6ce1db761652a9ec7051602e70edfac
Parents: 563abe0
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Wed Apr 5 14:10:01 2017 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Wed Apr 12 14:32:49 2017 -0500

----------------------------------------------------------------------
 expressions/CMakeLists.txt                      |   6 +
 expressions/Expression.hpp                      |  47 ++++
 expressions/ExpressionFactories.cpp             |  11 +
 expressions/Expressions.proto                   |   8 +
 expressions/predicate/CMakeLists.txt            |   4 +-
 expressions/predicate/ComparisonPredicate.cpp   |  85 ++++--
 expressions/predicate/ComparisonPredicate.hpp   |  11 +
 expressions/predicate/NegationPredicate.cpp     |  18 ++
 expressions/predicate/NegationPredicate.hpp     |  11 +
 expressions/predicate/Predicate.cpp             |  13 +
 expressions/predicate/Predicate.hpp             |  15 +-
 expressions/predicate/PredicateWithList.cpp     |  54 ++++
 expressions/predicate/PredicateWithList.hpp     |  11 +
 expressions/scalar/CMakeLists.txt               |  23 ++
 expressions/scalar/Scalar.cpp                   |  12 +
 expressions/scalar/Scalar.hpp                   |  33 ++-
 expressions/scalar/ScalarAttribute.cpp          |  42 ++-
 expressions/scalar/ScalarAttribute.hpp          |  22 +-
 expressions/scalar/ScalarBinaryExpression.cpp   | 249 ++++++++++-------
 expressions/scalar/ScalarBinaryExpression.hpp   |  23 +-
 expressions/scalar/ScalarCache.hpp              |  64 +++++
 expressions/scalar/ScalarCaseExpression.cpp     | 123 ++++++---
 expressions/scalar/ScalarCaseExpression.hpp     |  32 ++-
 expressions/scalar/ScalarLiteral.cpp            |  47 +++-
 expressions/scalar/ScalarLiteral.hpp            |  23 +-
 expressions/scalar/ScalarSharedExpression.cpp   | 141 ++++++++++
 expressions/scalar/ScalarSharedExpression.hpp   | 119 +++++++++
 expressions/scalar/ScalarUnaryExpression.cpp    |  82 ++++--
 expressions/scalar/ScalarUnaryExpression.hpp    |  23 +-
 query_optimizer/CMakeLists.txt                  |   1 +
 query_optimizer/PhysicalGenerator.cpp           |   6 +-
 .../expressions/AttributeReference.cpp          |  18 ++
 .../expressions/AttributeReference.hpp          |   4 +
 .../expressions/BinaryExpression.cpp            |  17 ++
 .../expressions/BinaryExpression.hpp            |   4 +
 query_optimizer/expressions/CMakeLists.txt      |  31 ++-
 query_optimizer/expressions/Cast.cpp            |  16 ++
 query_optimizer/expressions/Cast.hpp            |   4 +
 .../expressions/CommonSubexpression.cpp         |  70 +++++
 .../expressions/CommonSubexpression.hpp         | 133 ++++++++++
 query_optimizer/expressions/ExpressionType.hpp  |   3 +-
 query_optimizer/expressions/ExpressionUtil.hpp  |   6 +-
 query_optimizer/expressions/NamedExpression.cpp |   2 +
 query_optimizer/expressions/NamedExpression.hpp |  13 -
 query_optimizer/expressions/PatternMatcher.hpp  |   9 +-
 query_optimizer/expressions/Scalar.hpp          |  19 ++
 query_optimizer/expressions/ScalarLiteral.cpp   |  15 ++
 query_optimizer/expressions/ScalarLiteral.hpp   |   4 +
 query_optimizer/expressions/SimpleCase.cpp      |  46 ++++
 query_optimizer/expressions/SimpleCase.hpp      |   4 +
 query_optimizer/expressions/UnaryExpression.cpp |  17 ++
 query_optimizer/expressions/UnaryExpression.hpp |   6 +
 query_optimizer/rules/CMakeLists.txt            |  21 ++
 .../rules/CommonSubexpressionExtraction.cpp     | 264 +++++++++++++++++++
 .../rules/CommonSubexpressionExtraction.hpp     | 135 ++++++++++
 relational_operators/CMakeLists.txt             |   2 +
 relational_operators/HashJoinOperator.cpp       |  51 +++-
 .../NestedLoopsJoinOperator.cpp                 |   6 +-
 storage/AggregationOperationState.cpp           |   4 +-
 storage/CMakeLists.txt                          |   3 +
 storage/StorageBlock.cpp                        |  10 +-
 storage/WindowAggregationOperationState.cpp     |   8 +-
 types/containers/ColumnVector.hpp               |   3 +
 types/containers/ColumnVectorsValueAccessor.hpp |  24 +-
 utility/CMakeLists.txt                          |   2 +
 utility/HashError.hpp                           |  55 ++++
 66 files changed, 2095 insertions(+), 293 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/expressions/CMakeLists.txt b/expressions/CMakeLists.txt
index b1f1fb1..33606cd 100644
--- a/expressions/CMakeLists.txt
+++ b/expressions/CMakeLists.txt
@@ -25,12 +25,16 @@ QS_PROTOBUF_GENERATE_CPP(expressions_Expressions_proto_srcs
                          expressions_Expressions_proto_hdrs
                          Expressions.proto)
 
+add_library(quickstep_expressions_Expression ../empty_src.cpp Expression.hpp)
 add_library(quickstep_expressions_ExpressionFactories
             ExpressionFactories.cpp
             ExpressionFactories.hpp)
 add_library(quickstep_expressions_Expressions_proto
             ${expressions_Expressions_proto_srcs})
 
+target_link_libraries(quickstep_expressions_Expression
+                      quickstep_utility_Macros
+                      quickstep_utility_TreeStringSerializable)
 target_link_libraries(quickstep_expressions_ExpressionFactories
                       glog
                       quickstep_catalog_CatalogDatabaseLite
@@ -48,6 +52,7 @@ target_link_libraries(quickstep_expressions_ExpressionFactories
                       quickstep_expressions_scalar_ScalarBinaryExpression
                       quickstep_expressions_scalar_ScalarCaseExpression
                       quickstep_expressions_scalar_ScalarLiteral
+                      quickstep_expressions_scalar_ScalarSharedExpression
                       quickstep_expressions_scalar_ScalarUnaryExpression
                       quickstep_types_TypeFactory
                       quickstep_types_TypedValue
@@ -64,6 +69,7 @@ target_link_libraries(quickstep_expressions_Expressions_proto
 # Module all-in-one library:
 add_library(quickstep_expressions ../empty_src.cpp ExpressionsModule.hpp)
 target_link_libraries(quickstep_expressions
+                      quickstep_expressions_Expression
                       quickstep_expressions_ExpressionFactories
                       quickstep_expressions_Expressions_proto
                       quickstep_expressions_aggregation

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/Expression.hpp
----------------------------------------------------------------------
diff --git a/expressions/Expression.hpp b/expressions/Expression.hpp
new file mode 100644
index 0000000..8deaadc
--- /dev/null
+++ b/expressions/Expression.hpp
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_EXPRESSIONS_EXPRESSION_HPP_
+#define QUICKSTEP_EXPRESSIONS_EXPRESSION_HPP_
+
+#include "utility/Macros.hpp"
+#include "utility/TreeStringSerializable.hpp"
+
+namespace quickstep {
+
+/** \addtogroup Expressions
+ *  @{
+ */
+/** @brief Common base class for expression nodes such as Predicate. */
+class Expression : public TreeStringSerializable<const Expression*> {
+ public:
+  virtual ~Expression() {}
+
+ protected:
+  Expression() {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Expression);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_EXPRESSIONS_EXPRESSION_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/ExpressionFactories.cpp
----------------------------------------------------------------------
diff --git a/expressions/ExpressionFactories.cpp b/expressions/ExpressionFactories.cpp
index 01d22a0..b6d5f5f 100644
--- a/expressions/ExpressionFactories.cpp
+++ b/expressions/ExpressionFactories.cpp
@@ -39,6 +39,7 @@
 #include "expressions/scalar/ScalarBinaryExpression.hpp"
 #include "expressions/scalar/ScalarCaseExpression.hpp"
 #include "expressions/scalar/ScalarLiteral.hpp"
+#include "expressions/scalar/ScalarSharedExpression.hpp"
 #include "expressions/scalar/ScalarUnaryExpression.hpp"
 #include "types/TypeFactory.hpp"
 #include "types/TypedValue.hpp"
@@ -179,6 +180,11 @@ Scalar* ScalarFactory::ReconstructFromProto(const serialization::Scalar &proto,
           ReconstructFromProto(proto.GetExtension(serialization::ScalarBinaryExpression::left_operand), database),
           ReconstructFromProto(proto.GetExtension(serialization::ScalarBinaryExpression::right_operand), database));
     }
+    case serialization::Scalar::SHARED_EXPRESSION: {
+      return new ScalarSharedExpression(
+          proto.GetExtension(serialization::ScalarSharedExpression::share_id),
+          ReconstructFromProto(proto.GetExtension(serialization::ScalarSharedExpression::operand), database));
+    }
     case serialization::Scalar::CASE_EXPRESSION: {
       const Type &result_type = TypeFactory::ReconstructFromProto(
           proto.GetExtension(serialization::ScalarCaseExpression::result_type));
@@ -260,6 +266,11 @@ bool ScalarFactory::ProtoIsValid(const serialization::Scalar &proto,
       }
       break;
     }
+    case serialization::Scalar::SHARED_EXPRESSION: {
+      return proto.HasExtension(serialization::ScalarSharedExpression::share_id)
+             && proto.HasExtension(serialization::ScalarSharedExpression::operand);
+      break;
+    }
     case serialization::Scalar::CASE_EXPRESSION: {
       // Check result type.
       if (!(proto.HasExtension(serialization::ScalarCaseExpression::result_type)

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/Expressions.proto
----------------------------------------------------------------------
diff --git a/expressions/Expressions.proto b/expressions/Expressions.proto
index 8d923c5..8b4611e 100644
--- a/expressions/Expressions.proto
+++ b/expressions/Expressions.proto
@@ -50,6 +50,7 @@ message Scalar {
     UNARY_EXPRESSION = 2;
     BINARY_EXPRESSION = 3;
     CASE_EXPRESSION = 4;
+    SHARED_EXPRESSION = 5;
   }
 
   required ScalarDataSource data_source = 1;
@@ -123,3 +124,10 @@ message ScalarCaseExpression {
     optional Scalar else_result_expression = 163;
   }
 }
+
+message ScalarSharedExpression {
+  extend Scalar {
+    optional int32 share_id = 196;  // Id handed to ScalarSharedExpression.
+    optional Scalar operand = 197;  // Operand, itself a serialized Scalar.
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/predicate/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/expressions/predicate/CMakeLists.txt b/expressions/predicate/CMakeLists.txt
index b90562c..04abfc7 100644
--- a/expressions/predicate/CMakeLists.txt
+++ b/expressions/predicate/CMakeLists.txt
@@ -35,7 +35,7 @@ add_library(quickstep_expressions_predicate_PredicateCost
             ../../empty_src.cpp
             PredicateCost.hpp)
 add_library(quickstep_expressions_predicate_PredicateWithList
-            ../../empty_src.cpp
+            PredicateWithList.cpp
             PredicateWithList.hpp)
 add_library(quickstep_expressions_predicate_TrivialPredicates
             ../../empty_src.cpp
@@ -61,6 +61,7 @@ target_link_libraries(quickstep_expressions_predicate_ComparisonPredicate
                       quickstep_types_containers_ColumnVector
                       quickstep_types_operations_Operation_proto
                       quickstep_types_operations_comparisons_Comparison
+                      quickstep_types_operations_comparisons_ComparisonID
                       quickstep_utility_Macros
                       quickstep_utility_PtrVector)
 target_link_libraries(quickstep_expressions_predicate_ConjunctionPredicate
@@ -92,6 +93,7 @@ target_link_libraries(quickstep_expressions_predicate_NegationPredicate
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_expressions_predicate_Predicate
                       quickstep_catalog_CatalogTypedefs
+                      quickstep_expressions_Expression
                       quickstep_expressions_Expressions_proto
                       quickstep_storage_StorageBlockInfo
                       quickstep_storage_TupleIdSequence

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/predicate/ComparisonPredicate.cpp
----------------------------------------------------------------------
diff --git a/expressions/predicate/ComparisonPredicate.cpp b/expressions/predicate/ComparisonPredicate.cpp
index 5f8612e..84bc51a 100644
--- a/expressions/predicate/ComparisonPredicate.cpp
+++ b/expressions/predicate/ComparisonPredicate.cpp
@@ -41,6 +41,7 @@
 #include "types/containers/ColumnVector.hpp"
 #include "types/operations/Operation.pb.h"
 #include "types/operations/comparisons/Comparison.hpp"
+#include "types/operations/comparisons/ComparisonID.hpp"
 #include "utility/Macros.hpp"
 #include "utility/PtrVector.hpp"
 
@@ -190,18 +191,20 @@ TupleIdSequence* ComparisonPredicate::getAllMatches(
 #endif  // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
 
     if (short_circuit_adapter) {
-      std::unique_ptr<ColumnVector> right_values(right_operand_->getAllValues(
+      ColumnVectorPtr right_values(right_operand_->getAllValues(
           short_circuit_adapter.get(),
-          sub_blocks_ref));
+          sub_blocks_ref,
+          nullptr /* scalar_cache */));
       return fast_comparator_->compareStaticValueAndColumnVector(
           left_operand_->getStaticValue(),
           *right_values,
           nullptr,
           filter);
     } else {
-      std::unique_ptr<ColumnVector> right_values(right_operand_->getAllValues(
+      ColumnVectorPtr right_values(right_operand_->getAllValues(
           accessor,
-          sub_blocks_ref));
+          sub_blocks_ref,
+          nullptr /* scalar_cache */));
       return fast_comparator_->compareStaticValueAndColumnVector(
           left_operand_->getStaticValue(),
           *right_values,
@@ -222,18 +225,20 @@ TupleIdSequence* ComparisonPredicate::getAllMatches(
 #endif  // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
 
     if (short_circuit_adapter) {
-      std::unique_ptr<ColumnVector> left_values(left_operand_->getAllValues(
+      ColumnVectorPtr left_values(left_operand_->getAllValues(
           short_circuit_adapter.get(),
-          sub_blocks_ref));
+          sub_blocks_ref,
+          nullptr /* scalar_cache */));
       return fast_comparator_->compareColumnVectorAndStaticValue(
           *left_values,
           right_operand_->getStaticValue(),
           nullptr,
           filter);
     } else {
-      std::unique_ptr<ColumnVector> left_values(left_operand_->getAllValues(
+      ColumnVectorPtr left_values(left_operand_->getAllValues(
           accessor,
-          sub_blocks_ref));
+          sub_blocks_ref,
+          nullptr /* scalar_cache */));
       return fast_comparator_->compareColumnVectorAndStaticValue(
           *left_values,
           right_operand_->getStaticValue(),
@@ -255,9 +260,10 @@ TupleIdSequence* ComparisonPredicate::getAllMatches(
                                                             filter);
       } else {
         if (short_circuit_adapter) {
-          std::unique_ptr<ColumnVector> right_values(right_operand_->getAllValues(
+          ColumnVectorPtr right_values(right_operand_->getAllValues(
               short_circuit_adapter.get(),
-              sub_blocks_ref));
+              sub_blocks_ref,
+              nullptr /* scalar_cache */));
           return fast_comparator_->compareValueAccessorAndColumnVector(
               short_circuit_adapter.get(),
               left_operand_attr_id,
@@ -265,9 +271,10 @@ TupleIdSequence* ComparisonPredicate::getAllMatches(
               nullptr,
               filter);
         } else {
-          std::unique_ptr<ColumnVector> right_values(right_operand_->getAllValues(
+          ColumnVectorPtr right_values(right_operand_->getAllValues(
               accessor,
-              sub_blocks_ref));
+              sub_blocks_ref,
+              nullptr /* scalar_cache */));
           return fast_comparator_->compareValueAccessorAndColumnVector(accessor,
                                                                        left_operand_attr_id,
                                                                        *right_values,
@@ -277,9 +284,10 @@ TupleIdSequence* ComparisonPredicate::getAllMatches(
       }
     } else if (right_operand_attr_id != -1) {
       if (short_circuit_adapter) {
-        std::unique_ptr<ColumnVector> left_values(left_operand_->getAllValues(
+        ColumnVectorPtr left_values(left_operand_->getAllValues(
             short_circuit_adapter.get(),
-            sub_blocks_ref));
+            sub_blocks_ref,
+            nullptr /* scalar_cache */));
         return fast_comparator_->compareColumnVectorAndValueAccessor(
             *left_values,
             short_circuit_adapter.get(),
@@ -287,9 +295,10 @@ TupleIdSequence* ComparisonPredicate::getAllMatches(
             nullptr,
             filter);
       } else {
-        std::unique_ptr<ColumnVector> left_values(left_operand_->getAllValues(
+        ColumnVectorPtr left_values(left_operand_->getAllValues(
             accessor,
-            sub_blocks_ref));
+            sub_blocks_ref,
+            nullptr /* scalar_cache */));
         return fast_comparator_->compareColumnVectorAndValueAccessor(*left_values,
                                                                      accessor,
                                                                      right_operand_attr_id,
@@ -300,23 +309,27 @@ TupleIdSequence* ComparisonPredicate::getAllMatches(
 #endif  // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
 
     if (short_circuit_adapter) {
-      std::unique_ptr<ColumnVector> left_values(left_operand_->getAllValues(
+      ColumnVectorPtr left_values(left_operand_->getAllValues(
           short_circuit_adapter.get(),
-          sub_blocks_ref));
-      std::unique_ptr<ColumnVector> right_values(right_operand_->getAllValues(
+          sub_blocks_ref,
+          nullptr /* scalar_cache */));
+      ColumnVectorPtr right_values(right_operand_->getAllValues(
           short_circuit_adapter.get(),
-          sub_blocks_ref));
+          sub_blocks_ref,
+          nullptr /* scalar_cache */));
       return fast_comparator_->compareColumnVectors(*left_values,
                                                     *right_values,
                                                     nullptr,
                                                     filter);
     } else {
-      std::unique_ptr<ColumnVector> left_values(left_operand_->getAllValues(
+      ColumnVectorPtr left_values(left_operand_->getAllValues(
           accessor,
-          sub_blocks_ref));
-      std::unique_ptr<ColumnVector> right_values(right_operand_->getAllValues(
+          sub_blocks_ref,
+          nullptr /* scalar_cache */));
+      ColumnVectorPtr right_values(right_operand_->getAllValues(
           accessor,
-          sub_blocks_ref));
+          sub_blocks_ref,
+          nullptr /* scalar_cache */));
       return fast_comparator_->compareColumnVectors(*left_values,
                                                     *right_values,
                                                     filter,
@@ -373,4 +386,28 @@ void ComparisonPredicate::initHelper(bool own_children) {
   }
 }
 
+void ComparisonPredicate::getFieldStringItems(
+    std::vector<std::string> *inline_field_names,
+    std::vector<std::string> *inline_field_values,
+    std::vector<std::string> *non_container_child_field_names,
+    std::vector<const Expression*> *non_container_child_fields,
+    std::vector<std::string> *container_child_field_names,
+    std::vector<std::vector<const Expression*>> *container_child_fields) const {
+  // Let the Predicate base class add its own fields first.
+  Predicate::getFieldStringItems(
+      inline_field_names, inline_field_values,
+      non_container_child_field_names, non_container_child_fields,
+      container_child_field_names, container_child_fields);
+  // Inline field: name of the comparison, looked up by its ComparisonID.
+  inline_field_names->emplace_back("comparison");
+  const int comparison_index = static_cast<int>(comparison_.getComparisonID());
+  inline_field_values->emplace_back(kComparisonNames[comparison_index]);
+
+  // Child fields: the left operand is listed before the right operand.
+  non_container_child_field_names->emplace_back("left_operand");
+  non_container_child_field_names->emplace_back("right_operand");
+  non_container_child_fields->emplace_back(left_operand_.get());
+  non_container_child_fields->emplace_back(right_operand_.get());
+}
+
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/predicate/ComparisonPredicate.hpp
----------------------------------------------------------------------
diff --git a/expressions/predicate/ComparisonPredicate.hpp b/expressions/predicate/ComparisonPredicate.hpp
index 9030857..212af52 100644
--- a/expressions/predicate/ComparisonPredicate.hpp
+++ b/expressions/predicate/ComparisonPredicate.hpp
@@ -21,7 +21,9 @@
 #define QUICKSTEP_EXPRESSIONS_PREDICATE_COMPARISON_PREDICATE_HPP_
 
 #include <memory>
+#include <string>
 #include <utility>
+#include <vector>
 
 #include "catalog/CatalogTypedefs.hpp"
 #include "expressions/Expressions.pb.h"
@@ -137,6 +139,15 @@ class ComparisonPredicate : public Predicate {
    **/
   std::pair<bool, attribute_id> getAttributeFromAttributeLiteralComparison() const;
 
+ protected:
+  void getFieldStringItems(
+      std::vector<std::string> *inline_field_names,
+      std::vector<std::string> *inline_field_values,
+      std::vector<std::string> *non_container_child_field_names,
+      std::vector<const Expression*> *non_container_child_fields,
+      std::vector<std::string> *container_child_field_names,
+      std::vector<std::vector<const Expression*>> *container_child_fields) const override;
+
  private:
   const Comparison &comparison_;
   std::unique_ptr<Scalar> left_operand_;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/predicate/NegationPredicate.cpp
----------------------------------------------------------------------
diff --git a/expressions/predicate/NegationPredicate.cpp b/expressions/predicate/NegationPredicate.cpp
index bee1c8d..0365c6f 100644
--- a/expressions/predicate/NegationPredicate.cpp
+++ b/expressions/predicate/NegationPredicate.cpp
@@ -120,4 +120,22 @@ Predicate* NegationPredicate::NegatePredicate(Predicate *operand) {
   }
 }
 
+void NegationPredicate::getFieldStringItems(
+    std::vector<std::string> *inline_field_names,
+    std::vector<std::string> *inline_field_values,
+    std::vector<std::string> *non_container_child_field_names,
+    std::vector<const Expression*> *non_container_child_fields,
+    std::vector<std::string> *container_child_field_names,
+    std::vector<std::vector<const Expression*>> *container_child_fields) const {
+  // Let the Predicate base class add its own fields first.
+  Predicate::getFieldStringItems(
+      inline_field_names, inline_field_values,
+      non_container_child_field_names, non_container_child_fields,
+      container_child_field_names, container_child_fields);
+
+  // operand_ is reported as the single non-container child.
+  non_container_child_field_names->emplace_back("operand");
+  non_container_child_fields->emplace_back(operand_.get());
+}
+
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/predicate/NegationPredicate.hpp
----------------------------------------------------------------------
diff --git a/expressions/predicate/NegationPredicate.hpp b/expressions/predicate/NegationPredicate.hpp
index 33c6df8..9d6084c 100644
--- a/expressions/predicate/NegationPredicate.hpp
+++ b/expressions/predicate/NegationPredicate.hpp
@@ -21,6 +21,8 @@
 #define QUICKSTEP_EXPRESSIONS_PREDICATE_NEGATION_PREDICATE_HPP_
 
 #include <memory>
+#include <string>
+#include <vector>
 
 #include "catalog/CatalogTypedefs.hpp"
 #include "expressions/Expressions.pb.h"
@@ -105,6 +107,15 @@ class NegationPredicate : public Predicate {
 
   bool getStaticResult() const override;
 
+ protected:
+  void getFieldStringItems(
+      std::vector<std::string> *inline_field_names,
+      std::vector<std::string> *inline_field_values,
+      std::vector<std::string> *non_container_child_field_names,
+      std::vector<const Expression*> *non_container_child_fields,
+      std::vector<std::string> *container_child_field_names,
+      std::vector<std::vector<const Expression*>> *container_child_fields) const override;
+
  private:
   void initHelper();
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/predicate/Predicate.cpp
----------------------------------------------------------------------
diff --git a/expressions/predicate/Predicate.cpp b/expressions/predicate/Predicate.cpp
index 006e8f1..2a1e7da 100644
--- a/expressions/predicate/Predicate.cpp
+++ b/expressions/predicate/Predicate.cpp
@@ -57,4 +57,17 @@ TupleIdSequence* Predicate::GenerateSequenceForStaticResult(
   return result;
 }
 
+void Predicate::getFieldStringItems(  // Base-class part: reports only the static result, if there is one.
+    std::vector<std::string> *inline_field_names,  // out: gets "static_result" appended
+    std::vector<std::string> *inline_field_values,  // out: gets "true" or "false" appended
+    std::vector<std::string> *non_container_child_field_names,  // not modified here
+    std::vector<const Expression*> *non_container_child_fields,  // not modified here
+    std::vector<std::string> *container_child_field_names,  // not modified here
+    std::vector<std::vector<const Expression*>> *container_child_fields) const {  // not modified here
+  if (hasStaticResult()) {  // nothing is appended when the predicate has no static result
+    inline_field_names->emplace_back("static_result");
+    inline_field_values->emplace_back(getStaticResult() ? "true" : "false");  // one value per name pushed above
+  }
+}
+
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/predicate/Predicate.hpp
----------------------------------------------------------------------
diff --git a/expressions/predicate/Predicate.hpp b/expressions/predicate/Predicate.hpp
index 5fb3ef5..467d5fb 100644
--- a/expressions/predicate/Predicate.hpp
+++ b/expressions/predicate/Predicate.hpp
@@ -21,6 +21,7 @@
 #define QUICKSTEP_EXPRESSIONS_PREDICATE_PREDICATE_HPP_
 
 #include "catalog/CatalogTypedefs.hpp"
+#include "expressions/Expression.hpp"
 #include "expressions/Expressions.pb.h"
 #include "storage/StorageBlockInfo.hpp"
 #include "utility/Macros.hpp"
@@ -39,7 +40,7 @@ struct SubBlocksReference;
 /**
  * @brief Base class for all predicates.
  **/
-class Predicate {
+class Predicate : public Expression {
  public:
   /**
    * @brief The possible types of predicates.
@@ -67,6 +68,10 @@ class Predicate {
   virtual ~Predicate() {
   }
 
+  std::string getName() const override {  // Name of this node, taken from the predicate-type name table.
+    return kPredicateTypeNames[static_cast<int>(getPredicateType())];  // table is indexed by the enum's integer value
+  }
+
   /**
    * @brief Serialize this predicate in Protocol Buffer form.
    *
@@ -189,6 +194,14 @@ class Predicate {
   virtual bool getStaticResult() const;
 
  protected:
+  void getFieldStringItems(
+      std::vector<std::string> *inline_field_names,
+      std::vector<std::string> *inline_field_values,
+      std::vector<std::string> *non_container_child_field_names,
+      std::vector<const Expression*> *non_container_child_fields,
+      std::vector<std::string> *container_child_field_names,
+      std::vector<std::vector<const Expression*>> *container_child_fields) const override;
+
   Predicate() {
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/predicate/PredicateWithList.cpp
----------------------------------------------------------------------
diff --git a/expressions/predicate/PredicateWithList.cpp b/expressions/predicate/PredicateWithList.cpp
new file mode 100644
index 0000000..6b3d7a7
--- /dev/null
+++ b/expressions/predicate/PredicateWithList.cpp
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "expressions/predicate/PredicateWithList.hpp"
+
+#include <string>
+#include <vector>
+
+namespace quickstep {
+
+void PredicateWithList::getFieldStringItems(  // Adds both operand lists as container child fields.
+    std::vector<std::string> *inline_field_names,  // forwarded to Predicate::getFieldStringItems
+    std::vector<std::string> *inline_field_values,  // forwarded to Predicate::getFieldStringItems
+    std::vector<std::string> *non_container_child_field_names,  // forwarded only
+    std::vector<const Expression*> *non_container_child_fields,  // forwarded only
+    std::vector<std::string> *container_child_field_names,  // out: gets the two list names appended
+    std::vector<std::vector<const Expression*>> *container_child_fields) const {  // out: gets one child vector per list
+  Predicate::getFieldStringItems(inline_field_names,  // base-class fields are emitted first
+                                 inline_field_values,
+                                 non_container_child_field_names,
+                                 non_container_child_fields,
+                                 container_child_field_names,
+                                 container_child_fields);
+
+  container_child_field_names->emplace_back("static_operand_list");
+  container_child_fields->emplace_back();  // start an empty child vector for this field
+  for (const auto &static_operand : static_operand_list_) {
+    container_child_fields->back().emplace_back(&static_operand);  // stores the operand's address, not a copy
+  }
+
+  container_child_field_names->emplace_back("dynamic_operand_list");
+  container_child_fields->emplace_back();  // second child vector, pushed in the same order as its name
+  for (const auto &dynamic_operand : dynamic_operand_list_) {
+    container_child_fields->back().emplace_back(&dynamic_operand);  // stores the operand's address, not a copy
+  }
+}
+
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/predicate/PredicateWithList.hpp
----------------------------------------------------------------------
diff --git a/expressions/predicate/PredicateWithList.hpp b/expressions/predicate/PredicateWithList.hpp
index b1bf7e5..c6fb99d 100644
--- a/expressions/predicate/PredicateWithList.hpp
+++ b/expressions/predicate/PredicateWithList.hpp
@@ -20,6 +20,9 @@
 #ifndef QUICKSTEP_EXPRESSIONS_PREDICATE_PREDICATE_WITH_LIST_HPP_
 #define QUICKSTEP_EXPRESSIONS_PREDICATE_PREDICATE_WITH_LIST_HPP_
 
+#include <string>
+#include <vector>
+
 #include "expressions/predicate/Predicate.hpp"
 #include "utility/Macros.hpp"
 #include "utility/PtrList.hpp"
@@ -58,6 +61,14 @@ class PredicateWithList : public Predicate {
   }
 
  protected:
+  void getFieldStringItems(
+      std::vector<std::string> *inline_field_names,
+      std::vector<std::string> *inline_field_values,
+      std::vector<std::string> *non_container_child_field_names,
+      std::vector<const Expression*> *non_container_child_fields,
+      std::vector<std::string> *container_child_field_names,
+      std::vector<std::vector<const Expression*>> *container_child_fields) const override;
+
   PtrList<Predicate> static_operand_list_;
   PtrList<Predicate> dynamic_operand_list_;
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/expressions/scalar/CMakeLists.txt b/expressions/scalar/CMakeLists.txt
index 8f509da..72d554c 100644
--- a/expressions/scalar/CMakeLists.txt
+++ b/expressions/scalar/CMakeLists.txt
@@ -23,12 +23,16 @@ add_library(quickstep_expressions_scalar_ScalarAttribute
 add_library(quickstep_expressions_scalar_ScalarBinaryExpression
             ScalarBinaryExpression.cpp
             ScalarBinaryExpression.hpp)
+add_library(quickstep_expressions_scalar_ScalarCache ../../empty_src.cpp ScalarCache.hpp)
 add_library(quickstep_expressions_scalar_ScalarCaseExpression
             ScalarCaseExpression.cpp
             ScalarCaseExpression.hpp)
 add_library(quickstep_expressions_scalar_ScalarLiteral
             ScalarLiteral.cpp
             ScalarLiteral.hpp)
+add_library(quickstep_expressions_scalar_ScalarSharedExpression
+            ScalarSharedExpression.cpp
+            ScalarSharedExpression.hpp)
 add_library(quickstep_expressions_scalar_ScalarUnaryExpression
             ScalarUnaryExpression.cpp
             ScalarUnaryExpression.hpp)
@@ -36,9 +40,11 @@ add_library(quickstep_expressions_scalar_ScalarUnaryExpression
 # Link dependencies:
 target_link_libraries(quickstep_expressions_scalar_Scalar
                       quickstep_catalog_CatalogTypedefs
+                      quickstep_expressions_Expression
                       quickstep_expressions_Expressions_proto
                       quickstep_storage_StorageBlockInfo
                       quickstep_types_TypedValue
+                      quickstep_types_containers_ColumnVector
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_expressions_scalar_ScalarAttribute
                       quickstep_catalog_CatalogAttribute
@@ -65,6 +71,10 @@ target_link_libraries(quickstep_expressions_scalar_ScalarBinaryExpression
                       quickstep_types_containers_ColumnVector
                       quickstep_types_operations_Operation_proto
                       quickstep_types_operations_binaryoperations_BinaryOperation
+                      quickstep_types_operations_binaryoperations_BinaryOperationID
+                      quickstep_utility_Macros)
+target_link_libraries(quickstep_expressions_scalar_ScalarCache
+                      quickstep_types_containers_ColumnVector
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_expressions_scalar_ScalarCaseExpression
                       quickstep_catalog_CatalogTypedefs
@@ -92,6 +102,16 @@ target_link_libraries(quickstep_expressions_scalar_ScalarLiteral
                       quickstep_types_TypedValue_proto
                       quickstep_types_containers_ColumnVector
                       quickstep_utility_Macros)
+target_link_libraries(quickstep_expressions_scalar_ScalarSharedExpression
+                      quickstep_catalog_CatalogTypedefs
+                      quickstep_expressions_Expressions_proto
+                      quickstep_expressions_scalar_Scalar
+                      quickstep_expressions_scalar_ScalarCache
+                      quickstep_storage_StorageBlockInfo
+                      quickstep_storage_ValueAccessor
+                      quickstep_types_TypedValue
+                      quickstep_types_containers_ColumnVector
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_expressions_scalar_ScalarUnaryExpression
                       quickstep_catalog_CatalogTypedefs
                       quickstep_expressions_Expressions_proto
@@ -104,6 +124,7 @@ target_link_libraries(quickstep_expressions_scalar_ScalarUnaryExpression
                       quickstep_types_containers_ColumnVector
                       quickstep_types_operations_Operation_proto
                       quickstep_types_operations_unaryoperations_UnaryOperation
+                      quickstep_types_operations_unaryoperations_UnaryOperationID
                       quickstep_utility_Macros)
 
 # Submodule all-in-one library:
@@ -112,8 +133,10 @@ target_link_libraries(quickstep_expressions_scalar
                       quickstep_expressions_scalar_Scalar
                       quickstep_expressions_scalar_ScalarAttribute
                       quickstep_expressions_scalar_ScalarBinaryExpression
+                      quickstep_expressions_scalar_ScalarCache
                       quickstep_expressions_scalar_ScalarCaseExpression
                       quickstep_expressions_scalar_ScalarLiteral
+                      quickstep_expressions_scalar_ScalarSharedExpression
                       quickstep_expressions_scalar_ScalarUnaryExpression)
 
 # Tests:

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/Scalar.cpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/Scalar.cpp b/expressions/scalar/Scalar.cpp
index a1c436c..5e6b2e1 100644
--- a/expressions/scalar/Scalar.cpp
+++ b/expressions/scalar/Scalar.cpp
@@ -28,6 +28,7 @@ const char *Scalar::kScalarDataSourceNames[] = {
   "Attribute",
   "UnaryExpression",
   "BinaryExpression",
-  "SimpleCase"
+  "SimpleCase",  // kCaseExpression
+  "SharedExpression"  // kSharedExpression; entries must stay in ScalarDataSource enum order (getName() indexes by it)
 };
 
@@ -35,4 +36,15 @@ const TypedValue& Scalar::getStaticValue() const {
   FATAL_ERROR("Called getStaticValue() on a Scalar which does not have a static value");
 }
 
+void Scalar::getFieldStringItems(  // Base-class part: reports the scalar's result type as an inline field.
+    std::vector<std::string> *inline_field_names,  // out: gets "result_type" appended
+    std::vector<std::string> *inline_field_values,  // out: gets the type's name appended
+    std::vector<std::string> *non_container_child_field_names,  // not modified here
+    std::vector<const Expression*> *non_container_child_fields,  // not modified here
+    std::vector<std::string> *container_child_field_names,  // not modified here
+    std::vector<std::vector<const Expression*>> *container_child_fields) const {  // not modified here
+  inline_field_names->emplace_back("result_type");
+  inline_field_values->emplace_back(type_.getName());  // one value per name pushed above
+}
+
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/Scalar.hpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/Scalar.hpp b/expressions/scalar/Scalar.hpp
index 2db850a..995fd67 100644
--- a/expressions/scalar/Scalar.hpp
+++ b/expressions/scalar/Scalar.hpp
@@ -20,18 +20,21 @@
 #ifndef QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_HPP_
 #define QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_HPP_
 
+#include <string>
 #include <utility>
 #include <vector>
 
 #include "catalog/CatalogTypedefs.hpp"
+#include "expressions/Expression.hpp"
 #include "expressions/Expressions.pb.h"
 #include "storage/StorageBlockInfo.hpp"
 #include "types/TypedValue.hpp"
+#include "types/containers/ColumnVector.hpp"
 #include "utility/Macros.hpp"
 
 namespace quickstep {
 
-class ColumnVector;
+class ScalarCache;
 class Type;
 class ValueAccessor;
 
@@ -44,7 +47,7 @@ struct SubBlocksReference;
 /**
  * @brief Base class for anything which evaluates to a Scalar value.
  **/
-class Scalar {
+class Scalar : public Expression {
  public:
   /**
    * @brief The possible provenance of Scalar values.
@@ -55,6 +58,7 @@ class Scalar {
     kUnaryExpression,
     kBinaryExpression,
     kCaseExpression,
+    kSharedExpression,
     kNumScalarDataSources  // Not a real ScalarDataSource, exists for counting purposes.
   };
 
@@ -70,6 +74,10 @@ class Scalar {
   virtual ~Scalar() {
   }
 
+  std::string getName() const override {  // Name of this node, taken from the data-source name table.
+    return kScalarDataSourceNames[static_cast<int>(getDataSource())];  // table must be ordered like ScalarDataSource
+  }
+
   /**
    * @brief Serialize this scalar in Protocol Buffer form.
    *
@@ -200,8 +208,9 @@ class Scalar {
    * @return A ColumnVector of this Scalar's values for each tuple accesible
    *         via accessor.
    **/
-  virtual ColumnVector* getAllValues(ValueAccessor *accessor,
-                                     const SubBlocksReference *sub_blocks_ref) const = 0;
+  virtual ColumnVectorPtr getAllValues(ValueAccessor *accessor,
+                                       const SubBlocksReference *sub_blocks_ref,
+                                       ScalarCache *scalar_cache) const = 0;
 
   /**
    * @brief Get this Scalar's value for all specified joined tuples from two
@@ -218,16 +227,24 @@ class Scalar {
    * @return A ColumnVector of this Scalar's values for all the joined tuples
    *         specified by joined_tuple_ids.
    **/
-  virtual ColumnVector* getAllValuesForJoin(
+  virtual ColumnVectorPtr getAllValuesForJoin(
       const relation_id left_relation_id,
       ValueAccessor *left_accessor,
       const relation_id right_relation_id,
       ValueAccessor *right_accessor,
-      const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids) const = 0;
+      const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids,
+      ScalarCache *scalar_cache) const = 0;
 
  protected:
-  explicit Scalar(const Type &type) : type_(type) {
-  }
+  void getFieldStringItems(
+      std::vector<std::string> *inline_field_names,
+      std::vector<std::string> *inline_field_values,
+      std::vector<std::string> *non_container_child_field_names,
+      std::vector<const Expression*> *non_container_child_fields,
+      std::vector<std::string> *container_child_field_names,
+      std::vector<std::vector<const Expression*>> *container_child_fields) const override;
+
+  explicit Scalar(const Type &type) : Expression(), type_(type) {}
 
   const Type &type_;
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/ScalarAttribute.cpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarAttribute.cpp b/expressions/scalar/ScalarAttribute.cpp
index cc42084..8f063e6 100644
--- a/expressions/scalar/ScalarAttribute.cpp
+++ b/expressions/scalar/ScalarAttribute.cpp
@@ -19,6 +19,7 @@
 
 #include "expressions/scalar/ScalarAttribute.hpp"
 
+#include <string>
 #include <utility>
 #include <vector>
 
@@ -88,13 +89,15 @@ relation_id ScalarAttribute::getRelationIdForValueAccessor() const {
   return attribute_.getParent().getID();
 }
 
-ColumnVector* ScalarAttribute::getAllValues(ValueAccessor *accessor,
-                                            const SubBlocksReference *sub_blocks_ref) const {
+ColumnVectorPtr ScalarAttribute::getAllValues(
+    ValueAccessor *accessor,
+    const SubBlocksReference *sub_blocks_ref,
+    ScalarCache *scalar_cache) const {
   const attribute_id attr_id = attribute_.getID();
   const Type &result_type = attribute_.getType();
   return InvokeOnValueAccessorMaybeTupleIdSequenceAdapter(
       accessor,
-      [&attr_id, &result_type](auto *accessor) -> ColumnVector* {  // NOLINT(build/c++11)
+      [&attr_id, &result_type](auto *accessor) -> ColumnVectorPtr {  // NOLINT(build/c++11)
     if (NativeColumnVector::UsableForType(result_type)) {
       NativeColumnVector *result = new NativeColumnVector(result_type,
                                                           accessor->getNumTuples());
@@ -139,7 +142,7 @@ ColumnVector* ScalarAttribute::getAllValues(ValueAccessor *accessor,
           }
         }
       }
-      return result;
+      return ColumnVectorPtr(result);
     } else {
       IndirectColumnVector *result = new IndirectColumnVector(result_type,
                                                               accessor->getNumTuples());
@@ -147,17 +150,18 @@ ColumnVector* ScalarAttribute::getAllValues(ValueAccessor *accessor,
       while (accessor->next()) {
         result->appendTypedValue(accessor->getTypedValue(attr_id));
       }
-      return result;
+      return ColumnVectorPtr(result);
     }
   });
 }
 
-ColumnVector* ScalarAttribute::getAllValuesForJoin(
+ColumnVectorPtr ScalarAttribute::getAllValuesForJoin(
     const relation_id left_relation_id,
     ValueAccessor *left_accessor,
     const relation_id right_relation_id,
     ValueAccessor *right_accessor,
-    const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids) const {
+    const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids,
+    ScalarCache *scalar_cache) const {
   DCHECK((attribute_.getParent().getID() == left_relation_id)
          || (attribute_.getParent().getID() == right_relation_id));
 
@@ -173,7 +177,7 @@ ColumnVector* ScalarAttribute::getAllValuesForJoin(
       [&joined_tuple_ids,
        &attr_id,
        &result_type,
-       &using_left_relation](auto *accessor) -> ColumnVector* {  // NOLINT(build/c++11)
+       &using_left_relation](auto *accessor) -> ColumnVectorPtr {  // NOLINT(build/c++11)
     if (NativeColumnVector::UsableForType(result_type)) {
       NativeColumnVector *result = new NativeColumnVector(result_type,
                                                           joined_tuple_ids.size());
@@ -196,7 +200,7 @@ ColumnVector* ScalarAttribute::getAllValuesForJoin(
                   using_left_relation ? joined_pair.first : joined_pair.second));
         }
       }
-      return result;
+      return ColumnVectorPtr(result);
     } else {
       IndirectColumnVector *result = new IndirectColumnVector(result_type,
                                                               joined_tuple_ids.size());
@@ -206,9 +210,27 @@ ColumnVector* ScalarAttribute::getAllValuesForJoin(
                   attr_id,
                   using_left_relation ? joined_pair.first : joined_pair.second));
       }
-      return result;
+      return ColumnVectorPtr(result);
     }
   });
 }
 
+void ScalarAttribute::getFieldStringItems(  // Adds the referenced attribute's id after the base Scalar fields.
+    std::vector<std::string> *inline_field_names,  // out: gets "attribute" appended
+    std::vector<std::string> *inline_field_values,  // out: gets the attribute id as text appended
+    std::vector<std::string> *non_container_child_field_names,  // forwarded only
+    std::vector<const Expression*> *non_container_child_fields,  // forwarded only
+    std::vector<std::string> *container_child_field_names,  // forwarded only
+    std::vector<std::vector<const Expression*>> *container_child_fields) const {  // forwarded only
+  Scalar::getFieldStringItems(inline_field_names,  // base-class fields are emitted first
+                              inline_field_values,
+                              non_container_child_field_names,
+                              non_container_child_fields,
+                              container_child_field_names,
+                              container_child_fields);
+
+  inline_field_names->emplace_back("attribute");
+  inline_field_values->emplace_back(std::to_string(attribute_.getID()));  // id rendered with std::to_string
+}
+
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/ScalarAttribute.hpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarAttribute.hpp b/expressions/scalar/ScalarAttribute.hpp
index c6a41df..1bf8dab 100644
--- a/expressions/scalar/ScalarAttribute.hpp
+++ b/expressions/scalar/ScalarAttribute.hpp
@@ -20,6 +20,7 @@
 #ifndef QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_ATTRIBUTE_HPP_
 #define QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_ATTRIBUTE_HPP_
 
+#include <string>
 #include <utility>
 #include <vector>
 
@@ -28,12 +29,13 @@
 #include "expressions/scalar/Scalar.hpp"
 #include "storage/StorageBlockInfo.hpp"
 #include "types/TypedValue.hpp"
+#include "types/containers/ColumnVector.hpp"
 #include "utility/Macros.hpp"
 
 namespace quickstep {
 
 class CatalogAttribute;
-class ColumnVector;
+class ScalarCache;
 class ValueAccessor;
 
 struct SubBlocksReference;
@@ -77,21 +79,31 @@ class ScalarAttribute : public Scalar {
 
   relation_id getRelationIdForValueAccessor() const override;
 
-  ColumnVector* getAllValues(ValueAccessor *accessor,
-                             const SubBlocksReference *sub_blocks_ref) const override;
+  ColumnVectorPtr getAllValues(ValueAccessor *accessor,
+                               const SubBlocksReference *sub_blocks_ref,
+                               ScalarCache *scalar_cache) const override;
 
-  ColumnVector* getAllValuesForJoin(
+  ColumnVectorPtr getAllValuesForJoin(
       const relation_id left_relation_id,
       ValueAccessor *left_accessor,
       const relation_id right_relation_id,
       ValueAccessor *right_accessor,
-      const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids) const override;
+      const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids,
+      ScalarCache *scalar_cache) const override;
 
   const CatalogAttribute& getAttribute() const {
     return attribute_;
   }
 
  protected:
+  void getFieldStringItems(
+      std::vector<std::string> *inline_field_names,
+      std::vector<std::string> *inline_field_values,
+      std::vector<std::string> *non_container_child_field_names,
+      std::vector<const Expression*> *non_container_child_fields,
+      std::vector<std::string> *container_child_field_names,
+      std::vector<std::vector<const Expression*>> *container_child_fields) const override;
+
   const CatalogAttribute &attribute_;
 
  private:

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/ScalarBinaryExpression.cpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarBinaryExpression.cpp b/expressions/scalar/ScalarBinaryExpression.cpp
index 5fe6cde..3da6083 100644
--- a/expressions/scalar/ScalarBinaryExpression.cpp
+++ b/expressions/scalar/ScalarBinaryExpression.cpp
@@ -33,6 +33,7 @@
 #include "types/containers/ColumnVector.hpp"
 #include "types/operations/Operation.pb.h"
 #include "types/operations/binary_operations/BinaryOperation.hpp"
+#include "types/operations/binary_operations/BinaryOperationID.hpp"
 
 #include "glog/logging.h"
 
@@ -101,13 +102,15 @@ TypedValue ScalarBinaryExpression::getValueForJoinedTuples(
   }
 }
 
-ColumnVector* ScalarBinaryExpression::getAllValues(
+ColumnVectorPtr ScalarBinaryExpression::getAllValues(
     ValueAccessor *accessor,
-    const SubBlocksReference *sub_blocks_ref) const {
+    const SubBlocksReference *sub_blocks_ref,
+    ScalarCache *scalar_cache) const {
   if (fast_operator_.get() == nullptr) {
-    return ColumnVector::MakeVectorOfValue(getType(),
-                                           static_value_,
-                                           accessor->getNumTuplesVirtual());
+    return ColumnVectorPtr(
+        ColumnVector::MakeVectorOfValue(getType(),
+                                        static_value_,
+                                        accessor->getNumTuplesVirtual()));
   } else {
     // NOTE(chasseur): We don't check if BOTH operands have a static value,
     // because if they did then this expression would also have a static value
@@ -117,35 +120,39 @@ ColumnVector* ScalarBinaryExpression::getAllValues(
       const attribute_id right_operand_attr_id
           = right_operand_->getAttributeIdForValueAccessor();
       if (right_operand_attr_id != -1) {
-        return fast_operator_->applyToStaticValueAndValueAccessor(
-            left_operand_->getStaticValue(),
-            accessor,
-            right_operand_attr_id);
+        return ColumnVectorPtr(
+            fast_operator_->applyToStaticValueAndValueAccessor(
+                left_operand_->getStaticValue(),
+                accessor,
+                right_operand_attr_id));
       }
 #endif  // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
 
-      std::unique_ptr<ColumnVector> right_result(
-          right_operand_->getAllValues(accessor, sub_blocks_ref));
-      return fast_operator_->applyToStaticValueAndColumnVector(
-          left_operand_->getStaticValue(),
-          *right_result);
+      ColumnVectorPtr right_result(
+          right_operand_->getAllValues(accessor, sub_blocks_ref, scalar_cache));
+      return ColumnVectorPtr(
+          fast_operator_->applyToStaticValueAndColumnVector(
+              left_operand_->getStaticValue(),
+              *right_result));
     } else if (right_operand_->hasStaticValue()) {
 #ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
       const attribute_id left_operand_attr_id
           = left_operand_->getAttributeIdForValueAccessor();
       if (left_operand_attr_id != -1) {
-        return fast_operator_->applyToValueAccessorAndStaticValue(
-            accessor,
-            left_operand_attr_id,
-            right_operand_->getStaticValue());
+        return ColumnVectorPtr(
+            fast_operator_->applyToValueAccessorAndStaticValue(
+                accessor,
+                left_operand_attr_id,
+                right_operand_->getStaticValue()));
       }
 #endif  // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
 
-      std::unique_ptr<ColumnVector> left_result(
-          left_operand_->getAllValues(accessor, sub_blocks_ref));
-      return fast_operator_->applyToColumnVectorAndStaticValue(
-          *left_result,
-          right_operand_->getStaticValue());
+      ColumnVectorPtr left_result(
+          left_operand_->getAllValues(accessor, sub_blocks_ref, scalar_cache));
+      return ColumnVectorPtr(
+          fast_operator_->applyToColumnVectorAndStaticValue(
+              *left_result,
+              right_operand_->getStaticValue()));
     } else {
 #ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
       const attribute_id left_operand_attr_id
@@ -155,44 +162,53 @@ ColumnVector* ScalarBinaryExpression::getAllValues(
 
       if (left_operand_attr_id != -1) {
         if (right_operand_attr_id != -1) {
-          return fast_operator_->applyToSingleValueAccessor(accessor,
-                                                            left_operand_attr_id,
-                                                            right_operand_attr_id);
+          return ColumnVectorPtr(
+              fast_operator_->applyToSingleValueAccessor(
+                  accessor,
+                  left_operand_attr_id,
+                  right_operand_attr_id));
         } else {
-          std::unique_ptr<ColumnVector> right_result(
-              right_operand_->getAllValues(accessor, sub_blocks_ref));
-          return fast_operator_->applyToValueAccessorAndColumnVector(accessor,
-                                                                     left_operand_attr_id,
-                                                                     *right_result);
+          ColumnVectorPtr right_result(
+              right_operand_->getAllValues(accessor, sub_blocks_ref, scalar_cache));
+          return ColumnVectorPtr(
+              fast_operator_->applyToValueAccessorAndColumnVector(
+                  accessor,
+                  left_operand_attr_id,
+                  *right_result));
         }
       } else if (right_operand_attr_id != -1) {
-        std::unique_ptr<ColumnVector> left_result(
-            left_operand_->getAllValues(accessor, sub_blocks_ref));
-        return fast_operator_->applyToColumnVectorAndValueAccessor(*left_result,
-                                                                   accessor,
-                                                                   right_operand_attr_id);
+        ColumnVectorPtr left_result(
+            left_operand_->getAllValues(accessor, sub_blocks_ref, scalar_cache));
+        return ColumnVectorPtr(
+            fast_operator_->applyToColumnVectorAndValueAccessor(
+                *left_result,
+                accessor,
+                right_operand_attr_id));
       }
 #endif  // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
 
-      std::unique_ptr<ColumnVector> left_result(
-          left_operand_->getAllValues(accessor, sub_blocks_ref));
-      std::unique_ptr<ColumnVector> right_result(
-          right_operand_->getAllValues(accessor, sub_blocks_ref));
-      return fast_operator_->applyToColumnVectors(*left_result, *right_result);
+      ColumnVectorPtr left_result(
+          left_operand_->getAllValues(accessor, sub_blocks_ref, scalar_cache));
+      ColumnVectorPtr right_result(
+          right_operand_->getAllValues(accessor, sub_blocks_ref, scalar_cache));
+      return ColumnVectorPtr(
+          fast_operator_->applyToColumnVectors(*left_result, *right_result));
     }
   }
 }
 
-ColumnVector* ScalarBinaryExpression::getAllValuesForJoin(
+ColumnVectorPtr ScalarBinaryExpression::getAllValuesForJoin(
     const relation_id left_relation_id,
     ValueAccessor *left_accessor,
     const relation_id right_relation_id,
     ValueAccessor *right_accessor,
-    const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids) const {
+    const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids,
+    ScalarCache *scalar_cache) const {
   if (fast_operator_.get() == nullptr) {
-    return ColumnVector::MakeVectorOfValue(getType(),
-                                           static_value_,
-                                           joined_tuple_ids.size());
+    return ColumnVectorPtr(
+        ColumnVector::MakeVectorOfValue(getType(),
+                                        static_value_,
+                                        joined_tuple_ids.size()));
   } else {
     if (left_operand_->hasStaticValue()) {
 #ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_JOIN
@@ -207,24 +223,27 @@ ColumnVector* ScalarBinaryExpression::getAllValuesForJoin(
         const bool using_left_relation = (right_operand_relation_id == left_relation_id);
         ValueAccessor *right_operand_accessor = using_left_relation ? left_accessor
                                                                     : right_accessor;
-        return fast_operator_->applyToStaticValueAndValueAccessorForJoin(
-            left_operand_->getStaticValue(),
-            right_operand_accessor,
-            using_left_relation,
-            right_operand_attr_id,
-            joined_tuple_ids);
+        return ColumnVectorPtr(
+            fast_operator_->applyToStaticValueAndValueAccessorForJoin(
+                left_operand_->getStaticValue(),
+                right_operand_accessor,
+                using_left_relation,
+                right_operand_attr_id,
+                joined_tuple_ids));
       }
 #endif  // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_JOIN
 
-      std::unique_ptr<ColumnVector> right_result(
+      ColumnVectorPtr right_result(
           right_operand_->getAllValuesForJoin(left_relation_id,
                                               left_accessor,
                                               right_relation_id,
                                               right_accessor,
-                                              joined_tuple_ids));
-      return fast_operator_->applyToStaticValueAndColumnVector(
-          left_operand_->getStaticValue(),
-          *right_result);
+                                              joined_tuple_ids,
+                                              scalar_cache));
+      return ColumnVectorPtr(
+          fast_operator_->applyToStaticValueAndColumnVector(
+              left_operand_->getStaticValue(),
+              *right_result));
     } else if (right_operand_->hasStaticValue()) {
 #ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_JOIN
       const attribute_id left_operand_attr_id
@@ -238,24 +257,27 @@ ColumnVector* ScalarBinaryExpression::getAllValuesForJoin(
         const bool using_left_relation = (left_operand_relation_id == left_relation_id);
         ValueAccessor *left_operand_accessor = using_left_relation ? left_accessor
                                                                    : right_accessor;
-        return fast_operator_->applyToValueAccessorAndStaticValueForJoin(
-            left_operand_accessor,
-            using_left_relation,
-            left_operand_attr_id,
-            right_operand_->getStaticValue(),
-            joined_tuple_ids);
+        return ColumnVectorPtr(
+            fast_operator_->applyToValueAccessorAndStaticValueForJoin(
+                left_operand_accessor,
+                using_left_relation,
+                left_operand_attr_id,
+                right_operand_->getStaticValue(),
+                joined_tuple_ids));
       }
 #endif  // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_JOIN
 
-      std::unique_ptr<ColumnVector> left_result(
+      ColumnVectorPtr left_result(
           left_operand_->getAllValuesForJoin(left_relation_id,
                                              left_accessor,
                                              right_relation_id,
                                              right_accessor,
-                                             joined_tuple_ids));
-      return fast_operator_->applyToColumnVectorAndStaticValue(
-          *left_result,
-          right_operand_->getStaticValue());
+                                             joined_tuple_ids,
+                                             scalar_cache));
+      return ColumnVectorPtr(
+          fast_operator_->applyToColumnVectorAndStaticValue(
+              *left_result,
+              right_operand_->getStaticValue()));
     } else {
 #ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_JOIN
       const attribute_id left_operand_attr_id
@@ -284,28 +306,30 @@ ColumnVector* ScalarBinaryExpression::getAllValuesForJoin(
               = (right_operand_relation_id == left_relation_id);
           ValueAccessor *right_operand_accessor = using_left_relation_for_right_operand ? left_accessor
                                                                                         : right_accessor;
-          return fast_operator_->applyToValueAccessorsForJoin(left_operand_accessor,
-                                                              using_left_relation_for_left_operand,
-                                                              left_operand_attr_id,
-                                                              right_operand_accessor,
-                                                              using_left_relation_for_right_operand,
-                                                              right_operand_attr_id,
-                                                              joined_tuple_ids);
+          return ColumnVectorPtr(
+              fast_operator_->applyToValueAccessorsForJoin(left_operand_accessor,
+                                                           using_left_relation_for_left_operand,
+                                                           left_operand_attr_id,
+                                                           right_operand_accessor,
+                                                           using_left_relation_for_right_operand,
+                                                           right_operand_attr_id,
+                                                           joined_tuple_ids));
         }
 #endif  // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_JOIN_WITH_BINARY_EXPRESSIONS
-        std::unique_ptr<ColumnVector> right_result(
+        ColumnVectorPtr right_result(
             right_operand_->getAllValuesForJoin(left_relation_id,
                                                 left_accessor,
                                                 right_relation_id,
                                                 right_accessor,
                                                 joined_tuple_ids));
 
-        return fast_operator_->applyToValueAccessorAndColumnVectorForJoin(
-            left_operand_accessor,
-            using_left_relation_for_left_operand,
-            left_operand_attr_id,
-            *right_result,
-            joined_tuple_ids);
+        return ColumnVectorPtr(
+            fast_operator_->applyToValueAccessorAndColumnVectorForJoin(
+                left_operand_accessor,
+                using_left_relation_for_left_operand,
+                left_operand_attr_id,
+                *right_result,
+                joined_tuple_ids));
       } else if (right_operand_attr_id != -1) {
         const relation_id right_operand_relation_id
             = right_operand_->getRelationIdForValueAccessor();
@@ -317,34 +341,38 @@ ColumnVector* ScalarBinaryExpression::getAllValuesForJoin(
         ValueAccessor *right_operand_accessor = using_left_relation_for_right_operand ? left_accessor
                                                                                       : right_accessor;
 
-        std::unique_ptr<ColumnVector> left_result(
+        ColumnVectorPtr left_result(
             left_operand_->getAllValuesForJoin(left_relation_id,
                                                left_accessor,
                                                right_relation_id,
                                                right_accessor,
                                                joined_tuple_ids));
-        return fast_operator_->applyToColumnVectorAndValueAccessorForJoin(
-            *left_result,
-            right_operand_accessor,
-            using_left_relation_for_right_operand,
-            right_operand_attr_id,
-            joined_tuple_ids);
+        return ColumnVectorPtr(
+            fast_operator_->applyToColumnVectorAndValueAccessorForJoin(
+                *left_result,
+                right_operand_accessor,
+                using_left_relation_for_right_operand,
+                right_operand_attr_id,
+                joined_tuple_ids));
       }
 #endif  // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_JOIN
 
-      std::unique_ptr<ColumnVector> left_result(
+      ColumnVectorPtr left_result(
           left_operand_->getAllValuesForJoin(left_relation_id,
                                              left_accessor,
                                              right_relation_id,
                                              right_accessor,
-                                             joined_tuple_ids));
-      std::unique_ptr<ColumnVector> right_result(
+                                             joined_tuple_ids,
+                                             scalar_cache));
+      ColumnVectorPtr right_result(
           right_operand_->getAllValuesForJoin(left_relation_id,
                                               left_accessor,
                                               right_relation_id,
                                               right_accessor,
-                                              joined_tuple_ids));
-      return fast_operator_->applyToColumnVectors(*left_result, *right_result);
+                                              joined_tuple_ids,
+                                              scalar_cache));
+      return ColumnVectorPtr(
+          fast_operator_->applyToColumnVectors(*left_result, *right_result));
     }
   }
 }
@@ -374,4 +402,37 @@ void ScalarBinaryExpression::initHelper(bool own_children) {
   }
 }
 
+void ScalarBinaryExpression::getFieldStringItems(
+    std::vector<std::string> *inline_field_names,
+    std::vector<std::string> *inline_field_values,
+    std::vector<std::string> *non_container_child_field_names,
+    std::vector<const Expression*> *non_container_child_fields,
+    std::vector<std::string> *container_child_field_names,
+    std::vector<std::vector<const Expression*>> *container_child_fields) const {
+  Scalar::getFieldStringItems(inline_field_names,  // Base-class fields go first.
+                              inline_field_values,
+                              non_container_child_field_names,
+                              non_container_child_fields,
+                              container_child_field_names,
+                              container_child_fields);
+  // With no fast_operator_, getAllValuesForJoin() returns static_value_, so list it.
+  if (fast_operator_ == nullptr) {
+    inline_field_names->emplace_back("static_value");
+    if (static_value_.isNull()) {
+      inline_field_values->emplace_back("NULL");  // Null is shown as the literal text NULL.
+    } else {
+      inline_field_values->emplace_back(type_.printValueToString(static_value_));
+    }
+  }
+  // The operation is reported by name, indexed by its BinaryOperationID.
+  inline_field_names->emplace_back("operation");
+  inline_field_values->emplace_back(
+      kBinaryOperationNames[static_cast<int>(operation_.getBinaryOperationID())]);
+  // Both operands are registered as single (non-container) child expressions.
+  non_container_child_field_names->emplace_back("left_operand");
+  non_container_child_fields->emplace_back(left_operand_.get());
+  non_container_child_field_names->emplace_back("right_operand");
+  non_container_child_fields->emplace_back(right_operand_.get());
+}
+
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/ScalarBinaryExpression.hpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarBinaryExpression.hpp b/expressions/scalar/ScalarBinaryExpression.hpp
index c84792a..76f9dbc 100644
--- a/expressions/scalar/ScalarBinaryExpression.hpp
+++ b/expressions/scalar/ScalarBinaryExpression.hpp
@@ -21,6 +21,7 @@
 #define QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_BINARY_EXPRESSION_HPP_
 
 #include <memory>
+#include <string>
 #include <utility>
 #include <vector>
 
@@ -29,6 +30,7 @@
 #include "expressions/scalar/Scalar.hpp"
 #include "storage/StorageBlockInfo.hpp"
 #include "types/TypedValue.hpp"
+#include "types/containers/ColumnVector.hpp"
 #include "types/operations/binary_operations/BinaryOperation.hpp"
 #include "utility/Macros.hpp"
 
@@ -36,7 +38,7 @@
 
 namespace quickstep {
 
-class ColumnVector;
+class ScalarCache;
 class ValueAccessor;
 
 struct SubBlocksReference;
@@ -97,15 +99,26 @@ class ScalarBinaryExpression : public Scalar {
     return static_value_;
   }
 
-  ColumnVector* getAllValues(ValueAccessor *accessor,
-                             const SubBlocksReference *sub_blocks_ref) const override;
+  ColumnVectorPtr getAllValues(ValueAccessor *accessor,
+                               const SubBlocksReference *sub_blocks_ref,
+                               ScalarCache *scalar_cache) const override;
 
-  ColumnVector* getAllValuesForJoin(
+  ColumnVectorPtr getAllValuesForJoin(
       const relation_id left_relation_id,
       ValueAccessor *left_accessor,
       const relation_id right_relation_id,
       ValueAccessor *right_accessor,
-      const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids) const override;
+      const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids,
+      ScalarCache *scalar_cache) const override;
+
+ protected:
+  void getFieldStringItems(
+      std::vector<std::string> *inline_field_names,
+      std::vector<std::string> *inline_field_values,
+      std::vector<std::string> *non_container_child_field_names,
+      std::vector<const Expression*> *non_container_child_fields,
+      std::vector<std::string> *container_child_field_names,
+      std::vector<std::vector<const Expression*>> *container_child_fields) const override;
 
  private:
   void initHelper(bool own_children);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/ScalarCache.hpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarCache.hpp b/expressions/scalar/ScalarCache.hpp
new file mode 100644
index 0000000..1d51362
--- /dev/null
+++ b/expressions/scalar/ScalarCache.hpp
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_CACHE_HPP_
+#define QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_CACHE_HPP_
+
+#include <unordered_map>
+
+#include "types/containers/ColumnVector.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Expressions
+ *  @{
+ */
+
+class ScalarCache {  // Cache of ColumnVectorPtr values keyed by an integer share id.
+ public:
+  ScalarCache() {}  // The cache starts out empty.
+  /** @brief Whether a ColumnVector is stored under share_id. */
+  inline bool has(const int share_id) const {
+    return cv_cache_.find(share_id) != cv_cache_.end();
+  }
+  /** @brief Returns the ColumnVector stored under share_id, which must exist. */
+  inline ColumnVectorPtr get(const int share_id) const {
+    DCHECK(has(share_id));
+    return cv_cache_.at(share_id);  // at() throws std::out_of_range on a missing id.
+  }
+  /** @brief Stores cv under share_id; the id must not already be cached. */
+  inline void set(const int share_id, const ColumnVectorPtr &cv) {
+    DCHECK(!has(share_id));
+    cv_cache_.emplace(share_id, cv);  // emplace() would not replace an existing entry.
+  }
+
+ private:
+  std::unordered_map<int, ColumnVectorPtr> cv_cache_;  // share id -> cached vector.
+
+  DISALLOW_COPY_AND_ASSIGN(ScalarCache);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_EXPRESSIONS_SCALAR_SCALAR_CACHE_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/ScalarCaseExpression.cpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarCaseExpression.cpp b/expressions/scalar/ScalarCaseExpression.cpp
index c81f723..7e7a2c8 100644
--- a/expressions/scalar/ScalarCaseExpression.cpp
+++ b/expressions/scalar/ScalarCaseExpression.cpp
@@ -193,18 +193,21 @@ TypedValue ScalarCaseExpression::getValueForJoinedTuples(
   }
 }
 
-ColumnVector* ScalarCaseExpression::getAllValues(
+ColumnVectorPtr ScalarCaseExpression::getAllValues(
     ValueAccessor *accessor,
-    const SubBlocksReference *sub_blocks_ref) const {
+    const SubBlocksReference *sub_blocks_ref,
+    ScalarCache *scalar_cache) const {
   return InvokeOnValueAccessorMaybeTupleIdSequenceAdapter(
       accessor,
-      [&](auto *accessor) -> ColumnVector* {  // NOLINT(build/c++11)
+      [&](auto *accessor) -> ColumnVectorPtr {  // NOLINT(build/c++11)
     if (has_static_value_) {
-      return ColumnVector::MakeVectorOfValue(type_,
-                                             static_value_,
-                                             accessor->getNumTuples());
+      return ColumnVectorPtr(
+          ColumnVector::MakeVectorOfValue(type_,
+                                          static_value_,
+                                          accessor->getNumTuples()));
     } else if (fixed_result_expression_ != nullptr) {
-      return fixed_result_expression_->getAllValues(accessor, sub_blocks_ref);
+      return fixed_result_expression_->getAllValues(
+          accessor, sub_blocks_ref, scalar_cache);
     }
 
     const TupleIdSequence *accessor_sequence = accessor->getTupleIdSequence();
@@ -238,21 +241,23 @@ ColumnVector* ScalarCaseExpression::getAllValues(
     }
 
     // Generate a ColumnVector of all the values for each case.
-    std::vector<std::unique_ptr<ColumnVector>> case_results;
+    std::vector<ColumnVectorPtr> case_results;
     for (std::vector<std::unique_ptr<TupleIdSequence>>::size_type case_idx = 0;
          case_idx < case_matches.size();
          ++case_idx) {
       std::unique_ptr<ValueAccessor> case_accessor(
           accessor->createSharedTupleIdSequenceAdapter(*case_matches[case_idx]));
       case_results.emplace_back(
-          result_expressions_[case_idx]->getAllValues(case_accessor.get(), sub_blocks_ref));
+          result_expressions_[case_idx]->getAllValues(
+              case_accessor.get(), sub_blocks_ref, scalar_cache));
     }
 
-    std::unique_ptr<ColumnVector> else_results;
+    ColumnVectorPtr else_results;
     if (!else_matches->empty()) {
       std::unique_ptr<ValueAccessor> else_accessor(
           accessor->createSharedTupleIdSequenceAdapter(*else_matches));
-      else_results.reset(else_result_expression_->getAllValues(else_accessor.get(), sub_blocks_ref));
+      else_results = else_result_expression_->getAllValues(
+          else_accessor.get(), sub_blocks_ref, scalar_cache);
     }
 
     // Multiplex per-case results into a single ColumnVector with values in the
@@ -262,17 +267,18 @@ ColumnVector* ScalarCaseExpression::getAllValues(
         accessor_sequence,
         case_matches,
         *else_matches,
-        &case_results,
-        else_results.get());
+        case_results,
+        else_results);
   });
 }
 
-ColumnVector* ScalarCaseExpression::getAllValuesForJoin(
+ColumnVectorPtr ScalarCaseExpression::getAllValuesForJoin(
     const relation_id left_relation_id,
     ValueAccessor *left_accessor,
     const relation_id right_relation_id,
     ValueAccessor *right_accessor,
-    const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids) const {
+    const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids,
+    ScalarCache *scalar_cache) const {
   // Slice 'joined_tuple_ids' apart by case.
   //
   // NOTE(chasseur): We use TupleIdSequence to keep track of the positions in
@@ -321,7 +327,7 @@ ColumnVector* ScalarCaseExpression::getAllValuesForJoin(
   }
 
   // Generate a ColumnVector of all the values for each case.
-  std::vector<std::unique_ptr<ColumnVector>> case_results;
+  std::vector<ColumnVectorPtr> case_results;
   for (std::vector<std::vector<std::pair<tuple_id, tuple_id>>>::size_type case_idx = 0;
        case_idx < case_matches.size();
        ++case_idx) {
@@ -330,22 +336,24 @@ ColumnVector* ScalarCaseExpression::getAllValuesForJoin(
         left_accessor,
         right_relation_id,
         right_accessor,
-        case_matches[case_idx]));
+        case_matches[case_idx],
+        scalar_cache));
   }
 
-  std::unique_ptr<ColumnVector> else_results;
+  ColumnVectorPtr else_results;
   if (!else_positions.empty()) {
     std::vector<std::pair<tuple_id, tuple_id>> else_matches;
     for (tuple_id pos : else_positions) {
       else_matches.emplace_back(joined_tuple_ids[pos]);
     }
 
-    else_results.reset(else_result_expression_->getAllValuesForJoin(
+    else_results = else_result_expression_->getAllValuesForJoin(
         left_relation_id,
         left_accessor,
         right_relation_id,
         right_accessor,
-        else_matches));
+        else_matches,
+        scalar_cache);
   }
 
   // Multiplex per-case results into a single ColumnVector with values in the
@@ -355,8 +363,8 @@ ColumnVector* ScalarCaseExpression::getAllValuesForJoin(
       nullptr,
       case_positions,
       else_positions,
-      &case_results,
-      else_results.get());
+      case_results,
+      else_results);
 }
 
 void ScalarCaseExpression::MultiplexNativeColumnVector(
@@ -420,15 +428,15 @@ void ScalarCaseExpression::MultiplexNativeColumnVector(
 void ScalarCaseExpression::MultiplexIndirectColumnVector(
     const TupleIdSequence *source_sequence,
     const TupleIdSequence &case_matches,
-    IndirectColumnVector *case_result,
+    const IndirectColumnVector &case_result,
     IndirectColumnVector *output) {
   if (source_sequence == nullptr) {
     TupleIdSequence::const_iterator output_pos_it = case_matches.begin();
     for (std::size_t input_pos = 0;
-         input_pos < case_result->size();
+         input_pos < case_result.size();
          ++input_pos, ++output_pos_it) {
       output->positionalWriteTypedValue(*output_pos_it,
-                                        case_result->moveTypedValue(input_pos));
+                                        case_result.getTypedValue(input_pos));
     }
   } else {
     std::size_t input_pos = 0;
@@ -438,20 +446,20 @@ void ScalarCaseExpression::MultiplexIndirectColumnVector(
          ++output_pos, ++source_sequence_it) {
       if (case_matches.get(*source_sequence_it)) {
         output->positionalWriteTypedValue(output_pos,
-                                          case_result->moveTypedValue(input_pos++));
+                                          case_result.getTypedValue(input_pos++));
       }
     }
   }
 }
 
-ColumnVector* ScalarCaseExpression::multiplexColumnVectors(
+ColumnVectorPtr ScalarCaseExpression::multiplexColumnVectors(
     const std::size_t output_size,
     const TupleIdSequence *source_sequence,
     const std::vector<std::unique_ptr<TupleIdSequence>> &case_matches,
     const TupleIdSequence &else_matches,
-    std::vector<std::unique_ptr<ColumnVector>> *case_results,
-    ColumnVector *else_result) const {
-  DCHECK_EQ(case_matches.size(), case_results->size());
+    const std::vector<ColumnVectorPtr> &case_results,
+    const ColumnVectorPtr &else_result) const {
+  DCHECK_EQ(case_matches.size(), case_results.size());
 
   if (NativeColumnVector::UsableForType(type_)) {
     std::unique_ptr<NativeColumnVector> native_result(
@@ -461,12 +469,12 @@ ColumnVector* ScalarCaseExpression::multiplexColumnVectors(
     for (std::vector<std::unique_ptr<TupleIdSequence>>::size_type case_idx = 0;
          case_idx < case_matches.size();
          ++case_idx) {
-      DCHECK((*case_results)[case_idx]->isNative());
+      DCHECK(case_results[case_idx]->isNative());
       if (!case_matches[case_idx]->empty()) {
         MultiplexNativeColumnVector(
             source_sequence,
             *case_matches[case_idx],
-            static_cast<const NativeColumnVector&>(*(*case_results)[case_idx]),
+            static_cast<const NativeColumnVector&>(*case_results[case_idx]),
             native_result.get());
       }
     }
@@ -480,7 +488,7 @@ ColumnVector* ScalarCaseExpression::multiplexColumnVectors(
                                   native_result.get());
     }
 
-    return native_result.release();
+    return ColumnVectorPtr(native_result.release());
   } else {
     std::unique_ptr<IndirectColumnVector> indirect_result(
         new IndirectColumnVector(type_, output_size));
@@ -489,12 +497,12 @@ ColumnVector* ScalarCaseExpression::multiplexColumnVectors(
     for (std::vector<std::unique_ptr<TupleIdSequence>>::size_type case_idx = 0;
          case_idx < case_matches.size();
          ++case_idx) {
-      DCHECK(!(*case_results)[case_idx]->isNative());
+      DCHECK(!case_results[case_idx]->isNative());
       if (!case_matches[case_idx]->empty()) {
         MultiplexIndirectColumnVector(
             source_sequence,
             *case_matches[case_idx],
-            static_cast<IndirectColumnVector*>((*case_results)[case_idx].get()),
+            static_cast<const IndirectColumnVector&>(*case_results[case_idx]),
             indirect_result.get());
       }
     }
@@ -504,11 +512,52 @@ ColumnVector* ScalarCaseExpression::multiplexColumnVectors(
       DCHECK(!else_matches.empty());
       MultiplexIndirectColumnVector(source_sequence,
                                     else_matches,
-                                    static_cast<IndirectColumnVector*>(else_result),
+                                    static_cast<const IndirectColumnVector&>(*else_result),
                                     indirect_result.get());
     }
 
-    return indirect_result.release();
+    return ColumnVectorPtr(indirect_result.release());
+  }
+}
+
+void ScalarCaseExpression::getFieldStringItems(
+    std::vector<std::string> *inline_field_names,
+    std::vector<std::string> *inline_field_values,
+    std::vector<std::string> *non_container_child_field_names,
+    std::vector<const Expression*> *non_container_child_fields,
+    std::vector<std::string> *container_child_field_names,
+    std::vector<std::vector<const Expression*>> *container_child_fields) const {
+  Scalar::getFieldStringItems(inline_field_names,  // Base-class fields go first.
+                              inline_field_values,
+                              non_container_child_field_names,
+                              non_container_child_fields,
+                              container_child_field_names,
+                              container_child_fields);
+  // The static value is listed only when has_static_value_ is set.
+  if (has_static_value_) {
+    inline_field_names->emplace_back("static_value");
+    if (static_value_.isNull()) {
+      inline_field_values->emplace_back("NULL");  // Null is shown as the literal text NULL.
+    } else {
+      inline_field_values->emplace_back(type_.printValueToString(static_value_));
+    }
+  }
+  // The WHEN predicates are exposed as one container of child expressions.
+  container_child_field_names->emplace_back("when_predicates");
+  container_child_fields->emplace_back();  // New empty list, filled by the loop below.
+  for (const auto &predicate : when_predicates_) {
+    container_child_fields->back().emplace_back(predicate.get());
+  }
+  // The per-case result expressions form a second container.
+  container_child_field_names->emplace_back("result_expressions");
+  container_child_fields->emplace_back();  // New empty list, filled by the loop below.
+  for (const auto &expression : result_expressions_) {
+    container_child_fields->back().emplace_back(expression.get());
+  }
+  // The ELSE branch is listed only when an else expression is present.
+  if (else_result_expression_ != nullptr) {
+    non_container_child_field_names->emplace_back("else_result_expression");
+    non_container_child_fields->emplace_back(else_result_expression_.get());
   }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cd01af24/expressions/scalar/ScalarCaseExpression.hpp
----------------------------------------------------------------------
diff --git a/expressions/scalar/ScalarCaseExpression.hpp b/expressions/scalar/ScalarCaseExpression.hpp
index e6809fa..f9062b1 100644
--- a/expressions/scalar/ScalarCaseExpression.hpp
+++ b/expressions/scalar/ScalarCaseExpression.hpp
@@ -22,6 +22,7 @@
 
 #include <cstddef>
 #include <memory>
+#include <string>
 #include <utility>
 #include <vector>
 
@@ -31,15 +32,13 @@
 #include "expressions/scalar/Scalar.hpp"
 #include "storage/StorageBlockInfo.hpp"
 #include "types/TypedValue.hpp"
+#include "types/containers/ColumnVector.hpp"
 #include "utility/Macros.hpp"
 
 #include "glog/logging.h"
 
 namespace quickstep {
 
-class ColumnVector;
-class IndirectColumnVector;
-class NativeColumnVector;
 class TupleIdSequence;
 class Type;
 class ValueAccessor;
@@ -132,15 +131,26 @@ class ScalarCaseExpression : public Scalar {
     }
   }
 
-  ColumnVector* getAllValues(ValueAccessor *accessor,
-                             const SubBlocksReference *sub_blocks_ref) const override;
+  ColumnVectorPtr getAllValues(ValueAccessor *accessor,
+                               const SubBlocksReference *sub_blocks_ref,
+                               ScalarCache *scalar_cache) const override;
 
-  ColumnVector* getAllValuesForJoin(
+  ColumnVectorPtr getAllValuesForJoin(
       const relation_id left_relation_id,
       ValueAccessor *left_accessor,
       const relation_id right_relation_id,
       ValueAccessor *right_accessor,
-      const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids) const override;
+      const std::vector<std::pair<tuple_id, tuple_id>> &joined_tuple_ids,
+      ScalarCache *scalar_cache) const override;
+
+ protected:
+  void getFieldStringItems(
+      std::vector<std::string> *inline_field_names,
+      std::vector<std::string> *inline_field_values,
+      std::vector<std::string> *non_container_child_field_names,
+      std::vector<const Expression*> *non_container_child_fields,
+      std::vector<std::string> *container_child_field_names,
+      std::vector<std::vector<const Expression*>> *container_child_fields) const override;
 
  private:
   // Merge the values in the NativeColumnVector 'case_result' into '*output' at
@@ -158,7 +168,7 @@ class ScalarCaseExpression : public Scalar {
   static void MultiplexIndirectColumnVector(
       const TupleIdSequence *source_sequence,
       const TupleIdSequence &case_matches,
-      IndirectColumnVector *case_result,
+      const IndirectColumnVector &case_result,
       IndirectColumnVector *output);
 
   // Create and return a new ColumnVector by multiplexing the ColumnVectors
@@ -171,13 +181,13 @@ class ScalarCaseExpression : public Scalar {
   // the explicit WHEN clauses. Similarly, '*case_results' are the values
   // generated for the tuples matching each WHEN clause, and '*else_results'
   // are the values generated for the ELSE tuples.
-  ColumnVector* multiplexColumnVectors(
+  ColumnVectorPtr multiplexColumnVectors(
       const std::size_t output_size,
       const TupleIdSequence *source_sequence,
       const std::vector<std::unique_ptr<TupleIdSequence>> &case_matches,
       const TupleIdSequence &else_matches,
-      std::vector<std::unique_ptr<ColumnVector>> *case_results,
-      ColumnVector *else_result) const;
+      const std::vector<ColumnVectorPtr> &case_results,
+      const ColumnVectorPtr &else_result) const;
 
   std::vector<std::unique_ptr<Predicate>> when_predicates_;
   std::vector<std::unique_ptr<Scalar>> result_expressions_;