You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2017/04/21 21:04:48 UTC

[1/2] incubator-quickstep git commit: Updates

Repository: incubator-quickstep
Updated Branches:
  refs/heads/common-subexpression f3d695347 -> 6dc3a3b47


Updates


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/3294cf28
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/3294cf28
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/3294cf28

Branch: refs/heads/common-subexpression
Commit: 3294cf287975e782c8670b276deea4e691b86f60
Parents: f3d6953
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Fri Apr 21 13:44:10 2017 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Fri Apr 21 13:44:10 2017 -0500

----------------------------------------------------------------------
 query_optimizer/PhysicalGenerator.cpp           |   3 +-
 .../expressions/BinaryExpression.cpp            |   2 +-
 query_optimizer/expressions/UnaryExpression.cpp |   3 +-
 query_optimizer/rules/CMakeLists.txt            |   1 +
 query_optimizer/rules/CollapseSelection.cpp     |   2 +
 .../rules/ExtractCommonSubexpression.cpp        |  68 +-
 .../rules/ReuseAggregateExpressions.cpp         |   4 +-
 .../tests/execution_generator/CMakeLists.txt    |  12 +
 .../CommonSubexpression.test                    |  52 ++
 .../tests/physical_generator/CMakeLists.txt     |   4 +
 .../physical_generator/CommonSubexpression.test | 772 +++++++++++++++++++
 .../tests/physical_generator/Select.test        | 112 +--
 12 files changed, 970 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/3294cf28/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index e002ba7..d1ad58b 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -187,8 +187,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
              << physical_plan_->toString();
   }
 
-//  DVLOG(4) << "Optimized physical plan:\n" << physical_plan_->toString();
-  std::cerr << "Optimized physical plan:\n" << physical_plan_->toString();
+  DVLOG(4) << "Optimized physical plan:\n" << physical_plan_->toString();
 
   if (FLAGS_visualize_plan) {
     quickstep::PlanVisualizer plan_visualizer;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/3294cf28/query_optimizer/expressions/BinaryExpression.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/BinaryExpression.cpp b/query_optimizer/expressions/BinaryExpression.cpp
index 52e7b19..f49c6a2 100644
--- a/query_optimizer/expressions/BinaryExpression.cpp
+++ b/query_optimizer/expressions/BinaryExpression.cpp
@@ -124,7 +124,7 @@ std::size_t BinaryExpression::computeHash() const {
 bool BinaryExpression::equals(const ScalarPtr &other) const {
   BinaryExpressionPtr expr;
   if (SomeBinaryExpression::MatchesWithConditionalCast(other, &expr) &&
-      operation_.getBinaryOperationID() == expr->operation_.getBinaryOperationID()) {
+      &operation_ == &expr->operation_) {
     ScalarPtr left = left_;
     ScalarPtr right = right_;
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/3294cf28/query_optimizer/expressions/UnaryExpression.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/expressions/UnaryExpression.cpp b/query_optimizer/expressions/UnaryExpression.cpp
index b8d3c63..b448553 100644
--- a/query_optimizer/expressions/UnaryExpression.cpp
+++ b/query_optimizer/expressions/UnaryExpression.cpp
@@ -68,8 +68,7 @@ std::size_t UnaryExpression::computeHash() const {
 bool UnaryExpression::equals(const ScalarPtr &other) const {
   UnaryExpressionPtr expr;
   if (SomeUnaryExpression::MatchesWithConditionalCast(other, &expr)) {
-    return operation_.getUnaryOperationID() == expr->operation_.getUnaryOperationID()
-           && operand_->equals(expr->operand_);
+    return &operation_ == &expr->operation_ && operand_->equals(expr->operand_);
   }
   return false;
 }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/3294cf28/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index fa73612..0acd869 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -233,6 +233,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_ReorderColumns
                       quickstep_queryoptimizer_rules_Rule
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_rules_ReuseAggregateExpressions
+                      ${GFLAGS_LIB_NAME}
                       glog
                       quickstep_expressions_aggregation_AggregateFunction
                       quickstep_expressions_aggregation_AggregateFunctionFactory

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/3294cf28/query_optimizer/rules/CollapseSelection.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CollapseSelection.cpp b/query_optimizer/rules/CollapseSelection.cpp
index a03faa5..e5427b4 100644
--- a/query_optimizer/rules/CollapseSelection.cpp
+++ b/query_optimizer/rules/CollapseSelection.cpp
@@ -37,8 +37,10 @@ P::PhysicalPtr CollapseSelection::applyToNode(const P::PhysicalPtr &input) {
   P::SelectionPtr selection;
   P::SelectionPtr child_selection;
 
+  // TODO(jianqiao): Handle the case where filter predicates are present.
   if (P::SomeSelection::MatchesWithConditionalCast(input, &selection) &&
       P::SomeSelection::MatchesWithConditionalCast(selection->input(), &child_selection) &&
+      selection->filter_predicate() == nullptr &&
       child_selection->filter_predicate() == nullptr) {
     std::vector<E::NamedExpressionPtr> project_expressions =
         selection->project_expressions();

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/3294cf28/query_optimizer/rules/ExtractCommonSubexpression.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ExtractCommonSubexpression.cpp b/query_optimizer/rules/ExtractCommonSubexpression.cpp
index ad2aec1..6b51744 100644
--- a/query_optimizer/rules/ExtractCommonSubexpression.cpp
+++ b/query_optimizer/rules/ExtractCommonSubexpression.cpp
@@ -31,6 +31,8 @@
 #include "query_optimizer/expressions/PatternMatcher.hpp"
 #include "query_optimizer/expressions/Scalar.hpp"
 #include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/NestedLoopsJoin.hpp"
 #include "query_optimizer/physical/Physical.hpp"
 #include "query_optimizer/physical/PhysicalType.hpp"
 #include "query_optimizer/physical/Selection.hpp"
@@ -82,21 +84,6 @@ P::PhysicalPtr ExtractCommonSubexpression::applyInternal(
           : input->copyWithNewChildren(new_children);
 
   switch (node->getPhysicalType()) {
-    case P::PhysicalType::kSelection: {
-      const P::SelectionPtr selection =
-          std::static_pointer_cast<const P::Selection>(node);
-
-      const std::vector<E::NamedExpressionPtr> new_expressions =
-          DownCast<E::NamedExpression>(
-              transformExpressions(UpCast(selection->project_expressions())));
-
-      if (new_expressions != selection->project_expressions()) {
-        return P::Selection::Create(selection->input(),
-                                    new_expressions,
-                                    selection->filter_predicate());
-      }
-      break;
-    }
     case P::PhysicalType::kAggregate: {
       const P::AggregatePtr aggregate =
           std::static_pointer_cast<const P::Aggregate>(node);
@@ -131,6 +118,57 @@ P::PhysicalPtr ExtractCommonSubexpression::applyInternal(
       }
       break;
     }
+    case P::PhysicalType::kSelection: {
+      const P::SelectionPtr selection =
+          std::static_pointer_cast<const P::Selection>(node);
+
+      const std::vector<E::NamedExpressionPtr> new_expressions =
+          DownCast<E::NamedExpression>(
+              transformExpressions(UpCast(selection->project_expressions())));
+
+      if (new_expressions != selection->project_expressions()) {
+        return P::Selection::Create(selection->input(),
+                                    new_expressions,
+                                    selection->filter_predicate());
+      }
+      break;
+    }
+    case P::PhysicalType::kHashJoin: {
+      const P::HashJoinPtr hash_join =
+          std::static_pointer_cast<const P::HashJoin>(node);
+
+      const std::vector<E::NamedExpressionPtr> new_expressions =
+          DownCast<E::NamedExpression>(
+              transformExpressions(UpCast(hash_join->project_expressions())));
+
+      if (new_expressions != hash_join->project_expressions()) {
+        return P::HashJoin::Create(hash_join->left(),
+                                   hash_join->right(),
+                                   hash_join->left_join_attributes(),
+                                   hash_join->right_join_attributes(),
+                                   hash_join->residual_predicate(),
+                                   new_expressions,
+                                   hash_join->join_type());
+      }
+      break;
+
+    }
+    case P::PhysicalType::kNestedLoopsJoin: {
+      const P::NestedLoopsJoinPtr nested_loops_join =
+          std::static_pointer_cast<const P::NestedLoopsJoin>(node);
+
+      const std::vector<E::NamedExpressionPtr> new_expressions =
+          DownCast<E::NamedExpression>(
+              transformExpressions(UpCast(nested_loops_join->project_expressions())));
+
+      if (new_expressions != nested_loops_join->project_expressions()) {
+        return P::NestedLoopsJoin::Create(nested_loops_join->left(),
+                                          nested_loops_join->right(),
+                                          nested_loops_join->join_predicate(),
+                                          new_expressions);
+      }
+      break;
+    }
     default:
       break;
   }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/3294cf28/query_optimizer/rules/ReuseAggregateExpressions.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ReuseAggregateExpressions.cpp b/query_optimizer/rules/ReuseAggregateExpressions.cpp
index e63d8f6..7903605 100644
--- a/query_optimizer/rules/ReuseAggregateExpressions.cpp
+++ b/query_optimizer/rules/ReuseAggregateExpressions.cpp
@@ -56,12 +56,12 @@
 namespace quickstep {
 namespace optimizer {
 
-DEFINE_double(reuse_aggregate_group_size_threshold, 100000u,
+DEFINE_uint64(reuse_aggregate_group_size_threshold, 1000u,
               "The threshold on estimated number of groups for an aggregation "
               "below which the ReuseAggregateExpressions optimization will be "
               "performed.");
 
-DEFINE_uint64(reuse_aggregate_ratio_threshold, 0.3,
+DEFINE_double(reuse_aggregate_ratio_threshold, 0.3,
               "The threshold on the ratio of (# of eliminable columns) to "
               "(# of original columns) for an aggregation above which the "
               "ReuseAggregateExpressions optimization will be performed.");

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/3294cf28/query_optimizer/tests/execution_generator/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/execution_generator/CMakeLists.txt b/query_optimizer/tests/execution_generator/CMakeLists.txt
index 40629ee..595d09d 100644
--- a/query_optimizer/tests/execution_generator/CMakeLists.txt
+++ b/query_optimizer/tests/execution_generator/CMakeLists.txt
@@ -15,6 +15,11 @@
 # specific language governing permissions and limitations
 # under the License.
 
+add_test(quickstep_queryoptimizer_tests_executiongenerator_commonsubexpression
+         "../quickstep_queryoptimizer_tests_ExecutionGeneratorTest"
+         "${CMAKE_CURRENT_SOURCE_DIR}/CommonSubexpression.test"
+         "${CMAKE_CURRENT_BINARY_DIR}/CommonSubexpression.test"
+         "${CMAKE_CURRENT_BINARY_DIR}/CommonSubexpression/")
 add_test(quickstep_queryoptimizer_tests_executiongenerator_create
          "../quickstep_queryoptimizer_tests_ExecutionGeneratorTest"
          "${CMAKE_CURRENT_SOURCE_DIR}/Create.test"
@@ -77,6 +82,11 @@ add_test(quickstep_queryoptimizer_tests_executiongenerator_update
          "${CMAKE_CURRENT_BINARY_DIR}/Update/")
 
 if (ENABLE_DISTRIBUTED)
+  add_test(quickstep_queryoptimizer_tests_executiongenerator_commonsubexpression_distributed
+           "../quickstep_queryoptimizer_tests_DistributedExecutionGeneratorTest"
+           "${CMAKE_CURRENT_SOURCE_DIR}/CommonSubexpression.test"
+           "${CMAKE_CURRENT_BINARY_DIR}/CommonSubexpressionDistributed.test"
+           "${CMAKE_CURRENT_BINARY_DIR}/CommonSubexpressionDistributed/")
   add_test(quickstep_queryoptimizer_tests_executiongenerator_create_distributed
            "../quickstep_queryoptimizer_tests_DistributedExecutionGeneratorTest"
            "${CMAKE_CURRENT_SOURCE_DIR}/Create.test"
@@ -141,6 +151,7 @@ endif(ENABLE_DISTRIBUTED)
 
 # Create the folders where the unit tests will store their data blocks for the
 # duration of their test.
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/CommonSubexpression)
 file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Create)
 file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Delete)
 file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Distinct)
@@ -155,6 +166,7 @@ file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/TableGenerator)
 file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Update)
 
 if (ENABLE_DISTRIBUTED)
+  file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/CommonSubexpressionDistributed)
   file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/CreateDistributed)
   file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/DeleteDistributed)
   file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/DistinctDistributed)

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/3294cf28/query_optimizer/tests/execution_generator/CommonSubexpression.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/execution_generator/CommonSubexpression.test b/query_optimizer/tests/execution_generator/CommonSubexpression.test
new file mode 100644
index 0000000..e0b5e2d
--- /dev/null
+++ b/query_optimizer/tests/execution_generator/CommonSubexpression.test
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+SELECT int_col+1, 1+int_col
+FROM test
+ORDER BY int_col
+LIMIT 5;
+--
++-----------+-----------+
+|(int_col+1)|(1+int_col)|
++-----------+-----------+
+|        -22|        -22|
+|        -20|        -20|
+|        -18|        -18|
+|        -16|        -16|
+|        -14|        -14|
++-----------+-----------+
+==
+
+SELECT SUM(int_col+long_col), SUM((long_col+int_col)*2)
+FROM test;
+--
++-----------------------+---------------------------+
+|SUM((int_col+long_col))|SUM(((long_col+int_col)*2))|
++-----------------------+---------------------------+
+|                   4382|                       8764|
++-----------------------+---------------------------+
+==
+
+SELECT SUM(long_col+1), SUM(long_col+1)*2, AVG(long_col+1), AVG(long_col+1)*2, COUNT(*)
+FROM test;
+--
++--------------------+---------------------+--------------------+---------------------+--------------------+
+|SUM((long_col+1))   |(SUM((long_col+1))*2)|AVG((long_col+1))   |(AVG((long_col+1))*2)|COUNT(*)            |
++--------------------+---------------------+--------------------+---------------------+--------------------+
+|                4925|                 9850|                 197|                  394|                  25|
++--------------------+---------------------+--------------------+---------------------+--------------------+
+==

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/3294cf28/query_optimizer/tests/physical_generator/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/physical_generator/CMakeLists.txt b/query_optimizer/tests/physical_generator/CMakeLists.txt
index c752cdd..fc3390e 100644
--- a/query_optimizer/tests/physical_generator/CMakeLists.txt
+++ b/query_optimizer/tests/physical_generator/CMakeLists.txt
@@ -15,6 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
+add_test(quickstep_queryoptimizer_tests_physicalgenerator_commonsubexpression
+         "../quickstep_queryoptimizer_tests_OptimizerTextTest"
+         "${CMAKE_CURRENT_SOURCE_DIR}/CommonSubexpression.test"
+         "${CMAKE_CURRENT_BINARY_DIR}/CommonSubexpression.test")
 add_test(quickstep_queryoptimizer_tests_physicalgenerator_copy
          "../quickstep_queryoptimizer_tests_OptimizerTextTest"
          "${CMAKE_CURRENT_SOURCE_DIR}/Copy.test"

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/3294cf28/query_optimizer/tests/physical_generator/CommonSubexpression.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/physical_generator/CommonSubexpression.test b/query_optimizer/tests/physical_generator/CommonSubexpression.test
new file mode 100644
index 0000000..b23a97d
--- /dev/null
+++ b/query_optimizer/tests/physical_generator/CommonSubexpression.test
@@ -0,0 +1,772 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[default optimized_logical_plan physical_plan]
+
+SELECT int_col+1, 1+int_col
+FROM test;
+--
+[Optimized Logical Plan]
+TopLevelPlan
++-plan=Project
+| +-input=TableReference[relation_name=Test,relation_alias=test]
+| | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | +-AttributeReference[id=5,name=vchar_col,relation=test,type=VarChar(20) NULL]
+| +-project_list=
+|   +-Alias[id=6,name=,alias=(int_col+1),relation=,type=Int NULL]
+|   | +-Add
+|   |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |   +-Literal[value=1,type=Int]
+|   +-Alias[id=7,name=,alias=(1+int_col),relation=,type=Int NULL]
+|     +-Add
+|       +-Literal[value=1,type=Int]
+|       +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
++-output_attributes=
+  +-AttributeReference[id=6,name=,alias=(int_col+1),relation=,type=Int NULL]
+  +-AttributeReference[id=7,name=,alias=(1+int_col),relation=,type=Int NULL]
+[Physical Plan]
+TopLevelPlan
++-plan=Selection
+| +-input=TableReference[relation=Test,alias=test]
+| | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | +-AttributeReference[id=5,name=vchar_col,relation=test,type=VarChar(20) NULL]
+| +-project_expressions=
+|   +-Alias[id=6,name=,alias=(int_col+1),relation=,type=Int NULL]
+|   | +-CommonSubexpression[common_subexpression_id=8]
+|   |   +-Operand=Add
+|   |     +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |     +-Literal[value=1,type=Int]
+|   +-Alias[id=7,name=,alias=(1+int_col),relation=,type=Int NULL]
+|     +-CommonSubexpression[common_subexpression_id=8]
+|       +-Operand=Add
+|         +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|         +-Literal[value=1,type=Int]
++-output_attributes=
+  +-AttributeReference[id=6,name=,alias=(int_col+1),relation=,type=Int NULL]
+  +-AttributeReference[id=7,name=,alias=(1+int_col),relation=,type=Int NULL]
+==
+
+# Aggregate
+SELECT SUM(int_col+float_col), MIN((float_col+int_col)*2)
+FROM test;
+--
+[Optimized Logical Plan]
+TopLevelPlan
++-plan=Project
+| +-input=Aggregate
+| | +-input=TableReference[relation_name=Test,relation_alias=test]
+| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | |   type=VarChar(20) NULL]
+| | +-grouping_expressions=
+| | | +-[]
+| | +-aggregate_expressions=
+| |   +-Alias[id=6,name=,alias=$aggregate0,relation=$aggregate,type=Double NULL]
+| |   | +-AggregateFunction[function=SUM]
+| |   |   +-Add
+| |   |     +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| |   |     +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| |   +-Alias[id=7,name=,alias=$aggregate1,relation=$aggregate,type=Float NULL]
+| |     +-AggregateFunction[function=MIN]
+| |       +-Multiply
+| |         +-Add
+| |         | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| |         | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| |         +-Literal[value=2,type=Int]
+| +-project_list=
+|   +-Alias[id=6,name=,alias=SUM((int_col+float_col)),relation=,type=Double NULL]
+|   | +-AttributeReference[id=6,name=,alias=$aggregate0,relation=$aggregate,
+|   |   type=Double NULL]
+|   +-Alias[id=7,name=,alias=MIN(((float_col+int_col)*2)),relation=,
+|     type=Float NULL]
+|     +-AttributeReference[id=7,name=,alias=$aggregate1,relation=$aggregate,
+|       type=Float NULL]
++-output_attributes=
+  +-AttributeReference[id=6,name=,alias=SUM((int_col+float_col)),relation=,
+  | type=Double NULL]
+  +-AttributeReference[id=7,name=,alias=MIN(((float_col+int_col)*2)),relation=,
+    type=Float NULL]
+[Physical Plan]
+TopLevelPlan
++-plan=Selection
+| +-input=Aggregate
+| | +-input=TableReference[relation=Test,alias=test]
+| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | |   type=VarChar(20) NULL]
+| | +-grouping_expressions=
+| | | +-[]
+| | +-aggregate_expressions=
+| |   +-Alias[id=6,name=,alias=$aggregate0,relation=$aggregate,type=Double NULL]
+| |   | +-AggregateFunction[function=SUM]
+| |   |   +-CommonSubexpression[common_subexpression_id=8]
+| |   |     +-Operand=Add
+| |   |       +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| |   |       +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| |   +-Alias[id=7,name=,alias=$aggregate1,relation=$aggregate,type=Float NULL]
+| |     +-AggregateFunction[function=MIN]
+| |       +-Multiply
+| |         +-CommonSubexpression[common_subexpression_id=8]
+| |         | +-Operand=Add
+| |         |   +-AttributeReference[id=0,name=int_col,relation=test,
+| |         |   | type=Int NULL]
+| |         |   +-AttributeReference[id=2,name=float_col,relation=test,
+| |         |     type=Float]
+| |         +-Literal[value=2,type=Int]
+| +-project_expressions=
+|   +-Alias[id=6,name=,alias=SUM((int_col+float_col)),relation=,type=Double NULL]
+|   | +-AttributeReference[id=6,name=,alias=$aggregate0,relation=$aggregate,
+|   |   type=Double NULL]
+|   +-Alias[id=7,name=,alias=MIN(((float_col+int_col)*2)),relation=,
+|     type=Float NULL]
+|     +-AttributeReference[id=7,name=,alias=$aggregate1,relation=$aggregate,
+|       type=Float NULL]
++-output_attributes=
+  +-AttributeReference[id=6,name=,alias=SUM((int_col+float_col)),relation=,
+  | type=Double NULL]
+  +-AttributeReference[id=7,name=,alias=MIN(((float_col+int_col)*2)),relation=,
+    type=Float NULL]
+==
+
+# HashJoin
+SELECT int_col + j, (int_col + j) * float_col
+FROM test, (SELECT i, i * i AS j FROM generate_series(1, 10) AS g(i)) t
+WHERE int_col = i AND (int_col + i) < float_col;
+--
+[Optimized Logical Plan]
+TopLevelPlan
++-plan=Project
+| +-input=Filter
+| | +-input=HashJoin
+| | | +-left=TableReference[relation_name=Test,relation_alias=test]
+| | | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | | |   type=VarChar(20) NULL]
+| | | +-right=Project
+| | | | +-input=TableGenerator[function_name=generate_series,table_alias=g]
+| | | | | +-AttributeReference[id=6,name=generate_series,alias=g,
+| | | | |   relation=generate_series,type=Int]
+| | | | +-project_list=
+| | | |   +-Alias[id=7,name=i,relation=,type=Int]
+| | | |   | +-AttributeReference[id=6,name=generate_series,alias=g,
+| | | |   |   relation=generate_series,type=Int]
+| | | |   +-Alias[id=8,name=j,relation=t,type=Int]
+| | | |     +-Multiply
+| | | |       +-AttributeReference[id=6,name=generate_series,alias=g,
+| | | |       | relation=generate_series,type=Int]
+| | | |       +-AttributeReference[id=6,name=generate_series,alias=g,
+| | | |         relation=generate_series,type=Int]
+| | | +-left_join_attributes=
+| | | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-right_join_attributes=
+| | |   +-AttributeReference[id=7,name=i,relation=,type=Int]
+| | +-filter_predicate=Less
+| |   +-Add
+| |   | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| |   | +-AttributeReference[id=7,name=i,relation=,type=Int]
+| |   +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| +-project_list=
+|   +-Alias[id=9,name=,alias=(int_col+j),relation=,type=Int NULL]
+|   | +-Add
+|   |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |   +-AttributeReference[id=8,name=j,relation=t,type=Int]
+|   +-Alias[id=10,name=,alias=((int_col+j)*float_col),relation=,type=Float NULL]
+|     +-Multiply
+|       +-Add
+|       | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|       | +-AttributeReference[id=8,name=j,relation=t,type=Int]
+|       +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
++-output_attributes=
+  +-AttributeReference[id=9,name=,alias=(int_col+j),relation=,type=Int NULL]
+  +-AttributeReference[id=10,name=,alias=((int_col+j)*float_col),relation=,
+    type=Float NULL]
+[Physical Plan]
+TopLevelPlan
++-plan=HashJoin
+| +-left=TableGenerator[function_name=generate_series,table_alias=g]
+| | +-AttributeReference[id=6,name=generate_series,alias=g,
+| |   relation=generate_series,type=Int]
+| +-right=TableReference[relation=Test,alias=test]
+| | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | +-AttributeReference[id=5,name=vchar_col,relation=test,type=VarChar(20) NULL]
+| +-residual_predicate=Less
+| | +-Add
+| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-AttributeReference[id=6,name=generate_series,alias=g,
+| | |   relation=generate_series,type=Int]
+| | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| +-project_expressions=
+| | +-Alias[id=9,name=,alias=(int_col+j),relation=,type=Int NULL]
+| | | +-CommonSubexpression[common_subexpression_id=11]
+| | |   +-Operand=Add
+| | |     +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | |     +-Multiply
+| | |       +-AttributeReference[id=6,name=generate_series,alias=g,
+| | |       | relation=generate_series,type=Int]
+| | |       +-AttributeReference[id=6,name=generate_series,alias=g,
+| | |         relation=generate_series,type=Int]
+| | +-Alias[id=10,name=,alias=((int_col+j)*float_col),relation=,type=Float NULL]
+| |   +-Multiply
+| |     +-CommonSubexpression[common_subexpression_id=11]
+| |     | +-Operand=Add
+| |     |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| |     |   +-Multiply
+| |     |     +-AttributeReference[id=6,name=generate_series,alias=g,
+| |     |     | relation=generate_series,type=Int]
+| |     |     +-AttributeReference[id=6,name=generate_series,alias=g,
+| |     |       relation=generate_series,type=Int]
+| |     +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| +-left_join_attributes=
+| | +-AttributeReference[id=6,name=generate_series,alias=g,
+| |   relation=generate_series,type=Int]
+| +-right_join_attributes=
+|   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
++-output_attributes=
+  +-AttributeReference[id=9,name=,alias=(int_col+j),relation=,type=Int NULL]
+  +-AttributeReference[id=10,name=,alias=((int_col+j)*float_col),relation=,
+    type=Float NULL]
+==
+
+# NestedLoopsJoin
+SELECT int_col + j, (int_col + j) * float_col
+FROM test, (SELECT i, i * i AS j FROM generate_series(1, 10) AS g(i)) t
+WHERE (int_col + i) < float_col;
+--
+[Optimized Logical Plan]
+TopLevelPlan
++-plan=Project
+| +-input=NestedLoopsJoin
+| | +-left=TableReference[relation_name=Test,relation_alias=test]
+| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | |   type=VarChar(20) NULL]
+| | +-right=Project
+| | | +-input=TableGenerator[function_name=generate_series,table_alias=g]
+| | | | +-AttributeReference[id=6,name=generate_series,alias=g,
+| | | |   relation=generate_series,type=Int]
+| | | +-project_list=
+| | |   +-Alias[id=7,name=i,relation=,type=Int]
+| | |   | +-AttributeReference[id=6,name=generate_series,alias=g,
+| | |   |   relation=generate_series,type=Int]
+| | |   +-Alias[id=8,name=j,relation=t,type=Int]
+| | |     +-Multiply
+| | |       +-AttributeReference[id=6,name=generate_series,alias=g,
+| | |       | relation=generate_series,type=Int]
+| | |       +-AttributeReference[id=6,name=generate_series,alias=g,
+| | |         relation=generate_series,type=Int]
+| | +-join_predicate=Less
+| |   +-Add
+| |   | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| |   | +-AttributeReference[id=7,name=i,relation=,type=Int]
+| |   +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| +-project_list=
+|   +-Alias[id=9,name=,alias=(int_col+j),relation=,type=Int NULL]
+|   | +-Add
+|   |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |   +-AttributeReference[id=8,name=j,relation=t,type=Int]
+|   +-Alias[id=10,name=,alias=((int_col+j)*float_col),relation=,type=Float NULL]
+|     +-Multiply
+|       +-Add
+|       | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|       | +-AttributeReference[id=8,name=j,relation=t,type=Int]
+|       +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
++-output_attributes=
+  +-AttributeReference[id=9,name=,alias=(int_col+j),relation=,type=Int NULL]
+  +-AttributeReference[id=10,name=,alias=((int_col+j)*float_col),relation=,
+    type=Float NULL]
+[Physical Plan]
+TopLevelPlan
++-plan=NestedLoopsJoin
+| +-left=TableReference[relation=Test,alias=test]
+| | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | +-AttributeReference[id=5,name=vchar_col,relation=test,type=VarChar(20) NULL]
+| +-right=TableGenerator[function_name=generate_series,table_alias=g]
+| | +-AttributeReference[id=6,name=generate_series,alias=g,
+| |   relation=generate_series,type=Int]
+| +-join_predicate=Less
+| | +-Add
+| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-AttributeReference[id=6,name=generate_series,alias=g,
+| | |   relation=generate_series,type=Int]
+| | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| +-project_expressions=
+|   +-Alias[id=9,name=,alias=(int_col+j),relation=,type=Int NULL]
+|   | +-CommonSubexpression[common_subexpression_id=11]
+|   |   +-Operand=Add
+|   |     +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |     +-Multiply
+|   |       +-AttributeReference[id=6,name=generate_series,alias=g,
+|   |       | relation=generate_series,type=Int]
+|   |       +-AttributeReference[id=6,name=generate_series,alias=g,
+|   |         relation=generate_series,type=Int]
+|   +-Alias[id=10,name=,alias=((int_col+j)*float_col),relation=,type=Float NULL]
+|     +-Multiply
+|       +-CommonSubexpression[common_subexpression_id=11]
+|       | +-Operand=Add
+|       |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|       |   +-Multiply
+|       |     +-AttributeReference[id=6,name=generate_series,alias=g,
+|       |     | relation=generate_series,type=Int]
+|       |     +-AttributeReference[id=6,name=generate_series,alias=g,
+|       |       relation=generate_series,type=Int]
+|       +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
++-output_attributes=
+  +-AttributeReference[id=9,name=,alias=(int_col+j),relation=,type=Int NULL]
+  +-AttributeReference[id=10,name=,alias=((int_col+j)*float_col),relation=,
+    type=Float NULL]
+==
+
+# Case expression
+SELECT long_col+1,
+       CASE WHEN int_col = 1 THEN (long_col+1)*(long_col+1)
+            WHEN int_col = 2 THEN (long_col+1)*(long_col+1)
+            ELSE long_col+1 END AS result
+FROM test;
+--
+[Optimized Logical Plan]
+TopLevelPlan
++-plan=Project
+| +-input=TableReference[relation_name=Test,relation_alias=test]
+| | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | +-AttributeReference[id=5,name=vchar_col,relation=test,type=VarChar(20) NULL]
+| +-project_list=
+|   +-Alias[id=6,name=,alias=(long_col+1),relation=,type=Long]
+|   | +-Add
+|   |   +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+|   |   +-Literal[value=1,type=Int]
+|   +-Alias[id=7,name=result,relation=,type=Long]
+|     +-SearchedCase
+|       +-else_result_expression=Add
+|       | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+|       | +-Literal[value=1,type=Long]
+|       +-condition_perdicates=
+|       | +-Equal
+|       | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|       | | +-Literal[value=1,type=Int]
+|       | +-Equal
+|       |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|       |   +-Literal[value=2,type=Int]
+|       +-conditional_result_expressions=
+|         +-Multiply
+|         | +-Add
+|         | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+|         | | +-Literal[value=1,type=Int]
+|         | +-Add
+|         |   +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+|         |   +-Literal[value=1,type=Int]
+|         +-Multiply
+|           +-Add
+|           | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+|           | +-Literal[value=1,type=Int]
+|           +-Add
+|             +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+|             +-Literal[value=1,type=Int]
++-output_attributes=
+  +-AttributeReference[id=6,name=,alias=(long_col+1),relation=,type=Long]
+  +-AttributeReference[id=7,name=result,relation=,type=Long]
+[Physical Plan]
+TopLevelPlan
++-plan=Selection
+| +-input=TableReference[relation=Test,alias=test]
+| | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | +-AttributeReference[id=5,name=vchar_col,relation=test,type=VarChar(20) NULL]
+| +-project_expressions=
+|   +-Alias[id=6,name=,alias=(long_col+1),relation=,type=Long]
+|   | +-Add
+|   |   +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+|   |   +-Literal[value=1,type=Int]
+|   +-Alias[id=7,name=result,relation=,type=Long]
+|     +-SearchedCase
+|       +-else_result_expression=Add
+|       | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+|       | +-Literal[value=1,type=Long]
+|       +-condition_perdicates=
+|       | +-Equal
+|       | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|       | | +-Literal[value=1,type=Int]
+|       | +-Equal
+|       |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|       |   +-Literal[value=2,type=Int]
+|       +-conditional_result_expressions=
+|         +-Multiply
+|         | +-CommonSubexpression[common_subexpression_id=8]
+|         | | +-Operand=Add
+|         | |   +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+|         | |   +-Literal[value=1,type=Int]
+|         | +-CommonSubexpression[common_subexpression_id=8]
+|         |   +-Operand=Add
+|         |     +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+|         |     +-Literal[value=1,type=Int]
+|         +-Multiply
+|           +-CommonSubexpression[common_subexpression_id=9]
+|           | +-Operand=Add
+|           |   +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+|           |   +-Literal[value=1,type=Int]
+|           +-CommonSubexpression[common_subexpression_id=9]
+|             +-Operand=Add
+|               +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+|               +-Literal[value=1,type=Int]
++-output_attributes=
+  +-AttributeReference[id=6,name=,alias=(long_col+1),relation=,type=Long]
+  +-AttributeReference[id=7,name=result,relation=,type=Long]
+==
+
+SELECT (int_col+1)*(int_col+2)*(int_col+3), (int_col+1)*(int_col+2), int_col+1
+FROM test;
+--
+[Optimized Logical Plan]
+TopLevelPlan
++-plan=Project
+| +-input=TableReference[relation_name=Test,relation_alias=test]
+| | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | +-AttributeReference[id=5,name=vchar_col,relation=test,type=VarChar(20) NULL]
+| +-project_list=
+|   +-Alias[id=6,name=,alias=(((int_col+1)*(int_col+2))*(int_col+3)),relation=,
+|   | type=Int NULL]
+|   | +-Multiply
+|   |   +-Multiply
+|   |   | +-Add
+|   |   | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |   | | +-Literal[value=1,type=Int]
+|   |   | +-Add
+|   |   |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |   |   +-Literal[value=2,type=Int]
+|   |   +-Add
+|   |     +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |     +-Literal[value=3,type=Int]
+|   +-Alias[id=7,name=,alias=((int_col+1)*(int_col+2)),relation=,type=Int NULL]
+|   | +-Multiply
+|   |   +-Add
+|   |   | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |   | +-Literal[value=1,type=Int]
+|   |   +-Add
+|   |     +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |     +-Literal[value=2,type=Int]
+|   +-Alias[id=8,name=,alias=(int_col+1),relation=,type=Int NULL]
+|     +-Add
+|       +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|       +-Literal[value=1,type=Int]
++-output_attributes=
+  +-AttributeReference[id=6,name=,alias=(((int_col+1)*(int_col+2))*(int_col+3)),
+  | relation=,type=Int NULL]
+  +-AttributeReference[id=7,name=,alias=((int_col+1)*(int_col+2)),relation=,
+  | type=Int NULL]
+  +-AttributeReference[id=8,name=,alias=(int_col+1),relation=,type=Int NULL]
+[Physical Plan]
+TopLevelPlan
++-plan=Selection
+| +-input=TableReference[relation=Test,alias=test]
+| | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | +-AttributeReference[id=5,name=vchar_col,relation=test,type=VarChar(20) NULL]
+| +-project_expressions=
+|   +-Alias[id=6,name=,alias=(((int_col+1)*(int_col+2))*(int_col+3)),relation=,
+|   | type=Int NULL]
+|   | +-Multiply
+|   |   +-CommonSubexpression[common_subexpression_id=10]
+|   |   | +-Operand=Multiply
+|   |   |   +-CommonSubexpression[common_subexpression_id=9]
+|   |   |   | +-Operand=Add
+|   |   |   |   +-AttributeReference[id=0,name=int_col,relation=test,
+|   |   |   |   | type=Int NULL]
+|   |   |   |   +-Literal[value=1,type=Int]
+|   |   |   +-Add
+|   |   |     +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |   |     +-Literal[value=2,type=Int]
+|   |   +-Add
+|   |     +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |     +-Literal[value=3,type=Int]
+|   +-Alias[id=7,name=,alias=((int_col+1)*(int_col+2)),relation=,type=Int NULL]
+|   | +-CommonSubexpression[common_subexpression_id=10]
+|   |   +-Operand=Multiply
+|   |     +-CommonSubexpression[common_subexpression_id=9]
+|   |     | +-Operand=Add
+|   |     |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |     |   +-Literal[value=1,type=Int]
+|   |     +-Add
+|   |       +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|   |       +-Literal[value=2,type=Int]
+|   +-Alias[id=8,name=,alias=(int_col+1),relation=,type=Int NULL]
+|     +-CommonSubexpression[common_subexpression_id=9]
+|       +-Operand=Add
+|         +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+|         +-Literal[value=1,type=Int]
++-output_attributes=
+  +-AttributeReference[id=6,name=,alias=(((int_col+1)*(int_col+2))*(int_col+3)),
+  | relation=,type=Int NULL]
+  +-AttributeReference[id=7,name=,alias=((int_col+1)*(int_col+2)),relation=,
+  | type=Int NULL]
+  +-AttributeReference[id=8,name=,alias=(int_col+1),relation=,type=Int NULL]
+==
+
+# Reuse aggregate expressions
+SELECT SUM(long_col+1), AVG(1+long_col), MIN(float_col), MIN(float_col)*2, COUNT(*)
+FROM test;
+--
+[Optimized Logical Plan]
+TopLevelPlan
++-plan=Project
+| +-input=Aggregate
+| | +-input=TableReference[relation_name=Test,relation_alias=test]
+| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | |   type=VarChar(20) NULL]
+| | +-grouping_expressions=
+| | | +-[]
+| | +-aggregate_expressions=
+| |   +-Alias[id=6,name=,alias=$aggregate0,relation=$aggregate,type=Long NULL]
+| |   | +-AggregateFunction[function=SUM]
+| |   |   +-Add
+| |   |     +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |   |     +-Literal[value=1,type=Int]
+| |   +-Alias[id=7,name=,alias=$aggregate1,relation=$aggregate,type=Double NULL]
+| |   | +-AggregateFunction[function=AVG]
+| |   |   +-Add
+| |   |     +-Literal[value=1,type=Int]
+| |   |     +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |   +-Alias[id=8,name=,alias=$aggregate2,relation=$aggregate,type=Float NULL]
+| |   | +-AggregateFunction[function=MIN]
+| |   |   +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| |   +-Alias[id=9,name=,alias=$aggregate3,relation=$aggregate,type=Float NULL]
+| |   | +-AggregateFunction[function=MIN]
+| |   |   +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| |   +-Alias[id=11,name=,alias=$aggregate4,relation=$aggregate,type=Long]
+| |     +-AggregateFunction[function=COUNT]
+| |       +-[]
+| +-project_list=
+|   +-Alias[id=6,name=,alias=SUM((long_col+1)),relation=,type=Long NULL]
+|   | +-AttributeReference[id=6,name=,alias=$aggregate0,relation=$aggregate,
+|   |   type=Long NULL]
+|   +-Alias[id=7,name=,alias=AVG((1+long_col)),relation=,type=Double NULL]
+|   | +-AttributeReference[id=7,name=,alias=$aggregate1,relation=$aggregate,
+|   |   type=Double NULL]
+|   +-Alias[id=8,name=,alias=MIN(float_col),relation=,type=Float NULL]
+|   | +-AttributeReference[id=8,name=,alias=$aggregate2,relation=$aggregate,
+|   |   type=Float NULL]
+|   +-Alias[id=10,name=,alias=(MIN(float_col)*2),relation=,type=Float NULL]
+|   | +-Multiply
+|   |   +-AttributeReference[id=9,name=,alias=$aggregate3,relation=$aggregate,
+|   |   | type=Float NULL]
+|   |   +-Literal[value=2,type=Int]
+|   +-Alias[id=11,name=,alias=COUNT(*),relation=,type=Long]
+|     +-AttributeReference[id=11,name=,alias=$aggregate4,relation=$aggregate,
+|       type=Long]
++-output_attributes=
+  +-AttributeReference[id=6,name=,alias=SUM((long_col+1)),relation=,
+  | type=Long NULL]
+  +-AttributeReference[id=7,name=,alias=AVG((1+long_col)),relation=,
+  | type=Double NULL]
+  +-AttributeReference[id=8,name=,alias=MIN(float_col),relation=,type=Float NULL]
+  +-AttributeReference[id=10,name=,alias=(MIN(float_col)*2),relation=,
+  | type=Float NULL]
+  +-AttributeReference[id=11,name=,alias=COUNT(*),relation=,type=Long]
+[Physical Plan]
+TopLevelPlan
++-plan=Selection
+| +-input=Aggregate
+| | +-input=TableReference[relation=Test,alias=test]
+| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | |   type=VarChar(20) NULL]
+| | +-grouping_expressions=
+| | | +-[]
+| | +-aggregate_expressions=
+| |   +-Alias[id=6,name=,alias=$aggregate0,relation=$aggregate,type=Long NULL]
+| |   | +-AggregateFunction[function=SUM]
+| |   |   +-Add
+| |   |     +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |   |     +-Literal[value=1,type=Int]
+| |   +-Alias[id=8,name=,alias=$aggregate2,relation=$aggregate,type=Float NULL]
+| |   | +-AggregateFunction[function=MIN]
+| |   |   +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| |   +-Alias[id=11,name=,alias=$aggregate4,relation=$aggregate,type=Long]
+| |     +-AggregateFunction[function=COUNT]
+| |       +-[]
+| +-project_expressions=
+|   +-Alias[id=6,name=,alias=SUM((long_col+1)),relation=,type=Long NULL]
+|   | +-AttributeReference[id=6,name=,alias=$aggregate0,relation=$aggregate,
+|   |   type=Long NULL]
+|   +-Alias[id=7,name=,alias=AVG((1+long_col)),relation=,type=Long NULL]
+|   | +-Divide
+|   |   +-AttributeReference[id=6,name=,alias=$aggregate0,relation=$aggregate,
+|   |   | type=Long NULL]
+|   |   +-AttributeReference[id=11,name=,alias=$aggregate4,relation=$aggregate,
+|   |     type=Long]
+|   +-Alias[id=8,name=,alias=MIN(float_col),relation=,type=Float NULL]
+|   | +-AttributeReference[id=8,name=,alias=$aggregate2,relation=$aggregate,
+|   |   type=Float NULL]
+|   +-Alias[id=10,name=,alias=(MIN(float_col)*2),relation=,type=Float NULL]
+|   | +-Multiply
+|   |   +-AttributeReference[id=8,name=,alias=$aggregate2,relation=$aggregate,
+|   |   | type=Float NULL]
+|   |   +-Literal[value=2,type=Int]
+|   +-Alias[id=11,name=,alias=COUNT(*),relation=,type=Long]
+|     +-AttributeReference[id=11,name=,alias=$aggregate4,relation=$aggregate,
+|       type=Long]
++-output_attributes=
+  +-AttributeReference[id=6,name=,alias=SUM((long_col+1)),relation=,
+  | type=Long NULL]
+  +-AttributeReference[id=7,name=,alias=AVG((1+long_col)),relation=,
+  | type=Long NULL]
+  +-AttributeReference[id=8,name=,alias=MIN(float_col),relation=,type=Float NULL]
+  +-AttributeReference[id=10,name=,alias=(MIN(float_col)*2),relation=,
+  | type=Float NULL]
+  +-AttributeReference[id=11,name=,alias=COUNT(*),relation=,type=Long]
+==
+
+SELECT SUM(long_col+1)+2, (SUM(long_col+1)+2)*3
+FROM test;
+--
+[Optimized Logical Plan]
+TopLevelPlan
++-plan=Project
+| +-input=Aggregate
+| | +-input=TableReference[relation_name=Test,relation_alias=test]
+| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | |   type=VarChar(20) NULL]
+| | +-grouping_expressions=
+| | | +-[]
+| | +-aggregate_expressions=
+| |   +-Alias[id=6,name=,alias=$aggregate0,relation=$aggregate,type=Long NULL]
+| |   | +-AggregateFunction[function=SUM]
+| |   |   +-Add
+| |   |     +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |   |     +-Literal[value=1,type=Int]
+| |   +-Alias[id=8,name=,alias=$aggregate1,relation=$aggregate,type=Long NULL]
+| |     +-AggregateFunction[function=SUM]
+| |       +-Add
+| |         +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |         +-Literal[value=1,type=Int]
+| +-project_list=
+|   +-Alias[id=7,name=,alias=(SUM((long_col+1))+2),relation=,type=Long NULL]
+|   | +-Add
+|   |   +-AttributeReference[id=6,name=,alias=$aggregate0,relation=$aggregate,
+|   |   | type=Long NULL]
+|   |   +-Literal[value=2,type=Int]
+|   +-Alias[id=9,name=,alias=((SUM((long_col+1))+2)*3),relation=,type=Long NULL]
+|     +-Multiply
+|       +-Add
+|       | +-AttributeReference[id=8,name=,alias=$aggregate1,relation=$aggregate,
+|       | | type=Long NULL]
+|       | +-Literal[value=2,type=Int]
+|       +-Literal[value=3,type=Int]
++-output_attributes=
+  +-AttributeReference[id=7,name=,alias=(SUM((long_col+1))+2),relation=,
+  | type=Long NULL]
+  +-AttributeReference[id=9,name=,alias=((SUM((long_col+1))+2)*3),relation=,
+    type=Long NULL]
+[Physical Plan]
+TopLevelPlan
++-plan=Selection
+| +-input=Aggregate
+| | +-input=TableReference[relation=Test,alias=test]
+| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | |   type=VarChar(20) NULL]
+| | +-grouping_expressions=
+| | | +-[]
+| | +-aggregate_expressions=
+| |   +-Alias[id=6,name=,alias=$aggregate0,relation=$aggregate,type=Long NULL]
+| |     +-AggregateFunction[function=SUM]
+| |       +-Add
+| |         +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |         +-Literal[value=1,type=Int]
+| +-project_expressions=
+|   +-Alias[id=7,name=,alias=(SUM((long_col+1))+2),relation=,type=Long NULL]
+|   | +-CommonSubexpression[common_subexpression_id=10]
+|   |   +-Operand=Add
+|   |     +-AttributeReference[id=6,name=,alias=$aggregate0,relation=$aggregate,
+|   |     | type=Long NULL]
+|   |     +-Literal[value=2,type=Int]
+|   +-Alias[id=9,name=,alias=((SUM((long_col+1))+2)*3),relation=,type=Long NULL]
+|     +-Multiply
+|       +-CommonSubexpression[common_subexpression_id=10]
+|       | +-Operand=Add
+|       |   +-AttributeReference[id=6,name=,alias=$aggregate0,
+|       |   | relation=$aggregate,type=Long NULL]
+|       |   +-Literal[value=2,type=Int]
+|       +-Literal[value=3,type=Int]
++-output_attributes=
+  +-AttributeReference[id=7,name=,alias=(SUM((long_col+1))+2),relation=,
+  | type=Long NULL]
+  +-AttributeReference[id=9,name=,alias=((SUM((long_col+1))+2)*3),relation=,
+    type=Long NULL]
+==

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/3294cf28/query_optimizer/tests/physical_generator/Select.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/physical_generator/Select.test b/query_optimizer/tests/physical_generator/Select.test
index f7de922..2148bbe 100644
--- a/query_optimizer/tests/physical_generator/Select.test
+++ b/query_optimizer/tests/physical_generator/Select.test
@@ -1022,34 +1022,51 @@ TopLevelPlan
 [Physical Plan]
 TopLevelPlan
 +-plan=Selection
-| +-input=Aggregate
-| | +-input=TableReference[relation=Test,alias=test]
-| | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
-| | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
-| | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
-| | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
-| | | +-AttributeReference[id=5,name=vchar_col,relation=test,
-| | |   type=VarChar(20) NULL]
-| | +-grouping_expressions=
-| | | +-[]
-| | +-aggregate_expressions=
-| |   +-Alias[id=6,name=,alias=$aggregate0,relation=$aggregate,type=Long]
-| |   | +-AggregateFunction[function=COUNT]
-| |   |   +-[]
-| |   +-Alias[id=7,name=,alias=$aggregate1,relation=$aggregate,type=Long]
-| |   | +-AggregateFunction[function=COUNT]
-| |   |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| |   +-Alias[id=8,name=,alias=$aggregate2,relation=$aggregate,type=Long NULL]
-| |   | +-AggregateFunction[function=SUM]
-| |   |   +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
-| |   +-Alias[id=9,name=,alias=$aggregate3,relation=$aggregate,type=Double NULL]
-| |   | +-AggregateFunction[function=AVG]
-| |   |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| |   +-Alias[id=11,name=,alias=$aggregate4,relation=$aggregate,type=Double NULL]
-| |     +-AggregateFunction[function=MAX]
-| |       +-AttributeReference[id=3,name=double_col,relation=test,
-| |         type=Double NULL]
+| +-input=Selection
+| | +-input=Aggregate
+| | | +-input=TableReference[relation=Test,alias=test]
+| | | | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | | | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | | | +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| | | | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
+| | | | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
+| | | | +-AttributeReference[id=5,name=vchar_col,relation=test,
+| | | |   type=VarChar(20) NULL]
+| | | +-grouping_expressions=
+| | | | +-[]
+| | | +-aggregate_expressions=
+| | |   +-Alias[id=6,name=,alias=$aggregate0,relation=$aggregate,type=Long]
+| | |   | +-AggregateFunction[function=COUNT]
+| | |   |   +-[]
+| | |   +-Alias[id=7,name=,alias=$aggregate1,relation=$aggregate,type=Long]
+| | |   | +-AggregateFunction[function=COUNT]
+| | |   |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | |   +-Alias[id=8,name=,alias=$aggregate2,relation=$aggregate,type=Long NULL]
+| | |   | +-AggregateFunction[function=SUM]
+| | |   |   +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| | |   +-Alias[id=12,name=,alias=$aggregate3,relation=,type=Long NULL]
+| | |   | +-AggregateFunction[function=SUM]
+| | |   |   +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
+| | |   +-Alias[id=11,name=,alias=$aggregate4,relation=$aggregate,
+| | |     type=Double NULL]
+| | |     +-AggregateFunction[function=MAX]
+| | |       +-AttributeReference[id=3,name=double_col,relation=test,
+| | |         type=Double NULL]
+| | +-project_expressions=
+| |   +-AttributeReference[id=6,name=,alias=$aggregate0,relation=$aggregate,
+| |   | type=Long]
+| |   +-AttributeReference[id=7,name=,alias=$aggregate1,relation=$aggregate,
+| |   | type=Long]
+| |   +-AttributeReference[id=8,name=,alias=$aggregate2,relation=$aggregate,
+| |   | type=Long NULL]
+| |   +-Alias[id=9,name=,alias=$aggregate3,relation=,type=Long NULL]
+| |   | +-Divide
+| |   |   +-AttributeReference[id=12,name=,alias=$aggregate3,relation=,
+| |   |   | type=Long NULL]
+| |   |   +-AttributeReference[id=7,name=,alias=$aggregate1,relation=$aggregate,
+| |   |     type=Long]
+| |   +-AttributeReference[id=11,name=,alias=$aggregate4,relation=$aggregate,
+| |     type=Double NULL]
 | +-filter_predicate=Greater
 | | +-Add
 | | | +-AttributeReference[id=11,name=,alias=$aggregate4,relation=$aggregate,
@@ -1311,31 +1328,40 @@ TopLevelPlan
 | |   +-Alias[id=8,name=,alias=$aggregate0,relation=$aggregate,type=Long]
 | |   | +-AggregateFunction[function=COUNT]
 | |   |   +-Add
-| |   |     +-Add
-| |   |     | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| |   |     | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
-| |   |     +-Add
-| |   |       +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
-| |   |       +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| |   |     +-CommonSubexpression[common_subexpression_id=13]
+| |   |     | +-Operand=Add
+| |   |     |   +-AttributeReference[id=0,name=int_col,relation=test,
+| |   |     |   | type=Int NULL]
+| |   |     |   +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |   |     +-CommonSubexpression[common_subexpression_id=14]
+| |   |       +-Operand=Add
+| |   |         +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |   |         +-AttributeReference[id=2,name=float_col,relation=test,
+| |   |           type=Float]
 | |   +-Alias[id=9,name=,alias=$aggregate1,relation=$aggregate,type=Long NULL]
 | |   | +-AggregateFunction[function=MAX]
 | |   |   +-Divide
-| |   |     +-Add
-| |   |     | +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| |   |     | +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |   |     +-CommonSubexpression[common_subexpression_id=13]
+| |   |     | +-Operand=Add
+| |   |     |   +-AttributeReference[id=0,name=int_col,relation=test,
+| |   |     |   | type=Int NULL]
+| |   |     |   +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
 | |   |     +-Literal[value=2,type=Int]
 | |   +-Alias[id=11,name=,alias=$aggregate2,relation=$aggregate,type=Long NULL]
 | |   | +-AggregateFunction[function=MAX]
 | |   |   +-Add
 | |   |     +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| |   |     +-Add
-| |   |       +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
-| |   |       +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |   |     +-CommonSubexpression[common_subexpression_id=13]
+| |   |       +-Operand=Add
+| |   |         +-AttributeReference[id=0,name=int_col,relation=test,
+| |   |         | type=Int NULL]
+| |   |         +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
 | |   +-Alias[id=12,name=,alias=$aggregate3,relation=$aggregate,type=Double NULL]
 | |     +-AggregateFunction[function=SUM]
-| |       +-Add
-| |         +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
-| |         +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
+| |       +-CommonSubexpression[common_subexpression_id=14]
+| |         +-Operand=Add
+| |           +-AttributeReference[id=1,name=long_col,relation=test,type=Long]
+| |           +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
 | +-filter_predicate=Greater
 | | +-AttributeReference[id=11,name=,alias=$aggregate2,relation=$aggregate,
 | | | type=Long NULL]


[2/2] incubator-quickstep git commit: Updates

Posted by ji...@apache.org.
Updates


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/6dc3a3b4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/6dc3a3b4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/6dc3a3b4

Branch: refs/heads/common-subexpression
Commit: 6dc3a3b47f60f60370af649bb40004dda248dde5
Parents: 3294cf2
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Fri Apr 21 16:03:34 2017 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Fri Apr 21 16:03:34 2017 -0500

----------------------------------------------------------------------
 query_optimizer/PhysicalGenerator.cpp           |  13 +--
 query_optimizer/rules/CMakeLists.txt            |   3 +
 .../rules/ExtractCommonSubexpression.cpp        | 115 +++++++++++++++----
 .../rules/ExtractCommonSubexpression.hpp        |  46 +++++++-
 .../physical_generator/CommonSubexpression.test |   4 +-
 5 files changed, 146 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6dc3a3b4/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index d1ad58b..5f3b1b7 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -124,12 +124,6 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
   std::vector<std::unique_ptr<Rule<P::Physical>>> rules;
   rules.emplace_back(new PruneColumns());
 
-  // TODO(jianqiao): It is possible for PushDownLowCostDisjunctivePredicate to
-  // generate two chaining Selection nodes that can actually be fused into one.
-  // Note that currently it is okay to have the two Selections because they are
-  // applied to a small cardinality stored relation, which is very light-weight.
-  // However it is better to have a FuseSelection optimization (or even a more
-  // general FusePhysical optimization) in the future.
   rules.emplace_back(new PushDownLowCostDisjunctivePredicate());
 
   rules.emplace_back(new ReduceGroupByAttributes(optimizer_context_));
@@ -156,9 +150,10 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
 
   rules.emplace_back(new FuseAggregateJoin());
 
-  // Some of the optimization passes (e.g. ReuseAggregateExpressions) might add
-  // extra Selection nodes and extra projection columns for convenience. So we
-  // collapse Selection nodes and prune unnecessary columns here.
+  // Some of the optimization passes (e.g. PushDownLowCostDisjunctivePredicate
+  // and ReuseAggregateExpressions) might add extra Selection nodes and extra
+  // projection columns for their convenience. So we collapse Selection nodes
+  // and prune unnecessary columns here.
   rules.emplace_back(new CollapseSelection());
   rules.emplace_back(new PruneColumns());
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6dc3a3b4/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 0acd869..f21d668 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -95,6 +95,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_CollapseSelection
 target_link_libraries(quickstep_queryoptimizer_rules_ExtractCommonSubexpression
                       glog
                       quickstep_queryoptimizer_OptimizerContext
+                      quickstep_queryoptimizer_expressions_AggregateFunction
                       quickstep_queryoptimizer_expressions_Alias
                       quickstep_queryoptimizer_expressions_CommonSubexpression
                       quickstep_queryoptimizer_expressions_Expression
@@ -103,6 +104,8 @@ target_link_libraries(quickstep_queryoptimizer_rules_ExtractCommonSubexpression
                       quickstep_queryoptimizer_expressions_PatternMatcher
                       quickstep_queryoptimizer_expressions_Scalar
                       quickstep_queryoptimizer_physical_Aggregate
+                      quickstep_queryoptimizer_physical_HashJoin
+                      quickstep_queryoptimizer_physical_NestedLoopsJoin
                       quickstep_queryoptimizer_physical_Physical
                       quickstep_queryoptimizer_physical_PhysicalType
                       quickstep_queryoptimizer_physical_Selection

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6dc3a3b4/query_optimizer/rules/ExtractCommonSubexpression.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ExtractCommonSubexpression.cpp b/query_optimizer/rules/ExtractCommonSubexpression.cpp
index 6b51744..6bbd902 100644
--- a/query_optimizer/rules/ExtractCommonSubexpression.cpp
+++ b/query_optimizer/rules/ExtractCommonSubexpression.cpp
@@ -24,6 +24,7 @@
 #include <vector>
 
 #include "query_optimizer/OptimizerContext.hpp"
+#include "query_optimizer/expressions/AggregateFunction.hpp"
 #include "query_optimizer/expressions/Alias.hpp"
 #include "query_optimizer/expressions/CommonSubexpression.hpp"
 #include "query_optimizer/expressions/ExpressionType.hpp"
@@ -49,10 +50,9 @@ namespace P = ::quickstep::optimizer::physical;
 ExtractCommonSubexpression::ExtractCommonSubexpression(
     OptimizerContext *optimizer_context)
     : optimizer_context_(optimizer_context) {
-  const std::vector<E::ExpressionType> whitelist = {
+  const std::vector<E::ExpressionType> homogeneous_expr_types = {
       E::ExpressionType::kAlias,
       E::ExpressionType::kAttributeReference,
-      E::ExpressionType::kAggregateFunction,
       E::ExpressionType::kBinaryExpression,
       E::ExpressionType::kCast,
       E::ExpressionType::kCommonSubexpression,
@@ -60,8 +60,8 @@ ExtractCommonSubexpression::ExtractCommonSubexpression(
       E::ExpressionType::kUnaryExpression
   };
 
-  for (const auto &expr_type : whitelist) {
-    homogeneous_whitelist_.emplace(static_cast<int>(expr_type));
+  for (const auto &expr_type : homogeneous_expr_types) {
+    homogeneous_expression_types_.emplace(expr_type);
   }
 }
 
@@ -73,6 +73,7 @@ P::PhysicalPtr ExtractCommonSubexpression::apply(const P::PhysicalPtr &input) {
 
 P::PhysicalPtr ExtractCommonSubexpression::applyInternal(
     const P::PhysicalPtr &input) {
+  // First process all child nodes.
   std::vector<P::PhysicalPtr> new_children;
   for (const auto &child : input->children()) {
     new_children.emplace_back(applyInternal(child));
@@ -83,34 +84,70 @@ P::PhysicalPtr ExtractCommonSubexpression::applyInternal(
           ? input
           : input->copyWithNewChildren(new_children);
 
+  // Process expressions of the current node.
   switch (node->getPhysicalType()) {
     case P::PhysicalType::kAggregate: {
       const P::AggregatePtr aggregate =
           std::static_pointer_cast<const P::Aggregate>(node);
-      std::vector<E::ExpressionPtr> expressions =
-          UpCast(aggregate->aggregate_expressions());
+
+      std::vector<E::ExpressionPtr> expressions;
+      // Gather grouping expressions and aggregate functions' argument expressions.
       for (const auto &expr : aggregate->grouping_expressions()) {
         expressions.emplace_back(expr);
       }
+      for (const auto &expr : aggregate->aggregate_expressions()) {
+        const E::AggregateFunctionPtr &func =
+            std::static_pointer_cast<const E::AggregateFunction>(expr->expression());
+        for (const auto &arg : func->getArguments()) {
+          expressions.emplace_back(arg);
+        }
+      }
 
+      // Transform the expressions so that common subexpressions are labelled.
       const std::vector<E::ExpressionPtr> new_expressions =
           transformExpressions(expressions);
 
       if (new_expressions != expressions) {
         std::vector<E::AliasPtr> new_aggregate_expressions;
         std::vector<E::NamedExpressionPtr> new_grouping_expressions;
-        const std::size_t num_aggrs = aggregate->aggregate_expressions().size();
 
-        for (std::size_t i = 0; i < num_aggrs; ++i) {
-          DCHECK(E::SomeAlias::Matches(new_expressions[i]));
-          new_aggregate_expressions.emplace_back(
-              std::static_pointer_cast<const E::Alias>(new_expressions[i]));
-        }
-        for (std::size_t i = num_aggrs; i < new_expressions.size(); ++i) {
+        // Reconstruct grouping expressions.
+        const std::size_t num_grouping_expressions =
+            aggregate->grouping_expressions().size();
+        for (std::size_t i = 0; i < num_grouping_expressions; ++i) {
           DCHECK(E::SomeNamedExpression::Matches(new_expressions[i]));
           new_grouping_expressions.emplace_back(
               std::static_pointer_cast<const E::NamedExpression>(new_expressions[i]));
         }
+
+        // Reconstruct aggregate expressions.
+        auto it = new_expressions.begin() + num_grouping_expressions;
+        for (const auto &expr : aggregate->aggregate_expressions()) {
+          const E::AggregateFunctionPtr &func =
+              std::static_pointer_cast<const E::AggregateFunction>(
+                  expr->expression());
+
+          std::vector<E::ScalarPtr> new_arguments;
+          for (std::size_t i = 0; i < func->getArguments().size(); ++i, ++it) {
+            DCHECK(E::SomeScalar::Matches(*it));
+            new_arguments.emplace_back(std::static_pointer_cast<const E::Scalar>(*it));
+          }
+
+          if (new_arguments == func->getArguments()) {
+            new_aggregate_expressions.emplace_back(expr);
+          } else {
+            const E::AggregateFunctionPtr new_func =
+                E::AggregateFunction::Create(func->getAggregate(),
+                                             new_arguments,
+                                             func->is_vector_aggregate(),
+                                             func->is_distinct());
+            new_aggregate_expressions.emplace_back(
+                E::Alias::Create(expr->id(),
+                                 new_func,
+                                 expr->attribute_name(),
+                                 expr->attribute_alias()));
+          }
+        }
         return P::Aggregate::Create(aggregate->input(),
                                     new_grouping_expressions,
                                     new_aggregate_expressions,
@@ -122,6 +159,7 @@ P::PhysicalPtr ExtractCommonSubexpression::applyInternal(
       const P::SelectionPtr selection =
           std::static_pointer_cast<const P::Selection>(node);
 
+      // Transform Selection's project expressions.
       const std::vector<E::NamedExpressionPtr> new_expressions =
           DownCast<E::NamedExpression>(
               transformExpressions(UpCast(selection->project_expressions())));
@@ -137,6 +175,7 @@ P::PhysicalPtr ExtractCommonSubexpression::applyInternal(
       const P::HashJoinPtr hash_join =
           std::static_pointer_cast<const P::HashJoin>(node);
 
+      // Transform HashJoin's project expressions.
       const std::vector<E::NamedExpressionPtr> new_expressions =
           DownCast<E::NamedExpression>(
               transformExpressions(UpCast(hash_join->project_expressions())));
@@ -157,6 +196,7 @@ P::PhysicalPtr ExtractCommonSubexpression::applyInternal(
       const P::NestedLoopsJoinPtr nested_loops_join =
           std::static_pointer_cast<const P::NestedLoopsJoin>(node);
 
+      // Transform NestedLoopsJoin's project expressions.
       const std::vector<E::NamedExpressionPtr> new_expressions =
           DownCast<E::NamedExpression>(
               transformExpressions(UpCast(nested_loops_join->project_expressions())));
@@ -178,12 +218,28 @@ P::PhysicalPtr ExtractCommonSubexpression::applyInternal(
 
 std::vector<E::ExpressionPtr> ExtractCommonSubexpression::transformExpressions(
     const std::vector<E::ExpressionPtr> &expressions) {
+  // Step 1. For each subexpression, count the number of its occurrences.
   ScalarCounter counter;
   ScalarHashable hashable;
   for (const auto &expr : expressions) {
     visitAndCount(expr, &counter, &hashable);
   }
 
+  // Note that any subexpression with count > 1 is a common subexpression.
+  // However, it is not necessary to create a CommonSubexpression node for every
+  // such subexpression. E.g. consider the case
+  // --
+  //   SELECT (x+1)*(x+2), (x+1)*(x+2)*3 FROM s;
+  // --
+  // We only need to create one *dominant* CommonSubexpression (x+1)*(x+2) and
+  // do not need to create the child (x+1) or (x+2) nodes.
+  //
+  // To address this issue. We define that a subtree S *dominates* its descendent
+  // subtree (or leaf node) T if and only if counter[S] >= counter[T].
+  //
+  // Then we create CommonSubexpression nodes for every subexpression that is
+  // not dominated by any ancestor.
+
   ScalarMap substitution_map;
   std::vector<E::ExpressionPtr> new_expressions;
   for (const auto &expr : expressions) {
@@ -202,11 +258,12 @@ bool ExtractCommonSubexpression::visitAndCount(
     const E::ExpressionPtr &expression,
     ScalarCounter *counter,
     ScalarHashable *hashable) {
+  // This bool flag is for avoiding some unnecessary hash() computation.
   bool children_hashable = true;
 
-  const auto homogeneous_whitelist_it =
-      homogeneous_whitelist_.find(static_cast<int>(expression->getExpressionType()));
-  if (homogeneous_whitelist_it != homogeneous_whitelist_.end()) {
+  const auto homogeneous_expression_types_it =
+      homogeneous_expression_types_.find(expression->getExpressionType());
+  if (homogeneous_expression_types_it != homogeneous_expression_types_.end()) {
     for (const auto &child : expression->children()) {
       children_hashable &= visitAndCount(child, counter, hashable);
     }
@@ -215,6 +272,9 @@ bool ExtractCommonSubexpression::visitAndCount(
   E::ScalarPtr scalar;
   if (children_hashable &&
       E::SomeScalar::MatchesWithConditionalCast(expression, &scalar)) {
+    // A scalar expression may or may not support the hash() computation.
+    // In the later case, a HashNotSupported exception will be thrown and we
+    // simply ignore this expression (and all its ancestor expressions).
     try {
       ++(*counter)[scalar];
     } catch (const HashNotSupported &e) {
@@ -232,7 +292,13 @@ E::ExpressionPtr ExtractCommonSubexpression::visitAndTransform(
     const ScalarCounter &counter,
     const ScalarHashable &hashable,
     ScalarMap *substitution_map) {
-  // TODO: figure out whether it is beneficial.
+  // TODO(jianqiao): Currently it is hardly beneficial to make AttributeReference
+  // a common subexpression due to the inefficiency of ScalarAttribute's
+  // size-not-known-at-compile-time std::memcpy() calls, compared to copy-elision
+  // at selection level. Even in the case of compressed column store, it requires
+  // an attribute to occur at least 4 times for the common subexpression version
+  // to outperform the direct decoding version. We may look into ScalarAttribute
+  // and modify the heuristic here later.
   if (expression->getExpressionType() == E::ExpressionType::kScalarLiteral ||
       expression->getExpressionType() == E::ExpressionType::kAttributeReference) {
     return expression;
@@ -245,12 +311,14 @@ E::ExpressionPtr ExtractCommonSubexpression::visitAndTransform(
 
   std::size_t new_max_reference_count;
   if (is_hashable) {
-    // CommonSubexpression node already generated.
+    // CommonSubexpression node already generated somewhere. Just refer to that
+    // and return.
     const auto substitution_map_it = substitution_map->find(scalar);
     if (substitution_map_it != substitution_map->end()) {
       return substitution_map_it->second;
     }
 
+    // Otherwise, update the dominance count.
     const auto counter_it = counter.find(scalar);
     DCHECK(counter_it != counter.end());
     DCHECK_LE(max_reference_count, counter_it->second);
@@ -259,10 +327,13 @@ E::ExpressionPtr ExtractCommonSubexpression::visitAndTransform(
     new_max_reference_count = max_reference_count;
   }
 
+  // Process children.
   std::vector<E::ExpressionPtr> new_children;
-  const auto homogeneous_whitelist_it =
-      homogeneous_whitelist_.find(static_cast<int>(expression->getExpressionType()));
-  if (homogeneous_whitelist_it == homogeneous_whitelist_.end()) {
+  const auto homogeneous_expression_types_it =
+      homogeneous_expression_types_.find(expression->getExpressionType());
+  if (homogeneous_expression_types_it == homogeneous_expression_types_.end()) {
+    // If child subexpressions cannot be hoisted through the current expression,
+    // treat child expressions as isolated sub-optimizations.
     for (const auto &child : expression->children()) {
       new_children.emplace_back(transformExpression(child));
     }
@@ -285,6 +356,8 @@ E::ExpressionPtr ExtractCommonSubexpression::visitAndTransform(
         expression->copyWithNewChildren(new_children));
   }
 
+  // Wrap it with a new CommonSubexpression node if the current expression is a
+  // dominant subexpression.
   if (is_hashable && new_max_reference_count > max_reference_count) {
     DCHECK(E::SomeScalar::Matches(output));
     const E::CommonSubexpressionPtr common_subexpression =

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6dc3a3b4/query_optimizer/rules/ExtractCommonSubexpression.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ExtractCommonSubexpression.hpp b/query_optimizer/rules/ExtractCommonSubexpression.hpp
index 49db499..6984dac 100644
--- a/query_optimizer/rules/ExtractCommonSubexpression.hpp
+++ b/query_optimizer/rules/ExtractCommonSubexpression.hpp
@@ -20,13 +20,16 @@
 #ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_EXTRACT_COMMON_SUBEXPRESSION_HPP_
 #define QUICKSTEP_QUERY_OPTIMIZER_RULES_EXTRACT_COMMON_SUBEXPRESSION_HPP_
 
+#include <functional>
 #include <memory>
 #include <string>
+#include <type_traits>
 #include <unordered_map>
 #include <unordered_set>
 
 #include "query_optimizer/expressions/CommonSubexpression.hpp"
 #include "query_optimizer/expressions/Expression.hpp"
+#include "query_optimizer/expressions/ExpressionType.hpp"
 #include "query_optimizer/expressions/Scalar.hpp"
 #include "query_optimizer/physical/Physical.hpp"
 #include "query_optimizer/rules/Rule.hpp"
@@ -41,6 +44,14 @@ class OptimizerContext;
  *  @{
  */
 
+/**
+ * @brief Rule that applies to a physical plan to identify and label common
+ *        subexpressions.
+ *
+ * @note This is essentially a logical optimization pass. However, we need some
+ *       of the physical passes (e.g. ReuseAggregateExpressions) to be finalized
+ *       before this one to maximize optimization opportunities.
+ */
 class ExtractCommonSubexpression : public Rule<physical::Physical> {
  public:
   /**
@@ -74,28 +85,34 @@ class ExtractCommonSubexpression : public Rule<physical::Physical> {
     }
   };
 
+  // For memorizing whether a subexpression is hashable.
+  using ScalarHashable = std::unordered_set<expressions::ScalarPtr>;
+
+  // For counting the number of occurrences of each subexpression.
   using ScalarCounter =
       std::unordered_map<expressions::ScalarPtr, std::size_t, ScalarHash, ScalarEqual>;
 
+  // For mapping each subexpression to its transformed version.
   using ScalarMap =
       std::unordered_map<expressions::ScalarPtr,
                          expressions::CommonSubexpressionPtr,
                          ScalarHash,
                          ScalarEqual>;
 
-  using ScalarHashable = std::unordered_set<expressions::ScalarPtr>;
-
   std::vector<expressions::ExpressionPtr> transformExpressions(
       const std::vector<expressions::ExpressionPtr> &expressions);
 
   expressions::ExpressionPtr transformExpression(
       const expressions::ExpressionPtr &expression);
 
+  // Traverse the expression tree and increase the count of each subexpression.
   bool visitAndCount(
       const expressions::ExpressionPtr &expression,
       ScalarCounter *counter,
       ScalarHashable *hashable);
 
+  // Traverse the expression tree and transform subexpressions (to common
+  // subexpressions) if applicable.
   expressions::ExpressionPtr visitAndTransform(
       const expressions::ExpressionPtr &expression,
       const std::size_t max_reference_count,
@@ -123,8 +140,31 @@ class ExtractCommonSubexpression : public Rule<physical::Physical> {
     return output;
   }
 
+  struct ExpressionTypeHash {
+    using UnderT = std::underlying_type<expressions::ExpressionType>::type;
+
+    inline std::size_t operator()(const expressions::ExpressionType &et) const {
+      return std::hash<UnderT>()(static_cast<UnderT>(et));
+    }
+  };
+
+  // Here we define that an expression type is homogeneous if at execution time
+  // the result column vector of that expression has a one-to-one positional
+  // correspondence with all the result column vectors from its child expressions.
+  // E.g. aggregate functions and CASE expressions are not homogeneous.
+  //
+  // Being homogeneous enables common subexpressions to be hoisted through.
+  // For example, consider the case
+  // --
+  //   (x * 2) + F(x * 2)
+  // --
+  // where F is some unary expression. Then if F is homogenous, we can mark the
+  // two (x * 2) as common subexpressions. Otherwise, we cannot do that since
+  // the two subexpressions will generate different result column vectors.
+  std::unordered_set<expressions::ExpressionType,
+                     ExpressionTypeHash> homogeneous_expression_types_;
+
   OptimizerContext *optimizer_context_;
-  std::unordered_set<int> homogeneous_whitelist_;
 
   DISALLOW_COPY_AND_ASSIGN(ExtractCommonSubexpression);
 };

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6dc3a3b4/query_optimizer/tests/physical_generator/CommonSubexpression.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/physical_generator/CommonSubexpression.test b/query_optimizer/tests/physical_generator/CommonSubexpression.test
index b23a97d..0d7ed3f 100644
--- a/query_optimizer/tests/physical_generator/CommonSubexpression.test
+++ b/query_optimizer/tests/physical_generator/CommonSubexpression.test
@@ -127,13 +127,13 @@ TopLevelPlan
 | | +-grouping_expressions=
 | | | +-[]
 | | +-aggregate_expressions=
-| |   +-Alias[id=6,name=,alias=$aggregate0,relation=$aggregate,type=Double NULL]
+| |   +-Alias[id=6,name=,alias=$aggregate0,relation=,type=Double NULL]
 | |   | +-AggregateFunction[function=SUM]
 | |   |   +-CommonSubexpression[common_subexpression_id=8]
 | |   |     +-Operand=Add
 | |   |       +-AttributeReference[id=0,name=int_col,relation=test,type=Int NULL]
 | |   |       +-AttributeReference[id=2,name=float_col,relation=test,type=Float]
-| |   +-Alias[id=7,name=,alias=$aggregate1,relation=$aggregate,type=Float NULL]
+| |   +-Alias[id=7,name=,alias=$aggregate1,relation=,type=Float NULL]
 | |     +-AggregateFunction[function=MIN]
 | |       +-Multiply
 | |         +-CommonSubexpression[common_subexpression_id=8]