You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by zu...@apache.org on 2017/02/03 03:27:25 UTC

[1/4] incubator-quickstep git commit: Add KEYS file for release [Forced Update!]

Repository: incubator-quickstep
Updated Branches:
  refs/heads/reduce-group-by-attrs 83d1b5924 -> 47e03182b (forced update)


Add KEYS file for release


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/50f3ba65
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/50f3ba65
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/50f3ba65

Branch: refs/heads/reduce-group-by-attrs
Commit: 50f3ba65fbd19d2dc07f704a36c4320dd7fdba23
Parents: 4ba819c
Author: Jignesh Patel <jm...@hotmail.com>
Authored: Thu Feb 2 18:12:04 2017 -0600
Committer: Jignesh Patel <jm...@hotmail.com>
Committed: Thu Feb 2 18:12:04 2017 -0600

----------------------------------------------------------------------
 KEYS | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/50f3ba65/KEYS
----------------------------------------------------------------------
diff --git a/KEYS b/KEYS
new file mode 100644
index 0000000..ce3964f
--- /dev/null
+++ b/KEYS
@@ -0,0 +1,51 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v1
+
+mQINBFiTyOUBEAC6iCMXSfoQPf29eall0puYj13vMESHG2lo8/wQXJPxarv0zUd0
+yiRQDwjzU9lqLaft8nIx38QrLzGxAr/HNPVMr2Ng7NIVcgadIjtumNvIt1ML1dMO
++c/k41xk629Kz+HQ+OiWg0t76EDv2Kf1EZ5Ai5Uay7wxOKjRU3QXl5QghvUMwgAo
+UlqShqNEyHyPOaZKa7EzBk3kBPjppuHs/3mWFXTISPHFKdNux0yroW/43tS7LmuC
+YNaM5+NZlzzNOqDgtcDR7klpKPvnFyuDqlZr3x3MBUDBOgBgovhVJqYCCihCkofK
+C1Dw5RhJE0sswhEcl9eoC+ANeSrQPUTabyb+Duud/UJ7cCAcH1HG26QQUpMQj7Fl
+NzyyZONbOuFQ2Nxc1lmua2/D5aSyQ20wrnxQ//2P1Da4PU/EbW0AR+VAUJM4c1do
+5b7Sy6kSK9SVhEJAavsNeOuldmUi9OPrxGD9PUD4OUmMxn/y+LDmEXvSTOB7+Wg+
+vSZOViwHVrAqrykZnLDKIvv7q+kzXaxQxFFtE6D5qjrdcdzw+2Cf+RZafSticIWi
+pPUJxn2ua9h94O9aYLOBp/bGkeb2f4eFW87RS6rBIWofIFnLIR3Kq64H1cKHVcXe
+gJcYB6usczpNDPgeoQVFAOYWpLQ6x3WRA4T5i8TLWohoqa2j5gjERgMOtQARAQAB
+tCJKaWduZXNoIFBhdGVsIDxqaWduZXNoQGFwYWNoZS5vcmc+iQI3BBMBCgAhBQJY
+k8jlAhsDBQsJCAcDBRUKCQgLBRYCAwEAAh4BAheAAAoJENt3gMCjwp4SVoMQAJM6
+z0TIdH7dNp7aF1EtksuekwzFO1dX2zz7qxF7O1a5ckCJppmFc0H1OjhHm0fG8Who
+tDiGue5NdyjcdXstmZoi+Xk7rMVIXSu+CYvoSDr3LKrKLJzSqWFmsd3CtwHl5vWD
+8MQTgmmdYJh+2lq5sAPJq4EUzAotdMgSLcv6+u/alR71WtzpKwuhOwfGLBe24w5A
+1pDbqso8rgX78Ga9q6Kqah/BM7YqPmxfgtazecl781KsaE6ODPW20XxrvvMq99+3
+LQ3NWpibZ6ed7T7ak2CEDRcnYayNQhvHWrygjO/JOE8xQ0BsBQ1hNmSBLkAVJa+L
+BQ2g1bR3bT7dQNr9V+fQPtZoA/ZNxttk+Nu+NZ1ZQ9jEyrkzpZmZlnBephQCNGeK
+5cfHNLJYwBmPDRZ6/UYwLj+OsfJAAAJHNbnV2d9pj5tp49EJ9UIJ4p71SlNHhM9H
+O1JXKJOD9jpomNeRlDTg7jh5h3LtBR3rtFX+kCo5Ktzoe4dTQ5Ei7qMpQ/ygrT7+
+QqUFZ3jvr1o5OzLUuDg0qBR2C0wPAJN+y3aqdU6Aw0o40UKsme687DhUPHGrZJ26
+YXYJ71iUg0G7VeEh75QylNkqbpUPMvMm7rxe1icfbxZu4sBdQlJj5G1DntMLeACB
+6UVgRwE5kn+V/59wL3GNg/CwT6cbjPZq4+tH0QkxuQINBFiTyOUBEAC6AaqV3ijL
+N9HJUobrkJZ6/MBEYkh37+XFF8swzW4RkGz9lTnim/FN341xfwmLQEjTfvX9Bd0q
+y2QyCSnP96C5i8cV8DNOK57JXipaWijFBBGOHOwDTJx2uxJJIWCtVJowievRBPW3
+T7r2e6WHepYtlDMjkJltwycRP8MsfMx1lcO9IACYakbZZAy0cqWgxdtTkucitcHO
+8/nJxSrIzlBnU0ktCH5/tvu/XN+TIEOHKgPMEldByeQ/m53CTU9X94YnUJ0DoqqG
+pCqO3LHV3iMONlfnBhpQfOD/Mzv2B+SKu4+LK1quUtfA0U089WrcrjBbF86gTsTj
+GLhzqX+/xb8Vf+/Jbx72uPNW0+TGqO0TzAujGL/wMZOPiBvVC17HI9qd8GC0pXe4
+y+pMiaCFcW0kDBLf69oaICrRVXOR75NyCyR9s0Dac+/DwiU3R3nafM6E9j7PXuNz
+PXmwlYoKLBwmOYOYN8j5fbaZHsZeDIoGkLMPpG1mLG82w9ONMwAt8BZPVwURa1SU
+Q0sv0QoO/QOYuk0QY7k8t6+q82oKIAcyqaLWx3wfCXdwhAF9jGVdShH3pVD2BiYC
+vRteQRQjy5XdSwjkASCNPMnIEpWWI7XADG0vBimdvHwLHXgp/eK6XtsqpAFmU0JJ
+WWy3AcWznUZnxKV1UgfmJkHmbD8jRMZndwARAQABiQIfBBgBCgAJBQJYk8jlAhsM
+AAoJENt3gMCjwp4SC/UP/1FAmrdH9hSAyTkJgU23iK+tZ7n4+C61B7pYS7J2r2Un
+tAJwAD/NjGUawfH6D32P97AMm/7Zls9K8PiMrHcLVRYbzR9327By/xTuq4voPg88
+/9u6gt57KbYrvQLTmGjsdpEkdAdGrKOuppuoOW5gge20pulSAO23JooNnyo6hQR4
+lnN02oGrGlRR6A6bVHfeXDtztr+NMjZ/32w8ZjeYppi96lZhJzrJhozchb7rBPEi
+I1Dkgb1IApfCHxvSXNlF3kyiDANwksCTE5otREE2/4hLu+KSs00OPYhJi/2/e69A
+CO01C5iVRpCs9/ofa0Rm+BbWVLWBcsZMR3TDJ/zHlf791FnsQJxFsf3uRmA+BzwK
+KSbcqUt0yc9/nuC4O1I/Ew00Gz5Vrk1Gaw54XXDBsJJvhnanw2E8qc1l9Cu5c/Ly
+AX5WxwPs31QMFNVvLVZ8qyqS5H6Z5kq1gLIjLYSLdnvDW6U6sDn9DOEqtFl72Y+3
+v2kCW3/ARGls2/G2wPPo9iWlYNkoF61o1rquvG1IUN8+mXCdjeAz3NvtDDPotxp9
+iYXZdNoVw3RZC2XRQB+as9wYnz/Ziqbrrw58/E5FdmC8U8+Fa/0lTUh6VsPjpu5u
+E7agqOm2ReVbNLPyHa2oGftKu0Cwyghbys5xNxqbNPQnFR9N9Soi+0n4IGCZ/tj5
+=qv6a
+-----END PGP PUBLIC KEY BLOCK-----


[4/4] incubator-quickstep git commit: Reduce the number of group-by attributes by pulling tables up aggregations

Posted by zu...@apache.org.
Reduce the number of group-by attributes by pulling tables up aggregations


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/47e03182
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/47e03182
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/47e03182

Branch: refs/heads/reduce-group-by-attrs
Commit: 47e03182bb930c045a8c17098a404f5698c8b9c9
Parents: 7727e77
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Sun Jan 29 18:36:14 2017 -0600
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Thu Feb 2 19:27:18 2017 -0800

----------------------------------------------------------------------
 query_optimizer/CMakeLists.txt                  |   1 +
 query_optimizer/Optimizer.cpp                   |   3 +-
 query_optimizer/Optimizer.hpp                   |   2 -
 query_optimizer/PhysicalGenerator.cpp           |   3 +
 query_optimizer/PhysicalGenerator.hpp           |  11 +-
 query_optimizer/rules/CMakeLists.txt            |  23 ++
 .../rules/ReduceGroupByAttributes.cpp           | 217 +++++++++++++++++++
 .../rules/ReduceGroupByAttributes.hpp           | 143 ++++++++++++
 query_optimizer/tests/OptimizerTest.cpp         |   2 +-
 .../tests/OptimizerTextTestRunner.cpp           |   7 +-
 .../tests/OptimizerTextTestRunner.hpp           |   3 +-
 third_party/src/iwyu/iwyu_helper.py             |  14 +-
 12 files changed, 412 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/47e03182/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 7f90e11..bc9a52f 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -212,6 +212,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
                       quickstep_queryoptimizer_rules_InjectJoinFilters
                       quickstep_queryoptimizer_rules_PruneColumns
                       quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
+                      quickstep_queryoptimizer_rules_ReduceGroupByAttributes
                       quickstep_queryoptimizer_rules_ReorderColumns
                       quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
                       quickstep_queryoptimizer_rules_SwapProbeBuild

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/47e03182/query_optimizer/Optimizer.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/Optimizer.cpp b/query_optimizer/Optimizer.cpp
index b14c938..1b91574 100644
--- a/query_optimizer/Optimizer.cpp
+++ b/query_optimizer/Optimizer.cpp
@@ -30,10 +30,11 @@ void Optimizer::generateQueryHandle(const ParseStatement &parse_statement,
                                     OptimizerContext *optimizer_context,
                                     QueryHandle *query_handle) {
   LogicalGenerator logical_generator(optimizer_context);
+  PhysicalGenerator physical_generator(optimizer_context);
   ExecutionGenerator execution_generator(catalog_database, query_handle);
 
   execution_generator.generatePlan(
-      physical_generator_.generatePlan(
+      physical_generator.generatePlan(
           logical_generator.generatePlan(*catalog_database, parse_statement)));
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/47e03182/query_optimizer/Optimizer.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/Optimizer.hpp b/query_optimizer/Optimizer.hpp
index 36f956a..227dd04 100644
--- a/query_optimizer/Optimizer.hpp
+++ b/query_optimizer/Optimizer.hpp
@@ -70,8 +70,6 @@ class Optimizer {
                            QueryHandle *query_handle);
 
  private:
-  PhysicalGenerator physical_generator_;
-
   DISALLOW_COPY_AND_ASSIGN(Optimizer);
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/47e03182/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 5dc0ffb..1b68f49 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -30,6 +30,7 @@
 #include "query_optimizer/rules/InjectJoinFilters.hpp"
 #include "query_optimizer/rules/PruneColumns.hpp"
 #include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp"
+#include "query_optimizer/rules/ReduceGroupByAttributes.hpp"
 #include "query_optimizer/rules/ReorderColumns.hpp"
 #include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
 #include "query_optimizer/rules/SwapProbeBuild.hpp"
@@ -127,6 +128,8 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
   // general FusePhysical optimization) in the future.
   rules.emplace_back(new PushDownLowCostDisjunctivePredicate());
 
+  rules.emplace_back(new ReduceGroupByAttributes(optimizer_context_));
+
   if (FLAGS_reorder_hash_joins) {
     rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
     rules.emplace_back(new PruneColumns());

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/47e03182/query_optimizer/PhysicalGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.hpp b/query_optimizer/PhysicalGenerator.hpp
index 886a173..42fea86 100644
--- a/query_optimizer/PhysicalGenerator.hpp
+++ b/query_optimizer/PhysicalGenerator.hpp
@@ -33,6 +33,8 @@
 namespace quickstep {
 namespace optimizer {
 
+class OptimizerContext;
+
 /** \addtogroup QueryOptimizer
  *  @{
  */
@@ -43,9 +45,12 @@ namespace optimizer {
 class PhysicalGenerator : public LogicalToPhysicalMapper {
  public:
   /**
-   * @brief Constructor
+   * @brief Constructor.
+   *
+   * @param optimizer_context The optimizer context.
    */
-  PhysicalGenerator() {
+  explicit PhysicalGenerator(OptimizerContext *optimizer_context)
+      : optimizer_context_(optimizer_context) {
     createStrategies();
   }
 
@@ -125,6 +130,8 @@ class PhysicalGenerator : public LogicalToPhysicalMapper {
    */
   std::unordered_map<logical::LogicalPtr, physical::PhysicalPtr> logical_to_physical_map_;
 
+  OptimizerContext *optimizer_context_;
+
   /**
    * @brief The complete physical plan.
    */

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/47e03182/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 223c78c..029d816 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -29,6 +29,9 @@ add_library(quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
             PushDownLowCostDisjunctivePredicate.cpp
             PushDownLowCostDisjunctivePredicate.hpp)
 add_library(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin PushDownSemiAntiJoin.cpp PushDownSemiAntiJoin.hpp)
+add_library(quickstep_queryoptimizer_rules_ReduceGroupByAttributes
+            ReduceGroupByAttributes.cpp
+            ReduceGroupByAttributes.hpp)
 add_library(quickstep_queryoptimizer_rules_ReorderColumns ReorderColumns.cpp ReorderColumns.hpp)
 add_library(quickstep_queryoptimizer_rules_Rule ../../empty_src.cpp Rule.hpp)
 add_library(quickstep_queryoptimizer_rules_RuleHelper RuleHelper.cpp RuleHelper.hpp)
@@ -143,6 +146,25 @@ target_link_libraries(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
                       quickstep_queryoptimizer_logical_PatternMatcher
                       quickstep_queryoptimizer_rules_TopDownRule
                       quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_ReduceGroupByAttributes
+                      ${GFLAGS_LIB_NAME}
+                      quickstep_catalog_CatalogRelation
+                      quickstep_queryoptimizer_OptimizerContext
+                      quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+                      quickstep_queryoptimizer_expressions_AttributeReference
+                      quickstep_queryoptimizer_expressions_ExprId
+                      quickstep_queryoptimizer_expressions_ExpressionUtil
+                      quickstep_queryoptimizer_expressions_NamedExpression
+                      quickstep_queryoptimizer_physical_Aggregate
+                      quickstep_queryoptimizer_physical_HashJoin
+                      quickstep_queryoptimizer_physical_PatternMatcher
+                      quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_physical_PhysicalType
+                      quickstep_queryoptimizer_physical_TableReference
+                      quickstep_queryoptimizer_physical_TopLevelPlan
+                      quickstep_queryoptimizer_rules_PruneColumns
+                      quickstep_queryoptimizer_rules_Rule
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_rules_ReorderColumns
                       quickstep_queryoptimizer_expressions_AttributeReference
                       quickstep_queryoptimizer_expressions_ExprId
@@ -272,6 +294,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
                       quickstep_queryoptimizer_rules_PushDownFilter
                       quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
                       quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
+                      quickstep_queryoptimizer_rules_ReduceGroupByAttributes
                       quickstep_queryoptimizer_rules_ReorderColumns
                       quickstep_queryoptimizer_rules_Rule
                       quickstep_queryoptimizer_rules_RuleHelper

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/47e03182/query_optimizer/rules/ReduceGroupByAttributes.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ReduceGroupByAttributes.cpp b/query_optimizer/rules/ReduceGroupByAttributes.cpp
new file mode 100644
index 0000000..dcdd27a
--- /dev/null
+++ b/query_optimizer/rules/ReduceGroupByAttributes.cpp
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/ReduceGroupByAttributes.hpp"
+
+#include <algorithm>
+#include <map>
+#include <vector>
+#include <unordered_set>
+#include <utility>
+
+#include "catalog/CatalogRelation.hpp"
+#include "query_optimizer/OptimizerContext.hpp"
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/TableReference.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "query_optimizer/rules/PruneColumns.hpp"
+
+#include "gflags/gflags.h"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+DEFINE_uint64(reduce_group_by_attributes_threshold, 3u,
+              "The threshold for a stored relation's number of attributes in a "
+              "group-by clause for the ReduceGroupByAttributes optimization "
+              "rule to pull the stored relation up the aggregation");
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr ReduceGroupByAttributes::apply(const P::PhysicalPtr &input) {
+  DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+  cost_model_.reset(new cost::StarSchemaSimpleCostModel(
+      std::static_pointer_cast<const P::TopLevelPlan>(input)->shared_subplans()));
+
+  P::PhysicalPtr output = applyInternal(input);
+  if (output != input) {
+    output = PruneColumns().apply(output);
+  }
+  return output;
+}
+
+P::PhysicalPtr ReduceGroupByAttributes::applyInternal(const P::PhysicalPtr &input) {
+  std::vector<P::PhysicalPtr> new_children;
+  for (const P::PhysicalPtr &child : input->children()) {
+    new_children.push_back(applyInternal(child));
+  }
+
+  if (new_children != input->children()) {
+    return applyToNode(input->copyWithNewChildren(new_children));
+  } else {
+    return applyToNode(input);
+  }
+}
+
+P::PhysicalPtr ReduceGroupByAttributes::applyToNode(const P::PhysicalPtr &input) {
+  P::TableReferencePtr table_reference;
+  if (P::SomeTableReference::MatchesWithConditionalCast(input, &table_reference)) {
+    // Collect the attributes-to-TableReference mapping info.
+    for (const auto &attr : table_reference->attribute_list()) {
+      source_.emplace(attr->id(), std::make_pair(table_reference, attr));
+    }
+    return input;
+  }
+
+  P::AggregatePtr aggregate;
+  if (!P::SomeAggregate::MatchesWithConditionalCast(input, &aggregate) ||
+      aggregate->grouping_expressions().size() <= 1u) {
+    return input;
+  }
+
+  // Divide the group-by attributes into groups based on their source table.
+  std::map<P::TableReferencePtr, std::vector<E::AttributeReferencePtr>> table_attributes;
+  for (const auto &expr : aggregate->grouping_expressions()) {
+    const auto source_it = source_.find(expr->id());
+    if (source_it != source_.end()) {
+      table_attributes[source_it->second.first].emplace_back(source_it->second.second);
+    }
+  }
+
+  std::unordered_set<E::ExprId> erased_grouping_attr_ids;
+  std::vector<std::pair<P::TableReferencePtr, E::AttributeReferencePtr>> hoisted_tables;
+
+  // For each group (i.e. each source table), if it is profitable then we pull
+  // the table up the aggregation.
+  for (const auto &pair : table_attributes) {
+    const P::TableReferencePtr table = pair.first;
+    const std::vector<E::AttributeReferencePtr> &attributes = pair.second;
+    // TODO(jianqiao): find a cost-based metric instead of hard-coding the threshold
+    // number of group-by attributes.
+    if (attributes.size() <= FLAGS_reduce_group_by_attributes_threshold) {
+      continue;
+    }
+
+    std::vector<AttributeInfo> attr_infos;
+    for (const auto &attr : attributes) {
+      attr_infos.emplace_back(attr,
+                              cost_model_->impliesUniqueAttributes(table, {attr}),
+                              !attr->getValueType().isVariableLength(),
+                              attr->getValueType().maximumByteLength());
+    }
+
+    std::vector<const AttributeInfo *> attr_info_refs;
+    for (const auto &info : attr_infos) {
+      attr_info_refs.emplace_back(&info);
+    }
+    std::sort(attr_info_refs.begin(),
+              attr_info_refs.end(),
+              AttributeInfo::IsBetterThan);
+
+    const AttributeInfo &best_candidate = *attr_info_refs.front();
+    if (!best_candidate.is_unique) {
+      // Cannot find a key attribute. Give up pulling this table up.
+      continue;
+    }
+
+    const E::AttributeReferencePtr key_attribute = best_candidate.attribute;
+    hoisted_tables.emplace_back(table, key_attribute);
+
+    for (const auto &attr : attributes) {
+      if (attr->id() != key_attribute->id()) {
+        erased_grouping_attr_ids.emplace(attr->id());
+      }
+    }
+  }
+
+  if (erased_grouping_attr_ids.empty()) {
+    return input;
+  }
+
+  // Reconstruct the Aggregate node with reduced group-by attributes and then
+  // construct HashJoin nodes on top of the Aggregate.
+  std::vector<E::NamedExpressionPtr> reduced_grouping_expressions;
+  for (const auto &expr : aggregate->grouping_expressions()) {
+    if (erased_grouping_attr_ids.find(expr->id()) == erased_grouping_attr_ids.end()) {
+      reduced_grouping_expressions.emplace_back(expr);
+    }
+  }
+
+  const P::AggregatePtr new_aggregate =
+      P::Aggregate::Create(aggregate->input(),
+                           reduced_grouping_expressions,
+                           aggregate->aggregate_expressions(),
+                           aggregate->filter_predicate());
+
+  P::PhysicalPtr output = new_aggregate;
+  std::vector<E::NamedExpressionPtr> project_expressions =
+      E::ToNamedExpressions(output->getOutputAttributes());
+  for (const auto &pair : hoisted_tables) {
+    const P::TableReferencePtr &source_table = pair.first;
+    const E::AttributeReferencePtr &probe_attribute = pair.second;
+
+    E::AttributeReferencePtr build_attribute;
+    std::vector<E::AttributeReferencePtr> new_attribute_list;
+    for (const auto &attr : source_table->attribute_list()) {
+      if (attr->id() == probe_attribute->id()) {
+        build_attribute =
+          E::AttributeReference::Create(optimizer_context_->nextExprId(),
+                                        attr->attribute_name(),
+                                        attr->attribute_alias(),
+                                        attr->relation_name(),
+                                        attr->getValueType(),
+                                        E::AttributeReferenceScope::kLocal);
+        new_attribute_list.emplace_back(build_attribute);
+      } else {
+        new_attribute_list.emplace_back(attr);
+        project_expressions.emplace_back(attr);
+      }
+    }
+
+    DCHECK(build_attribute != nullptr);
+    const P::TableReferencePtr build_side_table =
+        P::TableReference::Create(source_table->relation(),
+                                  source_table->relation()->getName(),
+                                  new_attribute_list);
+    output = P::HashJoin::Create(output,
+                                 build_side_table,
+                                 {probe_attribute},
+                                 {build_attribute},
+                                 nullptr,
+                                 project_expressions,
+                                 P::HashJoin::JoinType::kInnerJoin);
+  }
+
+  return output;
+}
+
+}  // namespace optimizer
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/47e03182/query_optimizer/rules/ReduceGroupByAttributes.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ReduceGroupByAttributes.hpp b/query_optimizer/rules/ReduceGroupByAttributes.hpp
new file mode 100644
index 0000000..5a1f295
--- /dev/null
+++ b/query_optimizer/rules/ReduceGroupByAttributes.hpp
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_REDUCE_GROUP_BY_ATTRIBUTES_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_REDUCE_GROUP_BY_ATTRIBUTES_HPP_
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/TableReference.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+class OptimizerContext;
+
+/**
+ * @brief Rule that applies to a physical plan to reduce the number of group-by
+ *        attributes for Aggregate nodes (to improve performance) by pulling
+ *        joins up the aggregations.
+ *
+ * For example, let R be a relation with PRIMARY KEY x and attributes y, z. Let
+ * S be a relation with FOREIGN KEY u referring to R(x) and attribute v. Then the
+ * optimization rule will transform the physical plan:
+ *   Aggregate(
+ *     [input relation]: HashJoin(
+ *                         [probe relation]: S
+ *                         [build relation]: R
+ *                         [join expression]: S.u = R.x
+ *                         [project attributes]: v, x, y, z
+ *                       )
+ *     [aggregate expression]: SUM(v) AS sum_v
+ *     [group-by attributes]: x, y, z
+ *   )
+ *
+ * into:
+ *   HashJoin(
+ *     [probe relation]: Aggregate(
+ *                         [input relation]: S
+ *                         [aggregate expression]: SUM(v) AS sum_v
+ *                         [group-by attribute]: u
+ *                       ) AS T
+ *     [build relation]: R
+ *     [join expression]: T.u = R.x
+ *     [project attributes]: sum_v, x, y, z
+ *   )
+ */
+class ReduceGroupByAttributes : public Rule<physical::Physical> {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param optimizer_context The optimizer context.
+   */
+  explicit ReduceGroupByAttributes(OptimizerContext *optimizer_context)
+      : optimizer_context_(optimizer_context) {}
+
+  ~ReduceGroupByAttributes() override {}
+
+  std::string getName() const override {
+    return "ReduceGroupByAttributes";
+  }
+
+  physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+  struct AttributeInfo {
+    AttributeInfo(const expressions::AttributeReferencePtr &attribute_in,
+                  const bool is_unique_in,
+                  const bool is_fixed_length_in,
+                  const std::size_t maximum_size_in)
+        : attribute(attribute_in),
+          is_unique(is_unique_in),
+          is_fixed_length(is_fixed_length_in),
+          maximum_size(maximum_size_in) {}
+
+    // In the situation that there are multiple attributes that can serve as the
+    // group-by key, we define an ordering based on aggregation performance (e.g.
+    // it is faster to do aggregation with a fixed-length attribute as the group-by
+    // key than with a variable-length attribute).
+    inline static bool IsBetterThan(const AttributeInfo *lhs,
+                                    const AttributeInfo *rhs) {
+      if (lhs->is_unique != rhs->is_unique) {
+        return lhs->is_unique;
+      }
+      if (lhs->is_fixed_length != rhs->is_fixed_length) {
+        return lhs->is_fixed_length;
+      }
+      if (lhs->maximum_size != rhs->maximum_size) {
+        return lhs->maximum_size < rhs->maximum_size;
+      }
+      return lhs->attribute->id() < rhs->attribute->id();
+    }
+
+    const expressions::AttributeReferencePtr attribute;
+    const bool is_unique;
+    const bool is_fixed_length;
+    const std::size_t maximum_size;
+  };
+
+  physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input);
+  physical::PhysicalPtr applyToNode(const physical::PhysicalPtr &input);
+
+  OptimizerContext *optimizer_context_;
+  std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
+
+  // Maps an attribute's id to the TableReference that generates the attribute.
+  std::unordered_map<expressions::ExprId,
+                     std::pair<physical::TableReferencePtr,
+                               expressions::AttributeReferencePtr>> source_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReduceGroupByAttributes);
+};
+
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_QUERY_OPTIMIZER_RULES_REDUCE_GROUP_BY_ATTRIBUTES_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/47e03182/query_optimizer/tests/OptimizerTest.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/OptimizerTest.cpp b/query_optimizer/tests/OptimizerTest.cpp
index 3838638..7eb7a11 100644
--- a/query_optimizer/tests/OptimizerTest.cpp
+++ b/query_optimizer/tests/OptimizerTest.cpp
@@ -62,7 +62,7 @@ OptimizerTest::OptimizerTest()
       catalog_database_(
           new CatalogDatabase(catalog_.get(), "TestDatabase" /* name */, 0)),
       optimizer_context_(new OptimizerContext),
-      physical_generator_(new PhysicalGenerator()) {}
+      physical_generator_(new PhysicalGenerator(optimizer_context_.get())) {}
 
 E::AliasPtr OptimizerTest::createAlias(const E::ExpressionPtr &expression,
                                        const std::string &attribute_name,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/47e03182/query_optimizer/tests/OptimizerTextTestRunner.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/OptimizerTextTestRunner.cpp b/query_optimizer/tests/OptimizerTextTestRunner.cpp
index b9238c9..cb8f153 100644
--- a/query_optimizer/tests/OptimizerTextTestRunner.cpp
+++ b/query_optimizer/tests/OptimizerTextTestRunner.cpp
@@ -80,7 +80,7 @@ void OptimizerTextTestRunner::runTestCase(const std::string &input,
       }
       if (output_physical_plan) {
         physical_plan =
-            generatePhysicalPlan(optimized_logical_plan);
+            generatePhysicalPlan(optimized_logical_plan, &optimizer_context);
         ++num_options;
       }
 
@@ -126,8 +126,9 @@ logical::LogicalPtr OptimizerTextTestRunner::generateLogicalPlan(
 }
 
 physical::PhysicalPtr OptimizerTextTestRunner::generatePhysicalPlan(
-    const logical::LogicalPtr &logical_plan) {
-  PhysicalGenerator physical_generator;
+    const logical::LogicalPtr &logical_plan,
+    OptimizerContext *optimizer_context) {
+  PhysicalGenerator physical_generator(optimizer_context);
   return physical_generator.generatePlan(logical_plan);
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/47e03182/query_optimizer/tests/OptimizerTextTestRunner.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/OptimizerTextTestRunner.hpp b/query_optimizer/tests/OptimizerTextTestRunner.hpp
index 27fa14f..d8f604b 100644
--- a/query_optimizer/tests/OptimizerTextTestRunner.hpp
+++ b/query_optimizer/tests/OptimizerTextTestRunner.hpp
@@ -73,7 +73,8 @@ class OptimizerTextTestRunner : public TextBasedTestRunner {
                                           OptimizerContext *optimizer_context);
 
   physical::PhysicalPtr generatePhysicalPlan(
-      const logical::LogicalPtr &logical_plan);
+      const logical::LogicalPtr &logical_plan,
+      OptimizerContext *optimizer_context);
 
   SqlParserWrapper sql_parser_;
   TestDatabaseLoader test_database_loader_;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/47e03182/third_party/src/iwyu/iwyu_helper.py
----------------------------------------------------------------------
diff --git a/third_party/src/iwyu/iwyu_helper.py b/third_party/src/iwyu/iwyu_helper.py
index dff4d55..93ddbae 100755
--- a/third_party/src/iwyu/iwyu_helper.py
+++ b/third_party/src/iwyu/iwyu_helper.py
@@ -22,15 +22,15 @@ QUICKSTEP_INCLUDES = [ '.',
                        './build/third_party/gflags/include',
                        './build/third_party/protobuf/include',
                        './build/third_party/tmb/include',
-                       './third_party/benchmark/include',
-                       './third_party/glog/src',
-                       './third_party/googletest/googletest/include',
-                       './third_party/protobuf/src',
-                       './third_party/re2',
-                       './third_party/tmb/include']
+                       './third_party/src/benchmark/include',
+                       './third_party/src/glog/src',
+                       './third_party/src/googletest/googletest/include',
+                       './third_party/src/protobuf/src',
+                       './third_party/src/re2',
+                       './third_party/src/tmb/include']
 QUICKSTEP_DEFINES = [ '-DQUICKSTEP_DEBUG',
                       '-DQUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION', ]
-CLANG_FLAGS = [ '-std=c++14', '-x', 'c++', ]
+CLANG_FLAGS = [ '-std=c++14', '-x', 'c++', '-ferror-limit=-1' ]
 
 # Custom configuration filenames.
 CUSTOM_IWYU_MAPPINGS = '.iwyu.imp'


[2/4] incubator-quickstep git commit: Revert "Fixed the linking issue for the distributed cli."

Posted by zu...@apache.org.
Revert "Fixed the linking issue for the distributed cli."

This reverts commit dff4a145e2c2d3d7b84fb259e48e425310a52a8a.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/6ec9e9d3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/6ec9e9d3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/6ec9e9d3

Branch: refs/heads/reduce-group-by-attrs
Commit: 6ec9e9d3d33dd9e154a00c2a7d1beaf60ea9e279
Parents: 50f3ba6
Author: Zuyu Zhang <zu...@apache.org>
Authored: Thu Feb 2 18:14:12 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Thu Feb 2 18:14:12 2017 -0800

----------------------------------------------------------------------
 cli/distributed/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6ec9e9d3/cli/distributed/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cli/distributed/CMakeLists.txt b/cli/distributed/CMakeLists.txt
index 1069abd..a00ffda 100644
--- a/cli/distributed/CMakeLists.txt
+++ b/cli/distributed/CMakeLists.txt
@@ -28,7 +28,6 @@ target_link_libraries(quickstep_cli_distributed_Cli
                       glog
                       quickstep_catalog_CatalogRelation
                       quickstep_cli_Flags
-                      quickstep_cli_LineReader
                       quickstep_cli_PrintToScreen
                       quickstep_cli_distributed_Role
                       quickstep_parser_ParseStatement


[3/4] incubator-quickstep git commit: Fixed the linking issue for the distributed cli.

Posted by zu...@apache.org.
Fixed the linking issue for the distributed cli.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/7727e773
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/7727e773
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/7727e773

Branch: refs/heads/reduce-group-by-attrs
Commit: 7727e7735630a064c1feff9985143463b61002d6
Parents: 6ec9e9d
Author: Zuyu Zhang <zu...@apache.org>
Authored: Thu Feb 2 19:13:09 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Thu Feb 2 19:13:09 2017 -0800

----------------------------------------------------------------------
 CMakeLists.txt                 | 23 +++++++++++++++++++----
 cli/distributed/CMakeLists.txt | 20 --------------------
 2 files changed, 19 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7727e773/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ccb23a3..85210ef 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -808,17 +808,32 @@ target_link_libraries(quickstep_cli_shell ${LIBS})
 
 if (ENABLE_DISTRIBUTED)
   # Build the quickstep_distributed_cli_shell executable.
-  add_executable (quickstep_distributed_cli_shell cli/distributed/QuickstepDistributedCli.cpp)
+  add_executable (quickstep_distributed_cli_shell
+                  cli/distributed/Cli.hpp
+                  cli/distributed/Cli.cpp
+                  cli/distributed/QuickstepDistributedCli.cpp)
   # Link against direct deps (will transitively pull in everything needed).
-  # NOTE(zuyu): Link quickstep_cli_LineReader on behalf of quickstep_cli_distributed_Cli,
-  # as a workaround for bypassing conditionally built target checks in validate_cmakelists.py.
   target_link_libraries(quickstep_distributed_cli_shell
                         glog
+                        quickstep_catalog_CatalogRelation
+                        quickstep_cli_Flags
                         quickstep_cli_LineReader
-                        quickstep_cli_distributed_Cli
+                        quickstep_cli_PrintToScreen
                         quickstep_cli_distributed_Conductor
                         quickstep_cli_distributed_Executor
+                        quickstep_cli_distributed_Role
+                        quickstep_parser_ParseStatement
+                        quickstep_parser_SqlParserWrapper
+                        quickstep_queryexecution_BlockLocatorUtil
+                        quickstep_queryexecution_QueryExecutionMessages_proto
+                        quickstep_queryexecution_QueryExecutionTypedefs
+                        quickstep_queryexecution_QueryExecutionUtil
+                        quickstep_storage_DataExchangerAsync
+                        quickstep_storage_StorageBlockInfo
+                        quickstep_storage_StorageManager
+                        quickstep_utility_Macros
                         quickstep_utility_StringUtil
+                        tmb
                         ${GFLAGS_LIB_NAME}
                         ${GRPCPLUSPLUS_LIBRARIES})
 endif(ENABLE_DISTRIBUTED)

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7727e773/cli/distributed/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cli/distributed/CMakeLists.txt b/cli/distributed/CMakeLists.txt
index a00ffda..b46082f 100644
--- a/cli/distributed/CMakeLists.txt
+++ b/cli/distributed/CMakeLists.txt
@@ -18,30 +18,11 @@
 set_gflags_lib_name ()
 
 # Declare micro-libs and link dependencies:
-add_library(quickstep_cli_distributed_Cli Cli.cpp Cli.hpp)
 add_library(quickstep_cli_distributed_Conductor Conductor.cpp Conductor.hpp)
 add_library(quickstep_cli_distributed_Executor Executor.cpp Executor.hpp)
 add_library(quickstep_cli_distributed_Role Role.cpp Role.hpp)
 
 # Link dependencies:
-target_link_libraries(quickstep_cli_distributed_Cli
-                      glog
-                      quickstep_catalog_CatalogRelation
-                      quickstep_cli_Flags
-                      quickstep_cli_PrintToScreen
-                      quickstep_cli_distributed_Role
-                      quickstep_parser_ParseStatement
-                      quickstep_parser_SqlParserWrapper
-                      quickstep_queryexecution_BlockLocatorUtil
-                      quickstep_queryexecution_QueryExecutionMessages_proto
-                      quickstep_queryexecution_QueryExecutionTypedefs
-                      quickstep_queryexecution_QueryExecutionUtil
-                      quickstep_storage_DataExchangerAsync
-                      quickstep_storage_StorageBlockInfo
-                      quickstep_storage_StorageManager
-                      quickstep_utility_Macros
-                      quickstep_utility_StringUtil
-                      tmb)
 target_link_libraries(quickstep_cli_distributed_Conductor
                       glog
                       quickstep_cli_DefaultsConfigurator
@@ -83,7 +64,6 @@ target_link_libraries(quickstep_cli_distributed_Role
 add_library(quickstep_cli_distributed ../../empty_src.cpp CliDistributedModule.hpp)
 
 target_link_libraries(quickstep_cli_distributed
-                      quickstep_cli_distributed_Cli
                       quickstep_cli_distributed_Conductor
                       quickstep_cli_distributed_Executor
                       quickstep_cli_distributed_Role)